VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103667

Last change on this file since 103667 was 103667, checked in by vboxsync, 9 months ago

VMM/IEM: Native translation of IEM_MC_REF_MXCSR() body, bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 671.4 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103667 2024-03-04 13:46:36Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down the configs here to avoid wasting time on unused ones.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
136#endif
137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
138static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
139static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
140#endif
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
142DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
143 IEMNATIVEGSTREG enmGstReg, uint32_t off);
144DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store internal data
152 * in the chunk.
153 *
154 * Using RTHeapSimple is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation restricts page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
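/* Worked example (illustrative): a 300 byte request is rounded up to whole
 * sub-allocation units, i.e. cReqUnits = (300 + 128 - 1) >> 7 = 3 units,
 * so 3 * 128 = 384 bytes are handed out and three consecutive bits get set
 * in the chunk's allocation bitmap.  A quick compile-time sanity check that
 * the unit size and shift above agree: */
AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));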
167
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Critical section protecting the GDB JIT descriptor list. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
234
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDE into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
268/** Pointer to the per-chunk unwind info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
281 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap. So,
344 * for the sake of simplicity, they are allocated as one contiguous block
345 * (a worked example follows the structure definition). */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to a chunk). */
351 uint32_t cBitmapElementsPerChunk;
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * requested memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 * The size of the initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside in 32-bit RVA distance to the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
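/*
 * Worked example (illustrative) of the bitmap bookkeeping above, assuming a
 * 64 MB chunk and the 128 byte sub-allocation unit:
 *      cUnitsPerChunk          = _64M >> 7        = 524288 units per chunk,
 *      cBitmapElementsPerChunk = _64M >> (7 + 6)  =   8192 uint64_t per chunk,
 * i.e. 64 KB of allocation bitmap per chunk, with all chunks sharing one
 * contiguous bitmap allocation placed right after this structure.
 */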
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for cReqUnits of consecutive clear bits
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
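/*
 * Scan example (illustrative), assuming idxFirst = 0, cReqUnits = 2 and the
 * low bitmap bits being 0b0110 (units 1 and 2 in use): ASMBitFirstClear
 * returns 0, but the clear run at bit 0 is only one unit long, so the loop
 * resumes with ASMBitNextClear after bit 0 and lands on bit 3, where two
 * clear bits are found; bits 3 and 4 are set, idxFreeHint becomes 5, and the
 * caller is handed pvChunk + (3 << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT).
 */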
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516
517 for (unsigned iIteration = 0;; iIteration++)
518 {
519 /*
520 * Adjust the request size so it'll fit the allocator alignment/whatnot.
521 *
522 * For the RTHeapSimple allocator this means to follow the logic described
523 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
524 * existing chunks if we think we've got sufficient free memory around.
525 *
526 * While for the alternative one we just align it up to a whole unit size.
527 */
528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
529 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
530#else
531 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
532#endif
533 if (cbReq <= pExecMemAllocator->cbFree)
534 {
535 uint32_t const cChunks = pExecMemAllocator->cChunks;
536 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
537 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
544 {
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 }
549 }
550
551 /*
552 * Can we grow it with another chunk?
553 */
554 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
555 {
556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
557 AssertLogRelRCReturn(rc, NULL);
558
559 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
560 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
561 if (pvRet)
562 return pvRet;
563 AssertFailed();
564 }
565
566 /*
567 * Try pruning native TBs once.
568 */
569 if (iIteration == 0)
570 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
571 else
572 {
573 /** @todo stats... */
574 return NULL;
575 }
576 }
577
578}
579
580
581/** This is a hook that we may need later for changing memory protection back
582 * to readonly+exec */
583static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
584{
585#ifdef RT_OS_DARWIN
586 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
587 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
588 AssertRC(rc); RT_NOREF(pVCpu);
589
590 /*
591 * Flush the instruction cache:
592 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
593 */
594 /* sys_dcache_flush(pv, cb); - not necessary */
595 sys_icache_invalidate(pv, cb);
596#else
597 RT_NOREF(pVCpu, pv, cb);
598#endif
599}
600
601
602/**
603 * Frees executable memory.
604 */
605void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
609 Assert(pv);
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612#else
613 Assert(!((uintptr_t)pv & 63));
614#endif
615
616 /* Align the size as we did when allocating the block. */
617#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
618 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#else
620 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
621#endif
622
623 /* Free it / assert sanity. */
624#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
627 bool fFound = false;
628 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
629 {
630 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
631 fFound = offChunk < cbChunk;
632 if (fFound)
633 {
634#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
635 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
636 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
637
638 /* Check that it's valid and free it. */
639 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
641 for (uint32_t i = 1; i < cReqUnits; i++)
642 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
643 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
644
645 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
646 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
647
648 /* Update the stats. */
649 pExecMemAllocator->cbAllocated -= cb;
650 pExecMemAllocator->cbFree += cb;
651 pExecMemAllocator->cAllocations -= 1;
652 return;
653#else
654 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
655 break;
656#endif
657 }
658 }
659# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
660 AssertFailed();
661# else
662 Assert(fFound);
663# endif
664#endif
665
666#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
667 /* Update stats while cb is freshly calculated. */
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Free it. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674#endif
675}
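/*
 * Usage sketch (illustrative only) of the allocate / make-executable cycle
 * described above; the code buffer and its size are arbitrary example values
 * standing in for whatever native code the recompiler has emitted.
 */
#if 0 /* illustrative sketch, not built */
static void iemExecMemUsageSketch(PVMCPUCC pVCpu, uint8_t const *pbCode, uint32_t cbCode)
{
    /* 1. Get writable executable memory (returned read+write on darwin). */
    void *pvExec = iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pvExec)
    {
        /* 2. Emit/copy the native instructions while the pages are writable. */
        memcpy(pvExec, pbCode, cbCode);

        /* 3. Flip to read+exec and invalidate the instruction cache (darwin). */
        iemExecMemAllocatorReadyForUse(pVCpu, pvExec, cbCode);

        /* ... execute the translation block ... */

        /* 4. Eventually return the memory to the allocator. */
        iemExecMemAllocatorFree(pVCpu, pvExec, cbCode);
    }
}
#endif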
676
677
678
679#ifdef IN_RING3
680# ifdef RT_OS_WINDOWS
681
682/**
683 * Initializes the unwind info structures for windows hosts.
684 */
685static int
686iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
687 void *pvChunk, uint32_t idxChunk)
688{
689 RT_NOREF(pVCpu);
690
691 /*
692 * The AMD64 unwind opcodes.
693 *
694 * This is a program that starts with RSP after a RET instruction that
695 * ends up in recompiled code, and the operations we describe here will
696 * restore all non-volatile registers and bring RSP back to where our
697 * RET address is. This means it's reverse order from what happens in
698 * the prologue.
699 *
700 * Note! Using a frame register approach here both because we have one
701 * and mainly because the UWOP_ALLOC_LARGE argument values
702 * would be a pain to write initializers for. On the positive
703 * side, we're impervious to changes in the stack variable
704 * area and can deal with dynamic stack allocations if necessary.
705 */
706 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
707 {
708 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
709 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
710 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
711 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
712 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
713 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
714 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
715 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
716 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
717 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
718 };
719 union
720 {
721 IMAGE_UNWIND_INFO Info;
722 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
723 } s_UnwindInfo =
724 {
725 {
726 /* .Version = */ 1,
727 /* .Flags = */ 0,
728 /* .SizeOfProlog = */ 16, /* whatever */
729 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
730 /* .FrameRegister = */ X86_GREG_xBP,
731 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
732 }
733 };
734 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
735 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
736
737 /*
738 * Calc how much space we need and allocate it off the exec heap.
739 */
740 unsigned const cFunctionEntries = 1;
741 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
742 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
743# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
744 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
745 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
746 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
747# else
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
749 - pExecMemAllocator->cbHeapBlockHdr;
750 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbNeededAligned,
751 32 /*cbAlignment*/);
752# endif
753 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
754 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
755
756 /*
757 * Initialize the structures.
758 */
759 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
760
761 paFunctions[0].BeginAddress = 0;
762 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
763 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
764
765 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
766 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
767
768 /*
769 * Register it.
770 */
771 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
772 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
773
774 return VINF_SUCCESS;
775}
776
777
778# else /* !RT_OS_WINDOWS */
779
780/**
781 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
784{
785 if (iValue >= 64)
786 {
787 Assert(iValue < 0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
790 }
791 else if (iValue >= 0)
792 *Ptr.pb++ = (uint8_t)iValue;
793 else if (iValue > -64)
794 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
795 else
796 {
797 Assert(iValue > -0x2000);
798 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
799 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
800 }
801 return Ptr;
802}
803
804
805/**
806 * Emits an ULEB128 encoded value (up to 64-bit wide).
807 */
808DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
809{
810 while (uValue >= 0x80)
811 {
812 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
813 uValue >>= 7;
814 }
815 *Ptr.pb++ = (uint8_t)uValue;
816 return Ptr;
817}
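/*
 * Encoding examples (illustrative): iemDwarfPutLeb128 with -8 emits the
 * single byte 0x78 (the 0x40 sign bit set, no continuation), with 300 it
 * emits 0xAC 0x02, and iemDwarfPutUleb128 with 300 emits the same 0xAC 0x02
 * since the value is small and non-negative.
 */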
818
819
820/**
821 * Emits a CFA rule as register @a uReg + offset @a off.
822 */
823DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
824{
825 *Ptr.pb++ = DW_CFA_def_cfa;
826 Ptr = iemDwarfPutUleb128(Ptr, uReg);
827 Ptr = iemDwarfPutUleb128(Ptr, off);
828 return Ptr;
829}
830
831
832/**
833 * Emits a register (@a uReg) save location:
834 * CFA + @a off * data_alignment_factor
835 */
836DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
837{
838 if (uReg < 0x40)
839 *Ptr.pb++ = DW_CFA_offset | uReg;
840 else
841 {
842 *Ptr.pb++ = DW_CFA_offset_extended;
843 Ptr = iemDwarfPutUleb128(Ptr, uReg);
844 }
845 Ptr = iemDwarfPutUleb128(Ptr, off);
846 return Ptr;
847}
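/*
 * Byte-level example (illustrative), assuming the usual System V AMD64 DWARF
 * numbering where DWREG_AMD64_RBP is 6 and the return address column
 * DWREG_AMD64_RA is 16:
 *      iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16) emits 0x0c 0x06 0x10,
 *      iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,   1) emits 0x90 0x01,
 * i.e. "CFA = RBP + 16" and "RA saved at CFA + 1 * data_alignment_factor".
 */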
848
849
850# if 0 /* unused */
851/**
852 * Emits a register (@a uReg) save location, using signed offset:
853 * CFA + @a offSigned * data_alignment_factor
854 */
855DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
856{
857 *Ptr.pb++ = DW_CFA_offset_extended_sf;
858 Ptr = iemDwarfPutUleb128(Ptr, uReg);
859 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
860 return Ptr;
861}
862# endif
863
864
865/**
866 * Initializes the unwind info section for non-windows hosts.
867 */
868static int
869iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
870 void *pvChunk, uint32_t idxChunk)
871{
872 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
874
875 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
876
877 /*
878 * Generate the CIE first.
879 */
880# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
881 uint8_t const iDwarfVer = 3;
882# else
883 uint8_t const iDwarfVer = 4;
884# endif
885 RTPTRUNION const PtrCie = Ptr;
886 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
887 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
888 *Ptr.pb++ = iDwarfVer; /* DWARF version */
889 *Ptr.pb++ = 0; /* Augmentation. */
890 if (iDwarfVer >= 4)
891 {
892 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
893 *Ptr.pb++ = 0; /* Segment selector size. */
894 }
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
897# else
898 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
899# endif
900 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
903# elif defined(RT_ARCH_ARM64)
904 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
905# else
906# error "port me"
907# endif
908 /* Initial instructions: */
909# ifdef RT_ARCH_AMD64
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
918# elif defined(RT_ARCH_ARM64)
919# if 1
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
921# else
922 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
923# endif
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
936 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
937 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
938# else
939# error "port me"
940# endif
941 while ((Ptr.u - PtrCie.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the CIE size. */
944 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
945
946 /*
947 * Generate an FDE for the whole chunk area.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
951# endif
952 RTPTRUNION const PtrFde = Ptr;
953 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
954 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
955 Ptr.pu32++;
956 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
957 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
958# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
959 *Ptr.pb++ = DW_CFA_nop;
960# endif
961 while ((Ptr.u - PtrFde.u) & 3)
962 *Ptr.pb++ = DW_CFA_nop;
963 /* Finalize the FDE size. */
964 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
965
966 /* Terminator entry. */
967 *Ptr.pu32++ = 0;
968 *Ptr.pu32++ = 0; /* just to be sure... */
969 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
970
971 /*
972 * Register it.
973 */
974# ifdef IEMNATIVE_USE_LIBUNWIND
975 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
976# else
977 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
978 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
979# endif
980
981# ifdef IEMNATIVE_USE_GDB_JIT
982 /*
983 * Now for telling GDB about this (experimental).
984 *
985 * This seems to work best with ET_DYN.
986 */
987 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
988# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
989 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
990 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
991# else
992 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
993 - pExecMemAllocator->cbHeapBlockHdr;
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbNeededAligned, 32 /*cbAlignment*/);
995# endif
996 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
997 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
998
999 RT_ZERO(*pSymFile);
1000
1001 /*
1002 * The ELF header:
1003 */
1004 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1005 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1006 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1007 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1008 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1009 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1010 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1011 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1012# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1013 pSymFile->EHdr.e_type = ET_DYN;
1014# else
1015 pSymFile->EHdr.e_type = ET_REL;
1016# endif
1017# ifdef RT_ARCH_AMD64
1018 pSymFile->EHdr.e_machine = EM_AMD64;
1019# elif defined(RT_ARCH_ARM64)
1020 pSymFile->EHdr.e_machine = EM_AARCH64;
1021# else
1022# error "port me"
1023# endif
1024 pSymFile->EHdr.e_version = 1; /*?*/
1025 pSymFile->EHdr.e_entry = 0;
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1028# else
1029 pSymFile->EHdr.e_phoff = 0;
1030# endif
1031 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1032 pSymFile->EHdr.e_flags = 0;
1033 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1035 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1036 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1037# else
1038 pSymFile->EHdr.e_phentsize = 0;
1039 pSymFile->EHdr.e_phnum = 0;
1040# endif
1041 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1042 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1043 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1044
1045 uint32_t offStrTab = 0;
1046#define APPEND_STR(a_szStr) do { \
1047 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1048 offStrTab += sizeof(a_szStr); \
1049 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1050 } while (0)
1051#define APPEND_STR_FMT(a_szStr, ...) do { \
1052 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1053 offStrTab++; \
1054 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1055 } while (0)
1056
1057 /*
1058 * Section headers.
1059 */
1060 /* Section header #0: NULL */
1061 unsigned i = 0;
1062 APPEND_STR("");
1063 RT_ZERO(pSymFile->aShdrs[i]);
1064 i++;
1065
1066 /* Section header: .eh_frame */
1067 pSymFile->aShdrs[i].sh_name = offStrTab;
1068 APPEND_STR(".eh_frame");
1069 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1070 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1071# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1072 pSymFile->aShdrs[i].sh_offset
1073 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1074# else
1075 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1076 pSymFile->aShdrs[i].sh_offset = 0;
1077# endif
1078
1079 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1080 pSymFile->aShdrs[i].sh_link = 0;
1081 pSymFile->aShdrs[i].sh_info = 0;
1082 pSymFile->aShdrs[i].sh_addralign = 1;
1083 pSymFile->aShdrs[i].sh_entsize = 0;
1084 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1085 i++;
1086
1087 /* Section header: .shstrtab */
1088 unsigned const iShStrTab = i;
1089 pSymFile->EHdr.e_shstrndx = iShStrTab;
1090 pSymFile->aShdrs[i].sh_name = offStrTab;
1091 APPEND_STR(".shstrtab");
1092 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1093 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1095 pSymFile->aShdrs[i].sh_offset
1096 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1097# else
1098 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1099 pSymFile->aShdrs[i].sh_offset = 0;
1100# endif
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 i++;
1107
1108 /* Section header: .symtab */
1109 pSymFile->aShdrs[i].sh_name = offStrTab;
1110 APPEND_STR(".symtab");
1111 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1112 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1113 pSymFile->aShdrs[i].sh_offset
1114 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1115 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_link = iShStrTab;
1117 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1118 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1119 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1120 i++;
1121
1122# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1123 /* Section header: .dynsym */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".dynsym");
1126 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1128 pSymFile->aShdrs[i].sh_offset
1129 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1130 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_link = iShStrTab;
1132 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1133 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1134 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1135 i++;
1136# endif
1137
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 /* Section header: .dynamic */
1140 pSymFile->aShdrs[i].sh_name = offStrTab;
1141 APPEND_STR(".dynamic");
1142 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1143 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1144 pSymFile->aShdrs[i].sh_offset
1145 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1146 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1147 pSymFile->aShdrs[i].sh_link = iShStrTab;
1148 pSymFile->aShdrs[i].sh_info = 0;
1149 pSymFile->aShdrs[i].sh_addralign = 1;
1150 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1151 i++;
1152# endif
1153
1154 /* Section header: .text */
1155 unsigned const iShText = i;
1156 pSymFile->aShdrs[i].sh_name = offStrTab;
1157 APPEND_STR(".text");
1158 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1159 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1161 pSymFile->aShdrs[i].sh_offset
1162 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1163# else
1164 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1165 pSymFile->aShdrs[i].sh_offset = 0;
1166# endif
1167 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1168 pSymFile->aShdrs[i].sh_link = 0;
1169 pSymFile->aShdrs[i].sh_info = 0;
1170 pSymFile->aShdrs[i].sh_addralign = 1;
1171 pSymFile->aShdrs[i].sh_entsize = 0;
1172 i++;
1173
1174 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1175
1176# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1177 /*
1178 * The program headers:
1179 */
1180 /* Everything in a single LOAD segment: */
1181 i = 0;
1182 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1183 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = 0;
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1189 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1190 i++;
1191 /* The .dynamic segment. */
1192 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1193 pSymFile->aPhdrs[i].p_flags = PF_R;
1194 pSymFile->aPhdrs[i].p_offset
1195 = pSymFile->aPhdrs[i].p_vaddr
1196 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1197 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1198 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1199 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1200 i++;
1201
1202 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1203
1204 /*
1205 * The dynamic section:
1206 */
1207 i = 0;
1208 pSymFile->aDyn[i].d_tag = DT_SONAME;
1209 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1210 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1219 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1220 i++;
1221 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1222 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1223 i++;
1224 pSymFile->aDyn[i].d_tag = DT_NULL;
1225 i++;
1226 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1227# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1228
1229 /*
1230 * Symbol tables:
1231 */
1232 /** @todo gdb doesn't seem to really like this ... */
1233 i = 0;
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1241 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1242# endif
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = 0;
1246 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1247 pSymFile->aSymbols[i].st_value = 0;
1248 pSymFile->aSymbols[i].st_size = 0;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251 i++;
1252
1253 pSymFile->aSymbols[i].st_name = offStrTab;
1254 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1255# if 0
1256 pSymFile->aSymbols[i].st_shndx = iShText;
1257 pSymFile->aSymbols[i].st_value = 0;
1258# else
1259 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1260 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1261# endif
1262 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1263 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1264 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1265# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1266 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1267 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1268# endif
1269 i++;
1270
1271 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1272 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1273
1274 /*
1275 * The GDB JIT entry and informing GDB.
1276 */
1277 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1278# if 1
1279 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1280# else
1281 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1282# endif
1283
1284 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1285 RTCritSectEnter(&g_IemNativeGdbJitLock);
1286 pEhFrame->GdbJitEntry.pNext = NULL;
1287 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1288 if (__jit_debug_descriptor.pTail)
1289 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1290 else
1291 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1292 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1293 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1294
1295 /* Notify GDB: */
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1297 __jit_debug_register_code();
1298 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1299 RTCritSectLeave(&g_IemNativeGdbJitLock);
1300
1301# else /* !IEMNATIVE_USE_GDB_JIT */
1302 RT_NOREF(pVCpu);
1303# endif /* !IEMNATIVE_USE_GDB_JIT */
1304
1305 return VINF_SUCCESS;
1306}
1307
1308# endif /* !RT_OS_WINDOWS */
1309#endif /* IN_RING3 */
1310
1311
1312/**
1313 * Adds another chunk to the executable memory allocator.
1314 *
1315 * This is used by the init code for the initial allocation and later by the
1316 * regular allocator function when it's out of memory.
1317 */
1318static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1319{
1320 /* Check that we've room for growth. */
1321 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1322 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1323
1324 /* Allocate a chunk. */
1325#ifdef RT_OS_DARWIN
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1327#else
1328 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1329#endif
1330 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 int rc = VINF_SUCCESS;
1334#else
1335 /* Initialize the heap for the chunk. */
1336 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1337 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1338 AssertRC(rc);
1339 if (RT_SUCCESS(rc))
1340 {
1341 /*
1342 * We want the memory to be aligned on 64 byte, so the first time thru
1343 * here we do some exploratory allocations to see how we can achieve this.
1344 * On subsequent runs we only make an initial adjustment allocation, if
1345 * necessary.
1346 *
1347 * Since we own the heap implementation, we know that the internal block
1348 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1349 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1350 * to the size, align up by 64 bytes, and subtract 32 bytes.
1351 *
1352 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1353 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1354 * allocation to force subsequent allocations to return 64 byte aligned
1355 * user areas.
1356 */
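        /* Worked example (illustrative): a 256 byte request becomes
         *      RT_ALIGN_32(256 + 32, 64) - 32 = 320 - 32 = 288 bytes,
         * so 32 byte header + 288 byte user area = 320 bytes, a whole number
         * of 64 byte lines, keeping the next block's user area 64 byte
         * aligned as well. */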
1357 if (!pExecMemAllocator->cbHeapBlockHdr)
1358 {
1359 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1360 pExecMemAllocator->cbHeapAlignTweak = 64;
1361 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1362 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1364
1365 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1372 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1373 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1374 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1375 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1376
1377 RTHeapSimpleFree(hHeap, pvTest2);
1378 RTHeapSimpleFree(hHeap, pvTest1);
1379 }
1380 else
1381 {
1382 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1383 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1384 }
1385 if (RT_SUCCESS(rc))
1386#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1387 {
1388 /*
1389 * Add the chunk.
1390 *
1391 * This must be done before the unwind init so windows can allocate
1392 * memory from the chunk when using the alternative sub-allocator.
1393 */
1394 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1395#ifdef IN_RING3
1396 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1397#endif
1398#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1400#else
1401 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1402 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1403 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1404 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1405#endif
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1411 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1412 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1413#else
1414 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1415 pExecMemAllocator->cbTotal += cbFree;
1416 pExecMemAllocator->cbFree += cbFree;
1417#endif
1418
1419#ifdef IN_RING3
1420 /*
1421 * Initialize the unwind information (this cannot really fail atm).
1422 * (This sets pvUnwindInfo.)
1423 */
1424 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1425 if (RT_SUCCESS(rc))
1426#endif
1427 {
1428 return VINF_SUCCESS;
1429 }
1430
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 /* Just in case the impossible happens, undo the above: */
1433 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1435 pExecMemAllocator->cChunks = idxChunk;
1436 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1437 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1438 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1439 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1440#endif
1441 }
1442#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 }
1444#endif
1445 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1446 RT_NOREF(pVCpu);
1447 return rc;
1448}
1449
1450
1451/**
1452 * Initializes the executable memory allocator for native recompilation on the
1453 * calling EMT.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu The cross context virtual CPU structure of the calling
1457 * thread.
1458 * @param cbMax The max size of the allocator.
1459 * @param cbInitial The initial allocator size.
1460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1461 * dependent).
1462 */
1463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1464{
1465 /*
1466 * Validate input.
1467 */
1468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1470 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1471 || cbChunk == 0
1472 || ( RT_IS_POWER_OF_TWO(cbChunk)
1473 && cbChunk >= _1M
1474 && cbChunk <= _256M
1475 && cbChunk <= cbMax),
1476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1477 VERR_OUT_OF_RANGE);
1478
1479 /*
1480 * Adjust/figure out the chunk size.
1481 */
1482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1483 {
1484 if (cbMax >= _256M)
1485 cbChunk = _64M;
1486 else
1487 {
1488 if (cbMax < _16M)
1489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1490 else
1491 cbChunk = (uint32_t)cbMax / 4;
1492 if (!RT_IS_POWER_OF_TWO(cbChunk))
1493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1494 }
1495 }
1496
1497 if (cbChunk > cbMax)
1498 cbMax = cbChunk;
1499 else
1500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
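 /* Worked example (illustrative numbers only, not a requirement of the code above):
    cbMax=48M with cbChunk=0 first gives cbChunk = 48M/4 = 12M, which is not a power
    of two and is rounded up to 16M by the ASMBitLastSetU32 + RT_BIT_32 combination;
    cbMax is then rounded up to a 16M multiple (still 48M here), so cMaxChunks is 3. */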
1503
1504 /*
1505 * Allocate and initialize the allocator instance.
1506 */
1507 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1508#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1509 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1510 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1511 cbNeeded += cbBitmap * cMaxChunks;
1512 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1513 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1514#endif
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1533 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1534 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1535 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1536#endif
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1539#endif
1540 for (uint32_t i = 0; i < cMaxChunks; i++)
1541 {
1542#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1543 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1544 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1545#else
1546 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1547#endif
1548 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1549#ifdef IN_RING0
1550 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1551#else
1552 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1553#endif
1554 }
1555 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1556
1557 /*
1558 * Do the initial allocations.
1559 */
1560 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1561 {
1562 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1563 AssertLogRelRCReturn(rc, rc);
1564 }
1565
1566 pExecMemAllocator->idxChunkHint = 0;
1567
1568 return VINF_SUCCESS;
1569}
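
#if 0 /* Illustrative sketch of a caller (hypothetical sizes; the real limits come from
         the IEM/CFGM initialization code, not from here): */
static int iemExecMemAllocatorInitExample(PVMCPU pVCpu)
{
 /* 64 MiB cap, 1 MiB allocated up front, default chunk size. */
 return iemExecMemAllocatorInit(pVCpu, _64M, _1M, UINT32_MAX);
}
#endif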
1570
1571
1572/*********************************************************************************************************************************
1573* Native Recompilation *
1574*********************************************************************************************************************************/
1575
1576
1577/**
1578 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1581{
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1584}
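
#if 0 /* Rough C-level sketch of how recompiled code uses this helper after a call
         (hand-written approximation; pfnCImpl and idxInstr are placeholders, not the
         actual emitted instruction sequence): */
 int rc = pfnCImpl(pVCpu);
 if (rc != VINF_SUCCESS || pVCpu->iem.s.rcPassUp != VINF_SUCCESS)
     return iemNativeHlpExecStatusCodeFiddling(pVCpu, rc, idxInstr);
#endif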
1585
1586
1587/**
1588 * Used by TB code when it wants to raise a \#GP(0).
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1591{
1592 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1593#ifndef _MSC_VER
1594 return VINF_IEM_RAISED_XCPT; /* not reached */
1595#endif
1596}
1597
1598
1599/**
1600 * Used by TB code when it wants to raise a \#NM.
1601 */
1602IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1603{
1604 iemRaiseDeviceNotAvailableJmp(pVCpu);
1605#ifndef _MSC_VER
1606 return VINF_IEM_RAISED_XCPT; /* not reached */
1607#endif
1608}
1609
1610
1611/**
1612 * Used by TB code when it wants to raise a \#UD.
1613 */
1614IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1615{
1616 iemRaiseUndefinedOpcodeJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise a \#MF.
1625 */
1626IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1627{
1628 iemRaiseMathFaultJmp(pVCpu);
1629#ifndef _MSC_VER
1630 return VINF_IEM_RAISED_XCPT; /* not reached */
1631#endif
1632}
1633
1634
1635/**
1636 * Used by TB code when detecting opcode changes.
1637 * @see iemThreadedFuncWorkerObsoleteTb
1638 */
1639IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1640{
1641 /* We set fSafeToFree to false because we're being called in the context
1642 of a TB callback function, which for native TBs means we cannot release
1643 the executable memory until we've returned all the way back to iemTbExec,
1644 as that return path goes via the native code generated for the TB. */
1645 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1646 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1647 return VINF_IEM_REEXEC_BREAK;
1648}
1649
1650
1651/**
1652 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1653 */
1654IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1655{
1656 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1657 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1658 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1659 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1660 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1661 return VINF_IEM_REEXEC_BREAK;
1662}
1663
1664
1665/**
1666 * Used by TB code when we missed a PC check after a branch.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1669{
1670 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1671 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1672 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1673 pVCpu->iem.s.pbInstrBuf));
1674 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1675 return VINF_IEM_REEXEC_BREAK;
1676}
1677
1678
1679
1680/*********************************************************************************************************************************
1681* Helpers: Segmented memory fetches and stores. *
1682*********************************************************************************************************************************/
1683
1684/**
1685 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1686 */
1687IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1688{
1689#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1690 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1691#else
1692 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1693#endif
1694}
1695
1696
1697/**
1698 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1699 * to 16 bits.
1700 */
1701IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1702{
1703#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1704 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1705#else
1706 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1707#endif
1708}
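
/* Example of what the cast chain above does (illustrative values): a guest byte 0x80
   is sign extended to 0xff80 as a 16-bit value and then zero extended, so the helper
   returns UINT64_C(0x000000000000ff80); a byte 0x7f comes back unchanged as 0x7f. */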
1709
1710
1711/**
1712 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1713 * to 32 bits.
1714 */
1715IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1716{
1717#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1718 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1719#else
1720 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1721#endif
1722}
1723
1724/**
1725 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1726 * to 64 bits.
1727 */
1728IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1729{
1730#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1731 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1732#else
1733 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1734#endif
1735}
1736
1737
1738/**
1739 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1753 * to 32 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
1763
1764
1765/**
1766 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1767 * to 64 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778
1779/**
1780 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
1790
1791
1792/**
1793 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1794 * to 64 bits.
1795 */
1796IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1797{
1798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1799 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1800#else
1801 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1802#endif
1803}
1804
1805
1806/**
1807 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1821 */
1822IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1823{
1824#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1825 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1826#else
1827 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1828#endif
1829}
1830
1831
1832/**
1833 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1834 */
1835IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1836{
1837#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1838 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1839#else
1840 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1841#endif
1842}
1843
1844
1845/**
1846 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1847 */
1848IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1849{
1850#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1851 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1852#else
1853 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1854#endif
1855}
1856
1857
1858/**
1859 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1860 */
1861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1862{
1863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1864 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1865#else
1866 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1867#endif
1868}
1869
1870
1871
1872/**
1873 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1874 */
1875IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1876{
1877#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1878 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1879#else
1880 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1881#endif
1882}
1883
1884
1885/**
1886 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1887 */
1888IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1889{
1890#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1891 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1892#else
1893 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1894#endif
1895}
1896
1897
1898/**
1899 * Used by TB code to store a 32-bit selector value onto a generic stack.
1900 *
1901 * Intel CPUs don't write the whole dword here, hence the special function.
1902 */
1903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1904{
1905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1906 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1907#else
1908 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1909#endif
1910}
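
/* Background note (summarizing the Intel SDM's PUSH description rather than this code):
   with a 32-bit operand size, recent Intel CPUs only write the low word of the stack
   slot when pushing a segment register, so a slot holding 0xdeadbeef ends up as
   0xdead0010 for a 0x0010 selector rather than 0x00000010.  The SReg helpers here and
   in the flat variants below let the recompiler reproduce that instead of doing a full
   dword store. */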
1911
1912
1913/**
1914 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1917{
1918#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1919 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1920#else
1921 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1922#endif
1923}
1924
1925
1926/**
1927 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1928 */
1929IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1930{
1931#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1932 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1933#else
1934 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1935#endif
1936}
1937
1938
1939/**
1940 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1941 */
1942IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1943{
1944#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1945 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1946#else
1947 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1948#endif
1949}
1950
1951
1952/**
1953 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1954 */
1955IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1956{
1957#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1958 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1959#else
1960 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1961#endif
1962}
1963
1964
1965
1966/*********************************************************************************************************************************
1967* Helpers: Flat memory fetches and stores. *
1968*********************************************************************************************************************************/
1969
1970/**
1971 * Used by TB code to load unsigned 8-bit data w/ flat address.
1972 * @note Zero extending the value to 64-bit to simplify assembly.
1973 */
1974IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1975{
1976#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1977 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1978#else
1979 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1980#endif
1981}
1982
1983
1984/**
1985 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1986 * to 16 bits.
1987 * @note Zero extending the value to 64-bit to simplify assembly.
1988 */
1989IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1990{
1991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1992 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1993#else
1994 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1995#endif
1996}
1997
1998
1999/**
2000 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2001 * to 32 bits.
2002 * @note Zero extending the value to 64-bit to simplify assembly.
2003 */
2004IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2005{
2006#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2007 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2008#else
2009 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2010#endif
2011}
2012
2013
2014/**
2015 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2016 * to 64 bits.
2017 */
2018IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2019{
2020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2021 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2022#else
2023 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2024#endif
2025}
2026
2027
2028/**
2029 * Used by TB code to load unsigned 16-bit data w/ flat address.
2030 * @note Zero extending the value to 64-bit to simplify assembly.
2031 */
2032IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2033{
2034#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2035 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2036#else
2037 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2038#endif
2039}
2040
2041
2042/**
2043 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2044 * to 32 bits.
2045 * @note Zero extending the value to 64-bit to simplify assembly.
2046 */
2047IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2048{
2049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2050 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2051#else
2052 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2053#endif
2054}
2055
2056
2057/**
2058 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2059 * to 64 bits.
2060 * @note Zero extending the value to 64-bit to simplify assembly.
2061 */
2062IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2063{
2064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2065 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2066#else
2067 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2068#endif
2069}
2070
2071
2072/**
2073 * Used by TB code to load unsigned 32-bit data w/ flat address.
2074 * @note Zero extending the value to 64-bit to simplify assembly.
2075 */
2076IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2077{
2078#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2079 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2080#else
2081 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2082#endif
2083}
2084
2085
2086/**
2087 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2088 * to 64 bits.
2089 * @note Zero extending the value to 64-bit to simplify assembly.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2092{
2093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2094 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2095#else
2096 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2097#endif
2098}
2099
2100
2101/**
2102 * Used by TB code to load unsigned 64-bit data w/ flat address.
2103 */
2104IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2105{
2106#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2107 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2108#else
2109 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2110#endif
2111}
2112
2113
2114/**
2115 * Used by TB code to store unsigned 8-bit data w/ flat address.
2116 */
2117IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2118{
2119#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2120 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2121#else
2122 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2123#endif
2124}
2125
2126
2127/**
2128 * Used by TB code to store unsigned 16-bit data w/ flat address.
2129 */
2130IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2131{
2132#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2133 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2134#else
2135 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2136#endif
2137}
2138
2139
2140/**
2141 * Used by TB code to store unsigned 32-bit data w/ flat address.
2142 */
2143IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2144{
2145#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2146 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2147#else
2148 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2149#endif
2150}
2151
2152
2153/**
2154 * Used by TB code to store unsigned 64-bit data w/ flat address.
2155 */
2156IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2157{
2158#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2159 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2160#else
2161 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2162#endif
2163}
2164
2165
2166
2167/**
2168 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2169 */
2170IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2171{
2172#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2173 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2174#else
2175 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2176#endif
2177}
2178
2179
2180/**
2181 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2182 */
2183IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2184{
2185#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2186 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2187#else
2188 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2189#endif
2190}
2191
2192
2193/**
2194 * Used by TB code to store a segment selector value onto a flat stack.
2195 *
2196 * Intel CPUs don't write the whole dword here, hence the special function.
2197 */
2198IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2199{
2200#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2201 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2202#else
2203 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2204#endif
2205}
2206
2207
2208/**
2209 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2210 */
2211IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2212{
2213#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2214 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2215#else
2216 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2217#endif
2218}
2219
2220
2221/**
2222 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2223 */
2224IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2225{
2226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2227 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2228#else
2229 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2230#endif
2231}
2232
2233
2234/**
2235 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2236 */
2237IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2238{
2239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2240 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2241#else
2242 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2243#endif
2244}
2245
2246
2247/**
2248 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2249 */
2250IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2251{
2252#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2253 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2254#else
2255 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2256#endif
2257}
2258
2259
2260
2261/*********************************************************************************************************************************
2262* Helpers: Segmented memory mapping. *
2263*********************************************************************************************************************************/
2264
2265/**
2266 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2267 * segmentation.
2268 */
2269IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2270 RTGCPTR GCPtrMem, uint8_t iSegReg))
2271{
2272#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2273 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2274#else
2275 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2276#endif
2277}
2278
2279
2280/**
2281 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2282 */
2283IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2284 RTGCPTR GCPtrMem, uint8_t iSegReg))
2285{
2286#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2287 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2288#else
2289 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#endif
2291}
2292
2293
2294/**
2295 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2296 */
2297IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2298 RTGCPTR GCPtrMem, uint8_t iSegReg))
2299{
2300#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2301 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2302#else
2303 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#endif
2305}
2306
2307
2308/**
2309 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2310 */
2311IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2312 RTGCPTR GCPtrMem, uint8_t iSegReg))
2313{
2314#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2315 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2316#else
2317 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2318#endif
2319}
2320
2321
2322/**
2323 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2324 * segmentation.
2325 */
2326IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2327 RTGCPTR GCPtrMem, uint8_t iSegReg))
2328{
2329#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2330 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2331#else
2332 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2333#endif
2334}
2335
2336
2337/**
2338 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2339 */
2340IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2341 RTGCPTR GCPtrMem, uint8_t iSegReg))
2342{
2343#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2344 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2345#else
2346 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#endif
2348}
2349
2350
2351/**
2352 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2353 */
2354IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2355 RTGCPTR GCPtrMem, uint8_t iSegReg))
2356{
2357#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2358 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2359#else
2360 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#endif
2362}
2363
2364
2365/**
2366 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2367 */
2368IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2369 RTGCPTR GCPtrMem, uint8_t iSegReg))
2370{
2371#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2372 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2373#else
2374 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2375#endif
2376}
2377
2378
2379/**
2380 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2381 * segmentation.
2382 */
2383IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2384 RTGCPTR GCPtrMem, uint8_t iSegReg))
2385{
2386#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2387 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2388#else
2389 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2390#endif
2391}
2392
2393
2394/**
2395 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2396 */
2397IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2398 RTGCPTR GCPtrMem, uint8_t iSegReg))
2399{
2400#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2401 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2402#else
2403 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#endif
2405}
2406
2407
2408/**
2409 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2410 */
2411IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2412 RTGCPTR GCPtrMem, uint8_t iSegReg))
2413{
2414#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2415 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2416#else
2417 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#endif
2419}
2420
2421
2422/**
2423 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2424 */
2425IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2426 RTGCPTR GCPtrMem, uint8_t iSegReg))
2427{
2428#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2429 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2430#else
2431 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2432#endif
2433}
2434
2435
2436/**
2437 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2438 * segmentation.
2439 */
2440IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2441 RTGCPTR GCPtrMem, uint8_t iSegReg))
2442{
2443#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2444 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2445#else
2446 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2447#endif
2448}
2449
2450
2451/**
2452 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2453 */
2454IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2455 RTGCPTR GCPtrMem, uint8_t iSegReg))
2456{
2457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2458 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2459#else
2460 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#endif
2462}
2463
2464
2465/**
2466 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2467 */
2468IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2469 RTGCPTR GCPtrMem, uint8_t iSegReg))
2470{
2471#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2472 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2473#else
2474 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#endif
2476}
2477
2478
2479/**
2480 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2481 */
2482IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2483 RTGCPTR GCPtrMem, uint8_t iSegReg))
2484{
2485#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2486 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2487#else
2488 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#endif
2490}
2491
2492
2493/**
2494 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2495 */
2496IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2497 RTGCPTR GCPtrMem, uint8_t iSegReg))
2498{
2499#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2500 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2501#else
2502 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#endif
2504}
2505
2506
2507/**
2508 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2509 */
2510IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2511 RTGCPTR GCPtrMem, uint8_t iSegReg))
2512{
2513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2514 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2515#else
2516 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2517#endif
2518}
2519
2520
2521/**
2522 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2523 * segmentation.
2524 */
2525IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2526 RTGCPTR GCPtrMem, uint8_t iSegReg))
2527{
2528#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2529 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2530#else
2531 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2532#endif
2533}
2534
2535
2536/**
2537 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2538 */
2539IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2540 RTGCPTR GCPtrMem, uint8_t iSegReg))
2541{
2542#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2543 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2544#else
2545 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#endif
2547}
2548
2549
2550/**
2551 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2552 */
2553IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2554 RTGCPTR GCPtrMem, uint8_t iSegReg))
2555{
2556#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2557 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2558#else
2559 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#endif
2561}
2562
2563
2564/**
2565 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2566 */
2567IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2568 RTGCPTR GCPtrMem, uint8_t iSegReg))
2569{
2570#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2571 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2572#else
2573 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2574#endif
2575}
2576
2577
2578/*********************************************************************************************************************************
2579* Helpers: Flat memory mapping. *
2580*********************************************************************************************************************************/
2581
2582/**
2583 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2584 * address.
2585 */
2586IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2587{
2588#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2589 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2590#else
2591 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2592#endif
2593}
2594
2595
2596/**
2597 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2598 */
2599IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2600{
2601#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2602 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2603#else
2604 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2605#endif
2606}
2607
2608
2609/**
2610 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2611 */
2612IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2613{
2614#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2615 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2616#else
2617 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2618#endif
2619}
2620
2621
2622/**
2623 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2624 */
2625IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2626{
2627#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2628 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2629#else
2630 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2631#endif
2632}
2633
2634
2635/**
2636 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2637 * address.
2638 */
2639IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2640{
2641#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2642 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2643#else
2644 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2645#endif
2646}
2647
2648
2649/**
2650 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2651 */
2652IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2653{
2654#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2655 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2656#else
2657 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2658#endif
2659}
2660
2661
2662/**
2663 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2664 */
2665IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2666{
2667#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2668 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2669#else
2670 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2671#endif
2672}
2673
2674
2675/**
2676 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2677 */
2678IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2679{
2680#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2681 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2682#else
2683 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2684#endif
2685}
2686
2687
2688/**
2689 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2690 * address.
2691 */
2692IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2693{
2694#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2695 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2696#else
2697 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2698#endif
2699}
2700
2701
2702/**
2703 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2704 */
2705IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2706{
2707#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2708 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2709#else
2710 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2711#endif
2712}
2713
2714
2715/**
2716 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2717 */
2718IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2719{
2720#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2721 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2722#else
2723 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2724#endif
2725}
2726
2727
2728/**
2729 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2730 */
2731IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2732{
2733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2734 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2735#else
2736 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2737#endif
2738}
2739
2740
2741/**
2742 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2743 * address.
2744 */
2745IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2746{
2747#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2748 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2749#else
2750 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2751#endif
2752}
2753
2754
2755/**
2756 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2757 */
2758IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2759{
2760#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2761 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2762#else
2763 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2764#endif
2765}
2766
2767
2768/**
2769 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2770 */
2771IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2772{
2773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2774 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2775#else
2776 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2777#endif
2778}
2779
2780
2781/**
2782 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2783 */
2784IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2785{
2786#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2787 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2788#else
2789 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2790#endif
2791}
2792
2793
2794/**
2795 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2796 */
2797IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2798{
2799#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2800 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2801#else
2802 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2803#endif
2804}
2805
2806
2807/**
2808 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2809 */
2810IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2811{
2812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2813 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2814#else
2815 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2816#endif
2817}
2818
2819
2820/**
2821 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2822 * address.
2823 */
2824IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2825{
2826#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2827 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2828#else
2829 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2830#endif
2831}
2832
2833
2834/**
2835 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2836 */
2837IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2838{
2839#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2840 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2841#else
2842 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2843#endif
2844}
2845
2846
2847/**
2848 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2849 */
2850IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2851{
2852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2853 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2854#else
2855 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2856#endif
2857}
2858
2859
2860/**
2861 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2862 */
2863IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2864{
2865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2866 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2867#else
2868 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2869#endif
2870}
2871
2872
2873/*********************************************************************************************************************************
2874* Helpers: Commit, rollback & unmap *
2875*********************************************************************************************************************************/
2876
2877/**
2878 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2879 */
2880IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2881{
2882 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2883}
2884
2885
2886/**
2887 * Used by TB code to commit and unmap a read-write memory mapping.
2888 */
2889IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2890{
2891 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2892}
2893
2894
2895/**
2896 * Used by TB code to commit and unmap a write-only memory mapping.
2897 */
2898IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2899{
2900 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2901}
2902
2903
2904/**
2905 * Used by TB code to commit and unmap a read-only memory mapping.
2906 */
2907IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2908{
2909 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2910}
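
#if 0 /* Illustrative sketch of the map/commit pairing the generated code follows
         (hand-written approximation; GCPtrMem is a placeholder, not recompiler output): */
 uint8_t bUnmapInfo;
 uint32_t *pu32Dst = iemNativeHlpMemMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem, X86_SREG_DS);
 *pu32Dst += 1; /* read-modify-write on guest memory */
 iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);
#endif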
2911
2912
2913/**
2914 * Reinitializes the native recompiler state.
2915 *
2916 * Called before starting a new recompile job.
2917 */
2918static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2919{
2920 pReNative->cLabels = 0;
2921 pReNative->bmLabelTypes = 0;
2922 pReNative->cFixups = 0;
2923#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2924 pReNative->pDbgInfo->cEntries = 0;
2925#endif
2926 pReNative->pTbOrg = pTb;
2927 pReNative->cCondDepth = 0;
2928 pReNative->uCondSeqNo = 0;
2929 pReNative->uCheckIrqSeqNo = 0;
2930 pReNative->uTlbSeqNo = 0;
2931
2932#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2933 pReNative->Core.offPc = 0;
2934 pReNative->Core.cInstrPcUpdateSkipped = 0;
2935#endif
2936 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2937#if IEMNATIVE_HST_GREG_COUNT < 32
2938 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2939#endif
2940 ;
2941 pReNative->Core.bmHstRegsWithGstShadow = 0;
2942 pReNative->Core.bmGstRegShadows = 0;
2943 pReNative->Core.bmVars = 0;
2944 pReNative->Core.bmStack = 0;
2945 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2946 pReNative->Core.u64ArgVars = UINT64_MAX;
2947
2948 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 12);
2949 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2950 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2951 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2952 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2953 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2954 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2955 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2956 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2957 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2958 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2959 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2960 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2961
2962 /* Full host register reinit: */
2963 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2964 {
2965 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2966 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2967 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2968 }
2969
2970 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2971 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2972#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2973 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2974#endif
2975#ifdef IEMNATIVE_REG_FIXED_TMP0
2976 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2977#endif
2978#ifdef IEMNATIVE_REG_FIXED_TMP1
2979 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2980#endif
2981#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2982 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2983#endif
2984 );
2985 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2986 {
2987 fRegs &= ~RT_BIT_32(idxReg);
2988 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2989 }
2990
2991 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2992#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2993 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2994#endif
2995#ifdef IEMNATIVE_REG_FIXED_TMP0
2996 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2997#endif
2998#ifdef IEMNATIVE_REG_FIXED_TMP1
2999 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3000#endif
3001#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3002 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3003#endif
3004 return pReNative;
3005}
3006
3007
3008/**
3009 * Allocates and initializes the native recompiler state.
3010 *
3011 * This is called the first time an EMT wants to recompile something.
3012 *
3013 * @returns Pointer to the new recompiler state.
3014 * @param pVCpu The cross context virtual CPU structure of the calling
3015 * thread.
3016 * @param pTb The TB that's about to be recompiled.
3017 * @thread EMT(pVCpu)
3018 */
3019static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3020{
3021 VMCPU_ASSERT_EMT(pVCpu);
3022
3023 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3024 AssertReturn(pReNative, NULL);
3025
3026 /*
3027 * Try allocate all the buffers and stuff we need.
3028 */
3029 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3030 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3031 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3032#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3033 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3034#endif
3035 if (RT_LIKELY( pReNative->pInstrBuf
3036 && pReNative->paLabels
3037 && pReNative->paFixups)
3038#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3039 && pReNative->pDbgInfo
3040#endif
3041 )
3042 {
3043 /*
3044 * Set the buffer & array sizes on success.
3045 */
3046 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3047 pReNative->cLabelsAlloc = _8K;
3048 pReNative->cFixupsAlloc = _16K;
3049#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3050 pReNative->cDbgInfoAlloc = _16K;
3051#endif
3052
3053 /* Other constant stuff: */
3054 pReNative->pVCpu = pVCpu;
3055
3056 /*
3057 * Done, just need to save it and reinit it.
3058 */
3059 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3060 return iemNativeReInit(pReNative, pTb);
3061 }
3062
3063 /*
3064 * Failed. Cleanup and return.
3065 */
3066 AssertFailed();
3067 RTMemFree(pReNative->pInstrBuf);
3068 RTMemFree(pReNative->paLabels);
3069 RTMemFree(pReNative->paFixups);
3070#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3071 RTMemFree(pReNative->pDbgInfo);
3072#endif
3073 RTMemFree(pReNative);
3074 return NULL;
3075}
3076
3077
3078/**
3079 * Creates a label
3080 *
3081 * If the label does not yet have a defined position,
3082 * call iemNativeLabelDefine() later to set it.
3083 *
3084 * @returns Label ID. Throws VBox status code on failure, so no need to check
3085 * the return value.
3086 * @param pReNative The native recompile state.
3087 * @param enmType The label type.
3088 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3089 * label is not yet defined (default).
3090 * @param uData Data associated with the label. Only applicable to
3091 * certain types of labels. Default is zero.
3092 */
3093DECL_HIDDEN_THROW(uint32_t)
3094iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3095 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3096{
3097 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3098
3099 /*
3100 * Locate existing label definition.
3101 *
3102 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3103 * and uData is zero.
3104 */
3105 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3106 uint32_t const cLabels = pReNative->cLabels;
3107 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3108#ifndef VBOX_STRICT
3109 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3110 && offWhere == UINT32_MAX
3111 && uData == 0
3112#endif
3113 )
3114 {
3115#ifndef VBOX_STRICT
3116 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3117 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3118 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3119 if (idxLabel < pReNative->cLabels)
3120 return idxLabel;
3121#else
3122 for (uint32_t i = 0; i < cLabels; i++)
3123 if ( paLabels[i].enmType == enmType
3124 && paLabels[i].uData == uData)
3125 {
3126 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3127 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3128 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3129 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3130 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3131 return i;
3132 }
3133 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3134 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3135#endif
3136 }
3137
3138 /*
3139 * Make sure we've got room for another label.
3140 */
3141 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3142 { /* likely */ }
3143 else
3144 {
3145 uint32_t cNew = pReNative->cLabelsAlloc;
3146 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3147 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3148 cNew *= 2;
3149 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3150 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3151 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3152 pReNative->paLabels = paLabels;
3153 pReNative->cLabelsAlloc = cNew;
3154 }
3155
3156 /*
3157 * Define a new label.
3158 */
3159 paLabels[cLabels].off = offWhere;
3160 paLabels[cLabels].enmType = enmType;
3161 paLabels[cLabels].uData = uData;
3162 pReNative->cLabels = cLabels + 1;
3163
3164 Assert((unsigned)enmType < 64);
3165 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3166
3167 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3168 {
3169 Assert(uData == 0);
3170 pReNative->aidxUniqueLabels[enmType] = cLabels;
3171 }
3172
3173 if (offWhere != UINT32_MAX)
3174 {
3175#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3176 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3177 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3178#endif
3179 }
3180 return cLabels;
3181}
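
/*
 * Illustrative usage sketch (the label and fixup type values below are
 * placeholders, not names taken from this file): create a forward label,
 * reference it from a branch via a fixup, and resolve it later.
 *
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType); // off=UINT32_MAX (forward decl)
 *     // ... emit a branch instruction at native offset offBranch ...
 *     iemNativeAddFixup(pReNative, offBranch, idxLabel, enmSomeFixupType);         // patched once the label is defined
 *     // ... later, when the target position is known ...
 *     iemNativeLabelDefine(pReNative, idxLabel, off);
 */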
3182
3183
3184/**
3185 * Defines the location of an existing label.
3186 *
3187 * @param pReNative The native recompile state.
3188 * @param idxLabel The label to define.
3189 * @param offWhere The position.
3190 */
3191DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3192{
3193 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3194 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3195 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3196 pLabel->off = offWhere;
3197#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3198 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3199 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3200#endif
3201}
3202
3203
3204/**
3205 * Looks up a label.
3206 *
3207 * @returns Label ID if found, UINT32_MAX if not.
3208 */
3209static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3210 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3211{
3212 Assert((unsigned)enmType < 64);
3213 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3214 {
3215 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3216 return pReNative->aidxUniqueLabels[enmType];
3217
3218 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3219 uint32_t const cLabels = pReNative->cLabels;
3220 for (uint32_t i = 0; i < cLabels; i++)
3221 if ( paLabels[i].enmType == enmType
3222 && paLabels[i].uData == uData
3223 && ( paLabels[i].off == offWhere
3224 || offWhere == UINT32_MAX
3225 || paLabels[i].off == UINT32_MAX))
3226 return i;
3227 }
3228 return UINT32_MAX;
3229}
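
/*
 * Note (added for clarity): single-instance label types are resolved directly
 * via aidxUniqueLabels.  For multi-instance types the loop above matches on
 * type and uData and accepts an entry if the offsets are equal, if the caller
 * did not specify an offset (offWhere == UINT32_MAX), or if the label itself
 * is not yet defined.
 */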
3230
3231
3232/**
3233 * Adds a fixup.
3234 *
3235 * @throws VBox status code (int) on failure.
3236 * @param pReNative The native recompile state.
3237 * @param offWhere The instruction offset of the fixup location.
3238 * @param idxLabel The target label ID for the fixup.
3239 * @param enmType The fixup type.
3240 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3241 */
3242DECL_HIDDEN_THROW(void)
3243iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3244 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3245{
3246 Assert(idxLabel <= UINT16_MAX);
3247 Assert((unsigned)enmType <= UINT8_MAX);
3248
3249 /*
3250 * Make sure we've got room for another fixup.
3251 */
3252 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3253 uint32_t const cFixups = pReNative->cFixups;
3254 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3255 { /* likely */ }
3256 else
3257 {
3258 uint32_t cNew = pReNative->cFixupsAlloc;
3259 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3260 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3261 cNew *= 2;
3262 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3263 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3264 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3265 pReNative->paFixups = paFixups;
3266 pReNative->cFixupsAlloc = cNew;
3267 }
3268
3269 /*
3270 * Add the fixup.
3271 */
3272 paFixups[cFixups].off = offWhere;
3273 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3274 paFixups[cFixups].enmType = enmType;
3275 paFixups[cFixups].offAddend = offAddend;
3276 pReNative->cFixups = cFixups + 1;
3277}
3278
3279
3280/**
3281 * Slow code path for iemNativeInstrBufEnsure.
3282 */
3283DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3284{
3285 /* Double the buffer size till we meet the request. */
3286 uint32_t cNew = pReNative->cInstrBufAlloc;
3287 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3288 do
3289 cNew *= 2;
3290 while (cNew < off + cInstrReq);
3291
3292 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3293#ifdef RT_ARCH_ARM64
3294 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3295#else
3296 uint32_t const cbMaxInstrBuf = _2M;
3297#endif
3298 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3299
3300 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3301 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3302
3303#ifdef VBOX_STRICT
3304 pReNative->offInstrBufChecked = off + cInstrReq;
3305#endif
3306 pReNative->cInstrBufAlloc = cNew;
3307 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3308}
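
/*
 * Growth example (illustrative): with cInstrBufAlloc = N and a request for
 * off + cInstrReq = 2.5*N entries, the loop above doubles N -> 2N -> 4N and
 * reallocates to 4N entries, subject to the 1 MiB (ARM64) / 2 MiB (AMD64)
 * ceiling enforced just after the loop.
 */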
3309
3310#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3311
3312/**
3313 * Grows the static debug info array used during recompilation.
3314 *
3315 * @returns Pointer to the new debug info block; throws VBox status code on
3316 * failure, so no need to check the return value.
3317 */
3318DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3319{
3320 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3321 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3322 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3323 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3324 pReNative->pDbgInfo = pDbgInfo;
3325 pReNative->cDbgInfoAlloc = cNew;
3326 return pDbgInfo;
3327}
3328
3329
3330/**
3331 * Adds a new debug info uninitialized entry, returning the pointer to it.
3332 */
3333DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3334{
3335 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3336 { /* likely */ }
3337 else
3338 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3339 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3340}
3341
3342
3343/**
3344 * Debug Info: Adds a native offset record, if necessary.
3345 */
3346static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3347{
3348 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3349
3350 /*
3351 * Search backwards to see if we've got a similar record already.
3352 */
3353 uint32_t idx = pDbgInfo->cEntries;
3354 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3355 while (idx-- > idxStop)
3356 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3357 {
3358 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3359 return;
3360 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3361 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3362 break;
3363 }
3364
3365 /*
3366 * Add it.
3367 */
3368 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3369 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3370 pEntry->NativeOffset.offNative = off;
3371}
3372
3373
3374/**
3375 * Debug Info: Record info about a label.
3376 */
3377static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3378{
3379 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3380 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3381 pEntry->Label.uUnused = 0;
3382 pEntry->Label.enmLabel = (uint8_t)enmType;
3383 pEntry->Label.uData = uData;
3384}
3385
3386
3387/**
3388 * Debug Info: Record info about a threaded call.
3389 */
3390static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3391{
3392 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3393 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3394 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3395 pEntry->ThreadedCall.uUnused = 0;
3396 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3397}
3398
3399
3400/**
3401 * Debug Info: Record info about a new guest instruction.
3402 */
3403static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3404{
3405 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3406 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3407 pEntry->GuestInstruction.uUnused = 0;
3408 pEntry->GuestInstruction.fExec = fExec;
3409}
3410
3411
3412/**
3413 * Debug Info: Record info about guest register shadowing.
3414 */
3415static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3416 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3417{
3418 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3419 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3420 pEntry->GuestRegShadowing.uUnused = 0;
3421 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3422 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3423 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3424}
3425
3426
3427# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3428/**
3429 * Debug Info: Record info about delayed RIP updates.
3430 */
3431static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3432{
3433 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3434 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3435 pEntry->DelayedPcUpdate.offPc = offPc;
3436 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3437}
3438# endif
3439
3440#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3441
3442
3443/*********************************************************************************************************************************
3444* Register Allocator *
3445*********************************************************************************************************************************/
3446
3447/**
3448 * Register parameter indexes (indexed by argument number).
3449 */
3450DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3451{
3452 IEMNATIVE_CALL_ARG0_GREG,
3453 IEMNATIVE_CALL_ARG1_GREG,
3454 IEMNATIVE_CALL_ARG2_GREG,
3455 IEMNATIVE_CALL_ARG3_GREG,
3456#if defined(IEMNATIVE_CALL_ARG4_GREG)
3457 IEMNATIVE_CALL_ARG4_GREG,
3458# if defined(IEMNATIVE_CALL_ARG5_GREG)
3459 IEMNATIVE_CALL_ARG5_GREG,
3460# if defined(IEMNATIVE_CALL_ARG6_GREG)
3461 IEMNATIVE_CALL_ARG6_GREG,
3462# if defined(IEMNATIVE_CALL_ARG7_GREG)
3463 IEMNATIVE_CALL_ARG7_GREG,
3464# endif
3465# endif
3466# endif
3467#endif
3468};
3469
3470/**
3471 * Call register masks indexed by argument count.
3472 */
3473DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3474{
3475 0,
3476 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3477 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3478 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3479 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3480 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3481#if defined(IEMNATIVE_CALL_ARG4_GREG)
3482 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3483 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3484# if defined(IEMNATIVE_CALL_ARG5_GREG)
3485 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3486 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3487# if defined(IEMNATIVE_CALL_ARG6_GREG)
3488 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3489 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3490 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3491# if defined(IEMNATIVE_CALL_ARG7_GREG)
3492 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3493 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3494 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3495# endif
3496# endif
3497# endif
3498#endif
3499};
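
/*
 * Example (illustrative): g_aidxIemNativeCallRegs[2] is the host register used
 * for the third call argument, while g_afIemNativeCallRegs[3] is the combined
 * mask of the registers used for the first three arguments, presumably so
 * callers can reserve or flush a whole argument set in one go.
 */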
3500
3501#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3502/**
3503 * BP offset of the stack argument slots.
3504 *
3505 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3506 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3507 */
3508DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3509{
3510 IEMNATIVE_FP_OFF_STACK_ARG0,
3511# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3512 IEMNATIVE_FP_OFF_STACK_ARG1,
3513# endif
3514# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3515 IEMNATIVE_FP_OFF_STACK_ARG2,
3516# endif
3517# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3518 IEMNATIVE_FP_OFF_STACK_ARG3,
3519# endif
3520};
3521AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3522#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3523
3524/**
3525 * Info about shadowed guest register values.
3526 * @see IEMNATIVEGSTREG
3527 */
3528static struct
3529{
3530 /** Offset in VMCPU. */
3531 uint32_t off;
3532 /** The field size. */
3533 uint8_t cb;
3534 /** Name (for logging). */
3535 const char *pszName;
3536} const g_aGstShadowInfo[] =
3537{
3538#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3539 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3540 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3541 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3542 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3543 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3544 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3545 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3546 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3547 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3548 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3549 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3550 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3551 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3552 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3553 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3554 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3555 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3556 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3557 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3558 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3559 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3560 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3561 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3562 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3563 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3564 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3565 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3566 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3567 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3568 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3569 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3570 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3571 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3572 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3573 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3574 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3575 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3576 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3577 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3578 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3579 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3580 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3581 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3582 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3583 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3584 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3585 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3586 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3587#undef CPUMCTX_OFF_AND_SIZE
3588};
3589AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
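
/*
 * Example (illustrative): g_aGstShadowInfo[kIemNativeGstReg_Pc] gives the
 * VMCPU byte offset and size of cpum.GstCtx.rip, i.e. where a shadow copy of
 * the guest RIP is loaded from and written back to; the pszName member is
 * only used for logging.
 */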
3590
3591
3592/** Host CPU general purpose register names. */
3593DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3594{
3595#ifdef RT_ARCH_AMD64
3596 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3597#elif RT_ARCH_ARM64
3598 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3599 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3600#else
3601# error "port me"
3602#endif
3603};
3604
3605
3606DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3607 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3608{
3609 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3610
3611 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3612 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3613 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3614 return (uint8_t)idxReg;
3615}
3616
3617
3618#if 0 /* unused */
3619/**
3620 * Tries to locate a suitable register in the given register mask.
3621 *
3622 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3623 * failed.
3624 *
3625 * @returns Host register number on success, returns UINT8_MAX on failure.
3626 */
3627static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3628{
3629 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3630 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3631 if (fRegs)
3632 {
3633 /** @todo pick better here: */
3634 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3635
3636 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3637 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3638 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3639 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3640
3641 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3642 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3643 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3644 return idxReg;
3645 }
3646 return UINT8_MAX;
3647}
3648#endif /* unused */
3649
3650
3651/**
3652 * Locate a register, possibly freeing one up.
3653 *
3654 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3655 * failed.
3656 *
3657 * @returns Host register number on success. Returns UINT8_MAX if no register is
3658 * found; the caller is supposed to deal with this and raise an
3659 * allocation type specific status code (if desired).
3660 *
3661 * @throws VBox status code if we run into trouble spilling a variable or
3662 * recording debug info. Does NOT throw anything if we're out of
3663 * registers, though.
3664 */
3665static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3666 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3667{
3668 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3669 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3670 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3671
3672 /*
3673 * Try a freed register that's shadowing a guest register.
3674 */
3675 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3676 if (fRegs)
3677 {
3678 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3679
3680#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3681 /*
3682 * When we have liveness information, we use it to kick out all shadowed
3683 * guest registers that will not be needed any more in this TB. If we're
3684 * lucky, this may prevent us from ending up here again.
3685 *
3686 * Note! We must consider the previous entry here so we don't free
3687 * anything that the current threaded function requires (current
3688 * entry is produced by the next threaded function).
3689 */
3690 uint32_t const idxCurCall = pReNative->idxCurCall;
3691 if (idxCurCall > 0)
3692 {
3693 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3694
3695# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3696 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3697 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3698 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3699# else
3700 /* Construct a mask of the registers not in the read or write state.
3701 Note! We could skip writes, if they aren't from us, as this is just
3702 a hack to prevent trashing registers that have just been written
3703 or will be written when we retire the current instruction. */
3704 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3705 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3706 & IEMLIVENESSBIT_MASK;
3707# endif
3708 /* Merge EFLAGS. */
3709 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3710 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3711 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3712 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3713 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
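 /* Worked illustration of the folding above (added for clarity): the
    liveness bits for EFLAGS are split into seven sub-fields (Other, CF,
    PF, AF, ZF, SF, OF) occupying consecutive bit positions starting at
    kIemNativeGstReg_EFlags.  The three shift-and-AND steps collapse all
    seven into the kIemNativeGstReg_EFlags position, so the EFLAGS shadow
    is only freed when every sub-field is freeable. */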
3714
3715 /* If it matches any shadowed registers. */
3716 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3717 {
3718 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3719 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3720 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3721
3722 /* See if we've got any unshadowed registers we can return now. */
3723 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3724 if (fUnshadowedRegs)
3725 {
3726 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3727 return (fPreferVolatile
3728 ? ASMBitFirstSetU32(fUnshadowedRegs)
3729 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3730 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3731 - 1;
3732 }
3733 }
3734 }
3735#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3736
3737 unsigned const idxReg = (fPreferVolatile
3738 ? ASMBitFirstSetU32(fRegs)
3739 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3740 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3741 - 1;
3742
3743 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3744 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3745 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3746 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3747
3748 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3749 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3750 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3751 return idxReg;
3752 }
3753
3754 /*
3755 * Try to free up a variable that's in a register.
3756 *
3757 * We do two rounds here: first we evacuate variables that don't need to be
3758 * saved on the stack, then in the second round we move things to the stack.
3759 */
3760 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3761 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3762 {
3763 uint32_t fVars = pReNative->Core.bmVars;
3764 while (fVars)
3765 {
3766 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3767 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3768 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3769 && (RT_BIT_32(idxReg) & fRegMask)
3770 && ( iLoop == 0
3771 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3772 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3773 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3774 {
3775 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3776 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3777 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3778 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3779 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3780 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3781
3782 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3783 {
3784 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3785 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3786 }
3787
3788 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3789 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3790
3791 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3792 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3793 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3794 return idxReg;
3795 }
3796 fVars &= ~RT_BIT_32(idxVar);
3797 }
3798 }
3799
3800 return UINT8_MAX;
3801}
3802
3803
3804/**
3805 * Reassigns a variable to a different register specified by the caller.
3806 *
3807 * @returns The new code buffer position.
3808 * @param pReNative The native recompile state.
3809 * @param off The current code buffer position.
3810 * @param idxVar The variable index.
3811 * @param idxRegOld The old host register number.
3812 * @param idxRegNew The new host register number.
3813 * @param pszCaller The caller for logging.
3814 */
3815static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3816 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3817{
3818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3819 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3820 RT_NOREF(pszCaller);
3821
3822 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3823
3824 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3825 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3826 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3827 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3828
3829 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3830 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3831 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3832 if (fGstRegShadows)
3833 {
3834 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3835 | RT_BIT_32(idxRegNew);
3836 while (fGstRegShadows)
3837 {
3838 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3839 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3840
3841 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3842 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3843 }
3844 }
3845
3846 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3847 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3848 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3849 return off;
3850}
3851
3852
3853/**
3854 * Moves a variable to a different register or spills it onto the stack.
3855 *
3856 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3857 * kinds can easily be recreated if needed later.
3858 *
3859 * @returns The new code buffer position.
3860 * @param pReNative The native recompile state.
3861 * @param off The current code buffer position.
3862 * @param idxVar The variable index.
3863 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3864 * call-volatile registers.
3865 */
3866static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3867 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3868{
3869 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3870 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3871 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3872 Assert(!pVar->fRegAcquired);
3873
3874 uint8_t const idxRegOld = pVar->idxReg;
3875 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3876 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3877 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3878 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3879 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3880 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3881 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3882 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3883
3884
3885 /** @todo Add statistics on this.*/
3886 /** @todo Implement basic variable liveness analysis (python) so variables
3887 * can be freed immediately once no longer used. Without this we risk trashing
3888 * registers and stack slots on dead variables.
3889 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3890
3891 /*
3892 * First try to move it to a different register, as that's cheaper.
3893 */
3894 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3895 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3896 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3897 if (fRegs)
3898 {
3899 /* Avoid using shadow registers, if possible. */
3900 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3901 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3902 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3903 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3904 }
3905
3906 /*
3907 * Otherwise we must spill the register onto the stack.
3908 */
3909 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3910 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3911 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3912 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3913
3914 pVar->idxReg = UINT8_MAX;
3915 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3916 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3917 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3918 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3919 return off;
3920}
3921
3922
3923/**
3924 * Allocates a temporary host general purpose register.
3925 *
3926 * This may emit code to save register content onto the stack in order to free
3927 * up a register.
3928 *
3929 * @returns The host register number; throws VBox status code on failure,
3930 * so no need to check the return value.
3931 * @param pReNative The native recompile state.
3932 * @param poff Pointer to the variable with the code buffer position.
3933 * This will be updated if we need to move a variable from
3934 * register to stack in order to satisfy the request.
3935 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3936 * registers (@c true, default) or the other way around
3937 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3938 */
3939DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3940{
3941 /*
3942 * Try to find a completely unused register, preferably a call-volatile one.
3943 */
3944 uint8_t idxReg;
3945 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3946 & ~pReNative->Core.bmHstRegsWithGstShadow
3947 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3948 if (fRegs)
3949 {
3950 if (fPreferVolatile)
3951 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3952 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3953 else
3954 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3955 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3956 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3957 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3958 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3959 }
3960 else
3961 {
3962 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3963 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3964 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3965 }
3966 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3967}
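
/*
 * Usage sketch (illustrative; the emitter call is just an example).  The
 * register is assumed to be released again with iemNativeRegFreeTmp, which is
 * not shown in this part of the file:
 *
 *     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *     off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234));
 *     // ... use idxTmpReg ...
 *     // ... free the temporary register when done ...
 */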
3968
3969
3970/**
3971 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3972 * registers.
3973 *
3974 * @returns The host register number; throws VBox status code on failure,
3975 * so no need to check the return value.
3976 * @param pReNative The native recompile state.
3977 * @param poff Pointer to the variable with the code buffer position.
3978 * This will be updated if we need to move a variable from
3979 * register to stack in order to satisfy the request.
3980 * @param fRegMask Mask of acceptable registers.
3981 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3982 * registers (@c true, default) or the other way around
3983 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3984 */
3985DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3986 bool fPreferVolatile /*= true*/)
3987{
3988 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3989 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3990
3991 /*
3992 * Try to find a completely unused register, preferably a call-volatile one.
3993 */
3994 uint8_t idxReg;
3995 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3996 & ~pReNative->Core.bmHstRegsWithGstShadow
3997 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3998 & fRegMask;
3999 if (fRegs)
4000 {
4001 if (fPreferVolatile)
4002 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4003 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4004 else
4005 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4006 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4007 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4008 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4009 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4010 }
4011 else
4012 {
4013 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4014 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4015 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4016 }
4017 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4018}
4019
4020
4021/**
4022 * Allocates a temporary register for loading an immediate value into.
4023 *
4024 * This will emit code to load the immediate, unless there happens to be an
4025 * unused register with the value already loaded.
4026 *
4027 * The caller will not modify the returned register, it must be considered
4028 * read-only. Free using iemNativeRegFreeTmpImm.
4029 *
4030 * @returns The host register number; throws VBox status code on failure, so no
4031 * need to check the return value.
4032 * @param pReNative The native recompile state.
4033 * @param poff Pointer to the variable with the code buffer position.
4034 * @param uImm The immediate value that the register must hold upon
4035 * return.
4036 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4037 * registers (@c true, default) or the other way around
4038 * (@c false).
4039 *
4040 * @note Reusing immediate values has not been implemented yet.
4041 */
4042DECL_HIDDEN_THROW(uint8_t)
4043iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4044{
4045 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4046 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4047 return idxReg;
4048}
4049
4050#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4051
4052# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4053/**
4054 * Helper for iemNativeLivenessGetStateByGstReg.
4055 *
4056 * @returns IEMLIVENESS_STATE_XXX
4057 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4058 * ORed together.
4059 */
4060DECL_FORCE_INLINE(uint32_t)
4061iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4062{
4063 /* INPUT trumps anything else. */
4064 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4065 return IEMLIVENESS_STATE_INPUT;
4066
4067 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4068 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4069 {
4070 /* If not all sub-fields are clobbered they must be considered INPUT. */
4071 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4072 return IEMLIVENESS_STATE_INPUT;
4073 return IEMLIVENESS_STATE_CLOBBERED;
4074 }
4075
4076 /* XCPT_OR_CALL trumps UNUSED. */
4077 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4078 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4079
4080 return IEMLIVENESS_STATE_UNUSED;
4081}
4082# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4083
4084
4085DECL_FORCE_INLINE(uint32_t)
4086iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4087{
4088# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4089 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4090 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4091# else
4092 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4093 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4094 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4095 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
4096# endif
4097}
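
/*
 * Illustration (added for clarity): each BitN.bm64 bitmap holds bit N of the
 * per-register liveness state, one bit per guest register.  The accessor
 * above therefore gathers bit enmGstRegEx from each bitmap and reassembles
 * the 2-bit (or, with IEMLIVENESS_EXTENDED_LAYOUT, 4-bit) state value.
 */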
4098
4099
4100DECL_FORCE_INLINE(uint32_t)
4101iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4102{
4103 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4104 if (enmGstReg == kIemNativeGstReg_EFlags)
4105 {
4106 /* Merge the eflags states to one. */
4107# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4108 uRet = RT_BIT_32(uRet);
4109 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4110 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4111 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4112 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4113 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4114 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4115 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4116# else
4117 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4118 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4119 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4120 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4121 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4122 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4123 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4124# endif
4125 }
4126 return uRet;
4127}
4128
4129
4130# ifdef VBOX_STRICT
4131/** For assertions only; the caller must ensure that idxCurCall isn't zero. */
4132DECL_FORCE_INLINE(uint32_t)
4133iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4134{
4135 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4136}
4137# endif /* VBOX_STRICT */
4138
4139#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4140
4141/**
4142 * Marks host register @a idxHstReg as containing a shadow copy of guest
4143 * register @a enmGstReg.
4144 *
4145 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4146 * host register before calling.
4147 */
4148DECL_FORCE_INLINE(void)
4149iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4150{
4151 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4152 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4153 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4154
4155 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4156 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4157 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4158 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4159#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4160 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4161 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4162#else
4163 RT_NOREF(off);
4164#endif
4165}
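
/*
 * Bookkeeping summary (derived from the assertions in these shadow helpers):
 * a guest register bit set in Core.bmGstRegShadows means that the
 * Core.aidxGstRegShadows[] entry for that register names the host register
 * holding the shadow copy, that host register has the same bit set in its
 * fGstRegShadows member, and the host register's own bit is set in
 * Core.bmHstRegsWithGstShadow.  The aidxGstRegShadows entry is only
 * meaningful while the corresponding bmGstRegShadows bit is set.
 */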
4166
4167
4168/**
4169 * Clear any guest register shadow claims from @a idxHstReg.
4170 *
4171 * The register does not need to be shadowing any guest registers.
4172 */
4173DECL_FORCE_INLINE(void)
4174iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4175{
4176 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4177 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4178 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4179 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4180 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4181
4182#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4183 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4184 if (fGstRegs)
4185 {
4186 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4187 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4188 while (fGstRegs)
4189 {
4190 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4191 fGstRegs &= ~RT_BIT_64(iGstReg);
4192 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4193 }
4194 }
4195#else
4196 RT_NOREF(off);
4197#endif
4198
4199 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4200 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4201 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4202}
4203
4204
4205/**
4206 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4207 * and global overview flags.
4208 */
4209DECL_FORCE_INLINE(void)
4210iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4211{
4212 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4213 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4214 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4215 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4216 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4217 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4218 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4219
4220#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4221 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4222 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4223#else
4224 RT_NOREF(off);
4225#endif
4226
4227 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4228 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4229 if (!fGstRegShadowsNew)
4230 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4231 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4232}
4233
4234
4235#if 0 /* unused */
4236/**
4237 * Clear any guest register shadow claim for @a enmGstReg.
4238 */
4239DECL_FORCE_INLINE(void)
4240iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4241{
4242 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4243 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4244 {
4245 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4246 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4247 }
4248}
4249#endif
4250
4251
4252/**
4253 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4254 * as the new shadow of it.
4255 *
4256 * Unlike the other guest reg shadow helpers, this does the logging for you.
4257 * However, the liveness state is not asserted here; the caller must do
4258 * that.
4259 */
4260DECL_FORCE_INLINE(void)
4261iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4262 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4263{
4264 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4265 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4266 {
4267 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4268 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4269 if (idxHstRegOld == idxHstRegNew)
4270 return;
4271 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4272 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4273 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4274 }
4275 else
4276 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4277 g_aGstShadowInfo[enmGstReg].pszName));
4278 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4279}
4280
4281
4282/**
4283 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4284 * to @a idxRegTo.
4285 */
4286DECL_FORCE_INLINE(void)
4287iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4288 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4289{
4290 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4291 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4292 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4293 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4294 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4295 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4296 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4297 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4298 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4299
4300 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4301 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4302 if (!fGstRegShadowsFrom)
4303 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4304 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4305 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4306 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4307#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4308 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4309 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4310#else
4311 RT_NOREF(off);
4312#endif
4313}
4314
4315
4316/**
4317 * Allocates a temporary host general purpose register for keeping a guest
4318 * register value.
4319 *
4320 * Since we may already have a register holding the guest register value,
4321 * code will be emitted to do the loading if that's not the case. Code may also
4322 * be emitted if we have to free up a register to satisfy the request.
4323 *
4324 * @returns The host register number; throws VBox status code on failure, so no
4325 * need to check the return value.
4326 * @param pReNative The native recompile state.
4327 * @param poff Pointer to the variable with the code buffer
4328 * position. This will be updated if we need to move a
4329 * variable from register to stack in order to satisfy
4330 * the request.
4331 * @param enmGstReg The guest register that is to be updated.
4332 * @param enmIntendedUse How the caller will be using the host register.
4333 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4334 * register is okay (default). The ASSUMPTION here is
4335 * that the caller has already flushed all volatile
4336 * registers, so this is only applied if we allocate a
4337 * new register.
4338 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4339 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4340 */
4341DECL_HIDDEN_THROW(uint8_t)
4342iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4343 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4344 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4345{
4346 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4347#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4348 AssertMsg( fSkipLivenessAssert
4349 || pReNative->idxCurCall == 0
4350 || enmGstReg == kIemNativeGstReg_Pc
4351 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4352 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4353 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4354 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4355 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4356 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4357#endif
4358 RT_NOREF(fSkipLivenessAssert);
4359#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4360 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4361#endif
4362 uint32_t const fRegMask = !fNoVolatileRegs
4363 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4364 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4365
4366 /*
4367 * First check if the guest register value is already in a host register.
4368 */
4369 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4370 {
4371 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4372 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4373 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4374 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4375
4376 /* It's not supposed to be allocated... */
4377 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4378 {
4379 /*
4380 * If the register will trash the guest shadow copy, try to find a
4381 * completely unused register we can use instead. If that fails,
4382 * we need to disassociate the host reg from the guest reg.
4383 */
4384 /** @todo would be nice to know if preserving the register is in any way helpful. */
4385 /* If the purpose is calculations, try to duplicate the register value as
4386 we'll be clobbering the shadow. */
4387 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4388 && ( ~pReNative->Core.bmHstRegs
4389 & ~pReNative->Core.bmHstRegsWithGstShadow
4390 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4391 {
4392 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4393
4394 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4395
4396 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4397 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4398 g_apszIemNativeHstRegNames[idxRegNew]));
4399 idxReg = idxRegNew;
4400 }
4401 /* If the current register matches the restrictions, go ahead and allocate
4402 it for the caller. */
4403 else if (fRegMask & RT_BIT_32(idxReg))
4404 {
4405 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4406 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4407 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4408 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4409 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4410 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4411 else
4412 {
4413 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4414 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4415 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4416 }
4417 }
4418 /* Otherwise, allocate a register that satisfies the caller and transfer
4419 the shadowing if compatible with the intended use. (This basically
4420 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4421 else
4422 {
4423 Assert(fNoVolatileRegs);
4424 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4425 !fNoVolatileRegs
4426 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4427 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4428 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4429 {
4430 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4431 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4432 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4433 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4434 }
4435 else
4436 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4437 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4438 g_apszIemNativeHstRegNames[idxRegNew]));
4439 idxReg = idxRegNew;
4440 }
4441 }
4442 else
4443 {
4444 /*
4445 * Oops. Shadowed guest register already allocated!
4446 *
4447 * Allocate a new register, copy the value and, if updating, the
4448 * guest shadow copy assignment to the new register.
4449 */
4450 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4451 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4452 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4453 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4454
4455 /** @todo share register for readonly access. */
4456 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4457 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4458
4459 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4460 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4461
4462 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4463 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4464 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4465 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4466 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4467 else
4468 {
4469 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4470 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4471 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4472 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4473 }
4474 idxReg = idxRegNew;
4475 }
4476 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4477
4478#ifdef VBOX_STRICT
4479 /* Strict builds: Check that the value is correct. */
4480 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4481#endif
4482
4483 return idxReg;
4484 }
4485
4486 /*
4487 * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
4488 */
4489 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4490
4491 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4492 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4493
4494 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4495 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4496 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4497 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4498
4499 return idxRegNew;
4500}
4501
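/*
 * Illustrative sketch (not part of the recompiler, kept in an #if 0 block so it
 * does not affect the build): how a caller would typically use the allocator
 * above to modify a guest GPR.  The function name and the choice of RAX are
 * hypothetical; the emitter helpers are assumed to behave as they do elsewhere
 * in this file.
 */
#if 0
static uint32_t iemNativeSketchAddImmToGuestRax(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t iImm)
{
    /* Get a host register shadowing guest RAX, loading it from CPUMCTX if needed. */
    uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                              (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                              kIemNativeGstRegUse_ForUpdate);
    /* Modify the host copy and write the result back to the guest context. */
    off = iemNativeEmitAddGprImm(pReNative, off, idxRegRax, iImm);
    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRax, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rax));
    /* Free the temporary register but keep the shadow association for later reuse. */
    iemNativeRegFreeTmp(pReNative, idxRegRax);
    return off;
}
#endif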
4502
4503/**
4504 * Allocates a temporary host general purpose register that already holds the
4505 * given guest register value.
4506 *
4507 * The use case for this function is places where the shadowing state cannot be
4508 * modified due to branching and such. This will fail if we don't have a
4509 * current shadow copy handy or if it's incompatible. The only code that will
4510 * be emitted here is value checking code in strict builds.
4511 *
4512 * The intended use can only be readonly!
4513 *
4514 * @returns The host register number, UINT8_MAX if not present.
4515 * @param pReNative The native recompile state.
4516 * @param poff Pointer to the instruction buffer offset.
4517 * Will be updated in strict builds if a register is
4518 * found.
4519 * @param enmGstReg The guest register whose shadow copy is wanted.
4520 * @note In strict builds, this may throw instruction buffer growth failures.
4521 * Non-strict builds will not throw anything.
4522 * @sa iemNativeRegAllocTmpForGuestReg
4523 */
4524DECL_HIDDEN_THROW(uint8_t)
4525iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4526{
4527 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4528#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4529 AssertMsg( pReNative->idxCurCall == 0
4530 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4531 || enmGstReg == kIemNativeGstReg_Pc,
4532 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4533#endif
4534
4535 /*
4536 * First check if the guest register value is already in a host register.
4537 */
4538 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4539 {
4540 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4541 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4542 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4543 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4544
4545 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4546 {
4547 /*
4548 * We only do readonly use here, so easy compared to the other
4549 * variant of this code.
4550 */
4551 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4552 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4553 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4554 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4555 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4556
4557#ifdef VBOX_STRICT
4558 /* Strict builds: Check that the value is correct. */
4559 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4560#else
4561 RT_NOREF(poff);
4562#endif
4563 return idxReg;
4564 }
4565 }
4566
4567 return UINT8_MAX;
4568}
4569
4570
4571/**
4572 * Allocates argument registers for a function call.
4573 *
4574 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4575 * need to check the return value.
4576 * @param pReNative The native recompile state.
4577 * @param off The current code buffer offset.
4578 * @param cArgs The number of arguments the function call takes.
4579 */
4580DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4581{
4582 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4583 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4584 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4585 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4586
4587 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4588 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4589 else if (cArgs == 0)
4590 return off;
4591
4592 /*
4593 * Do we get lucky and all registers are free and not shadowing anything?
4594 */
4595 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4596 for (uint32_t i = 0; i < cArgs; i++)
4597 {
4598 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4599 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4600 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4601 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4602 }
4603 /*
4604 * Okay, not lucky so we have to free up the registers.
4605 */
4606 else
4607 for (uint32_t i = 0; i < cArgs; i++)
4608 {
4609 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4610 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4611 {
4612 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4613 {
4614 case kIemNativeWhat_Var:
4615 {
4616 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4617 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4618 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4619 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4620 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4621
4622 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4623 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4624 else
4625 {
4626 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4627 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4628 }
4629 break;
4630 }
4631
4632 case kIemNativeWhat_Tmp:
4633 case kIemNativeWhat_Arg:
4634 case kIemNativeWhat_rc:
4635 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4636 default:
4637 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4638 }
4639
4640 }
4641 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4642 {
4643 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4644 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4645 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4646 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4647 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4648 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4649 }
4650 else
4651 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4652 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4653 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4654 }
4655 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4656 return off;
4657}
4658
4659
4660DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4661
4662
4663#if 0
4664/**
4665 * Frees a register assignment of any type.
4666 *
4667 * @param pReNative The native recompile state.
4668 * @param idxHstReg The register to free.
4669 *
4670 * @note Does not update variables.
4671 */
4672DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4673{
4674 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4675 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4676 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4677 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4678 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4679 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4680 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4681 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4682 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4683 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4684 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4685 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4686 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4687 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4688
4689 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4690 /* no flushing, right:
4691 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4692 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4693 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4694 */
4695}
4696#endif
4697
4698
4699/**
4700 * Frees a temporary register.
4701 *
4702 * Any shadow copies of guest registers assigned to the host register will not
4703 * be flushed by this operation.
4704 */
4705DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4706{
4707 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4708 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4709 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4710 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4711 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4712}
4713
4714
4715/**
4716 * Frees a temporary immediate register.
4717 *
4718 * It is assumed that the caller has not modified the register, so it still holds
4719 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4720 */
4721DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4722{
4723 iemNativeRegFreeTmp(pReNative, idxHstReg);
4724}
4725
4726
4727/**
4728 * Frees a register assigned to a variable.
4729 *
4730 * The register will be disassociated from the variable.
4731 */
4732DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4733{
4734 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4735 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4736 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4737 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4738 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4739
4740 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4741 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4742 if (!fFlushShadows)
4743 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4744 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4745 else
4746 {
4747 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4748 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4749 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4750 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4751 uint64_t fGstRegShadows = fGstRegShadowsOld;
4752 while (fGstRegShadows)
4753 {
4754 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4755 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4756
4757 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4758 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4759 }
4760 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4761 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4762 }
4763}
4764
4765
4766/**
4767 * Called right before emitting a call instruction to move anything important
4768 * out of call-volatile registers, free and flush the call-volatile registers,
4769 * optionally freeing argument variables.
4770 *
4771 * @returns New code buffer offset; throws VBox status code on failure.
4772 * @param pReNative The native recompile state.
4773 * @param off The code buffer offset.
4774 * @param cArgs The number of arguments the function call takes.
4775 * It is presumed that the host registers for these have
4776 * already been allocated as such and won't need moving,
4777 * just freeing.
4778 * @param fKeepVars Mask of variables that should keep their register
4779 * assignments. Caller must take care to handle these.
4780 */
4781DECL_HIDDEN_THROW(uint32_t)
4782iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4783{
4784 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4785
4786 /* fKeepVars will reduce this mask. */
4787 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4788
4789 /*
4790 * Move anything important out of volatile registers.
4791 */
4792 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4793 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4794 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4795#ifdef IEMNATIVE_REG_FIXED_TMP0
4796 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4797#endif
4798#ifdef IEMNATIVE_REG_FIXED_TMP1
4799 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4800#endif
4801#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4802 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4803#endif
4804 & ~g_afIemNativeCallRegs[cArgs];
4805
4806 fRegsToMove &= pReNative->Core.bmHstRegs;
4807 if (!fRegsToMove)
4808 { /* likely */ }
4809 else
4810 {
4811 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4812 while (fRegsToMove != 0)
4813 {
4814 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4815 fRegsToMove &= ~RT_BIT_32(idxReg);
4816
4817 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4818 {
4819 case kIemNativeWhat_Var:
4820 {
4821 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4822 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4823 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4824 Assert(pVar->idxReg == idxReg);
4825 if (!(RT_BIT_32(idxVar) & fKeepVars))
4826 {
4827 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4828 idxVar, pVar->enmKind, pVar->idxReg));
4829 if (pVar->enmKind != kIemNativeVarKind_Stack)
4830 pVar->idxReg = UINT8_MAX;
4831 else
4832 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4833 }
4834 else
4835 fRegsToFree &= ~RT_BIT_32(idxReg);
4836 continue;
4837 }
4838
4839 case kIemNativeWhat_Arg:
4840 AssertMsgFailed(("What?!?: %u\n", idxReg));
4841 continue;
4842
4843 case kIemNativeWhat_rc:
4844 case kIemNativeWhat_Tmp:
4845 AssertMsgFailed(("Missing free: %u\n", idxReg));
4846 continue;
4847
4848 case kIemNativeWhat_FixedTmp:
4849 case kIemNativeWhat_pVCpuFixed:
4850 case kIemNativeWhat_pCtxFixed:
4851 case kIemNativeWhat_PcShadow:
4852 case kIemNativeWhat_FixedReserved:
4853 case kIemNativeWhat_Invalid:
4854 case kIemNativeWhat_End:
4855 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4856 }
4857 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4858 }
4859 }
4860
4861 /*
4862 * Do the actual freeing.
4863 */
4864 if (pReNative->Core.bmHstRegs & fRegsToFree)
4865 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4866 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4867 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4868
4869 /* If there are guest register shadows in any call-volatile register, we
4870 have to clear the corresponding guest register masks for each register. */
4871 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4872 if (fHstRegsWithGstShadow)
4873 {
4874 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4875 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4876 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4877 do
4878 {
4879 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4880 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4881
4882 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4883 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4884 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4885 } while (fHstRegsWithGstShadow != 0);
4886 }
4887
4888 return off;
4889}
4890
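/*
 * Illustrative sketch (not part of the recompiler): the mask arithmetic used
 * above, with plain parameters standing in for IEMNATIVE_CALL_VOLATILE_GREG_MASK,
 * the fixed temporary registers and g_afIemNativeCallRegs[cArgs].  The function
 * name and parameters are hypothetical.
 */
#if 0
# include <stdint.h>
static uint32_t iemNativeSketchCalcRegsToMove(uint32_t fVolatileRegs, uint32_t fFixedTmpRegs,
                                              uint32_t fArgRegs, uint32_t bmHstRegsAllocated)
{
    return fVolatileRegs          /* everything the calling convention may clobber... */
         & ~fFixedTmpRegs         /* ...except fixed temporaries, which never carry variables or shadows... */
         & ~fArgRegs              /* ...and except registers already prepared as call arguments... */
         & bmHstRegsAllocated;    /* ...restricted to registers that are actually allocated. */
}
#endif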
4891
4892/**
4893 * Flushes a set of guest register shadow copies.
4894 *
4895 * This is usually done after calling a threaded function or a C-implementation
4896 * of an instruction.
4897 *
4898 * @param pReNative The native recompile state.
4899 * @param fGstRegs Set of guest registers to flush.
4900 */
4901DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4902{
4903 /*
4904 * Reduce the mask by what's currently shadowed
4905 */
4906 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4907 fGstRegs &= bmGstRegShadowsOld;
4908 if (fGstRegs)
4909 {
4910 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4911 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4912 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4913 if (bmGstRegShadowsNew)
4914 {
4915 /*
4916 * Partial.
4917 */
4918 do
4919 {
4920 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4921 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4922 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4923 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4924 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4925
4926 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4927 fGstRegs &= ~fInThisHstReg;
4928 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4929 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4930 if (!fGstRegShadowsNew)
4931 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4932 } while (fGstRegs != 0);
4933 }
4934 else
4935 {
4936 /*
4937 * Clear all.
4938 */
4939 do
4940 {
4941 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4942 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4943 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4944 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4945 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4946
4947 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4948 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4949 } while (fGstRegs != 0);
4950 pReNative->Core.bmHstRegsWithGstShadow = 0;
4951 }
4952 }
4953}
4954
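/*
 * Illustrative sketch (not part of the recompiler): the set-bit iteration
 * pattern used by the flush code above.  ASMBitFirstSetU64() returns a 1-based
 * bit index (hence the "- 1" in the real code); the sketch below uses the
 * GCC/Clang __builtin_ctzll intrinsic, which is 0-based.  Names are hypothetical.
 */
#if 0
# include <stdint.h>
static void iemNativeSketchForEachSetBit(uint64_t fMask, void (*pfnWorker)(unsigned idxBit, void *pvUser), void *pvUser)
{
    while (fMask != 0)
    {
        unsigned const idxBit = (unsigned)__builtin_ctzll(fMask); /* lowest set bit */
        fMask &= ~(UINT64_C(1) << idxBit);                        /* clear it before the next round */
        pfnWorker(idxBit, pvUser);
    }
}
#endif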
4955
4956/**
4957 * Flushes guest register shadow copies held by a set of host registers.
4958 *
4959 * This is used with the TLB lookup code for ensuring that we don't carry on
4960 * with any guest shadows in volatile registers, as these will get corrupted by
4961 * a TLB miss.
4962 *
4963 * @param pReNative The native recompile state.
4964 * @param fHstRegs Set of host registers to flush guest shadows for.
4965 */
4966DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4967{
4968 /*
4969 * Reduce the mask by what's currently shadowed.
4970 */
4971 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4972 fHstRegs &= bmHstRegsWithGstShadowOld;
4973 if (fHstRegs)
4974 {
4975 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4976 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4977 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4978 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4979 if (bmHstRegsWithGstShadowNew)
4980 {
4981 /*
4982 * Partial (likely).
4983 */
4984 uint64_t fGstShadows = 0;
4985 do
4986 {
4987 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4988 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4989 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4990 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4991
4992 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4993 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4994 fHstRegs &= ~RT_BIT_32(idxHstReg);
4995 } while (fHstRegs != 0);
4996 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4997 }
4998 else
4999 {
5000 /*
5001 * Clear all.
5002 */
5003 do
5004 {
5005 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5006 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5007 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5008 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5009
5010 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5011 fHstRegs &= ~RT_BIT_32(idxHstReg);
5012 } while (fHstRegs != 0);
5013 pReNative->Core.bmGstRegShadows = 0;
5014 }
5015 }
5016}
5017
5018
5019/**
5020 * Restores guest shadow copies in volatile registers.
5021 *
5022 * This is used after calling a helper function (think TLB miss) to restore the
5023 * register state of volatile registers.
5024 *
5025 * @param pReNative The native recompile state.
5026 * @param off The code buffer offset.
5027 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5028 * be active (allocated) w/o asserting. Hack.
5029 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5030 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5031 */
5032DECL_HIDDEN_THROW(uint32_t)
5033iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5034{
5035 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5036 if (fHstRegs)
5037 {
5038 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5039 do
5040 {
5041 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5042
5043 /* It's not fatal if a register is active holding a variable that
5044 shadows a guest register, ASSUMING all pending guest register
5045 writes were flushed prior to the helper call. However, we'll be
5046 emitting duplicate restores, so it wastes code space. */
5047 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5048 RT_NOREF(fHstRegsActiveShadows);
5049
5050 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5051 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5052 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5053 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5054
5055 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5056 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5057
5058 fHstRegs &= ~RT_BIT_32(idxHstReg);
5059 } while (fHstRegs != 0);
5060 }
5061 return off;
5062}
5063
5064
5065#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5066# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
5067static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5068{
5069 /* Compare the shadow with the context value, they should match. */
5070 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
5071 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
5072 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
5073 return off;
5074}
5075# endif
5076
5077/**
5078 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5079 */
5080static uint32_t
5081iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5082{
5083 if (pReNative->Core.offPc)
5084 {
5085# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5086 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5087 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5088# endif
5089
5090# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5091 /* Allocate a temporary PC register. */
5092 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5093
5094 /* Perform the addition and store the result. */
5095 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5096 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5097
5098 /* Free but don't flush the PC register. */
5099 iemNativeRegFreeTmp(pReNative, idxPcReg);
5100# else
5101 /* Compare the shadow with the context value, they should match. */
5102 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5103 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5104# endif
5105
5106 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5107 pReNative->Core.offPc = 0;
5108 pReNative->Core.cInstrPcUpdateSkipped = 0;
5109 }
5110# if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
5111 else
5112 {
5113 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5114 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
5115 }
5116# endif
5117
5118 return off;
5119}
5120#endif
5121
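/*
 * Illustrative sketch (not part of the recompiler): a stand-alone model of the
 * delayed PC update bookkeeping that iemNativeEmitPcWriteback() implements in
 * emitted code.  The structure and function names are hypothetical.
 */
#if 0
# include <stdint.h>
typedef struct IEMNATIVESKETCHDELAYEDPC
{
    uint64_t uGstRip;               /* stand-in for CPUMCTX.rip */
    uint64_t offPc;                 /* bytes advanced since the last writeback */
    uint32_t cInstrPcUpdateSkipped; /* number of per-instruction RIP stores skipped */
} IEMNATIVESKETCHDELAYEDPC;

/* Instead of storing RIP after every instruction, only accumulate the advance... */
static void iemNativeSketchAdvancePc(IEMNATIVESKETCHDELAYEDPC *pState, uint8_t cbInstr)
{
    pState->offPc                 += cbInstr;
    pState->cInstrPcUpdateSkipped += 1;
}

/* ...and fold it into the guest RIP once, before anything that needs an exact PC. */
static void iemNativeSketchPcWriteback(IEMNATIVESKETCHDELAYEDPC *pState)
{
    if (pState->offPc)
    {
        pState->uGstRip               += pState->offPc;
        pState->offPc                  = 0;
        pState->cInstrPcUpdateSkipped  = 0;
    }
}
#endif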
5122
5123/**
5124 * Flushes delayed write of a specific guest register.
5125 *
5126 * This must be called prior to calling CImpl functions and any helpers that use
5127 * the guest state (like raising exceptions) and such.
5128 *
5129 * This optimization has not yet been implemented. The first target would be
5130 * RIP updates, since these are the most common ones.
5131 */
5132DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5133 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
5134{
5135#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5136 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
5137#endif
5138 RT_NOREF(pReNative, enmClass, idxReg);
5139 return off;
5140}
5141
5142
5143/**
5144 * Flushes any delayed guest register writes.
5145 *
5146 * This must be called prior to calling CImpl functions and any helpers that use
5147 * the guest state (like raising exceptions) and such.
5148 *
5149 * Currently only delayed RIP updates are handled here (when
5150 * IEMNATIVE_WITH_DELAYED_PC_UPDATING is defined); other delayed guest register writes have not been implemented yet.
5151 */
5152DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/)
5153{
5154#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5155 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5156 off = iemNativeEmitPcWriteback(pReNative, off);
5157#else
5158 RT_NOREF(pReNative, fGstShwExcept);
5159#endif
5160
5161 return off;
5162}
5163
5164
5165#ifdef VBOX_STRICT
5166/**
5167 * Does internal register allocator sanity checks.
5168 */
5169static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5170{
5171 /*
5172 * Iterate host registers building a guest shadowing set.
5173 */
5174 uint64_t bmGstRegShadows = 0;
5175 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5176 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5177 while (bmHstRegsWithGstShadow)
5178 {
5179 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5180 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5181 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5182
5183 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5184 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5185 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5186 bmGstRegShadows |= fThisGstRegShadows;
5187 while (fThisGstRegShadows)
5188 {
5189 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5190 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5191 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5192 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5193 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5194 }
5195 }
5196 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5197 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5198 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5199
5200 /*
5201 * Now the other way around, checking the guest to host index array.
5202 */
5203 bmHstRegsWithGstShadow = 0;
5204 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5205 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5206 while (bmGstRegShadows)
5207 {
5208 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5209 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5210 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5211
5212 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5213 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5214 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5215 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5216 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5217 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5218 }
5219 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5220 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5221 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5222}
5223#endif
5224
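/*
 * Illustrative sketch (not part of the recompiler): a stand-alone model of the
 * two-way shadowing bookkeeping that iemNativeRegAssertSanity() verifies.  The
 * structure, sizes and names below are hypothetical simplifications of the
 * corresponding recompiler core state fields.
 */
#if 0
# include <stdint.h>
# include <cassert>
# define SKETCH_CHSTREGS  16
# define SKETCH_CGSTREGS  64
typedef struct IEMNATIVESKETCHSHADOWS
{
    uint64_t bmGstRegShadows;                    /* guest registers that currently have a host shadow */
    uint32_t bmHstRegsWithGstShadow;             /* host registers holding at least one shadow */
    uint64_t afGstRegShadows[SKETCH_CHSTREGS];   /* per host register: the guest registers it shadows */
    uint8_t  aidxGstRegShadows[SKETCH_CGSTREGS]; /* per guest register: the host register shadowing it */
} IEMNATIVESKETCHSHADOWS;

static void iemNativeSketchCheckShadowInvariants(IEMNATIVESKETCHSHADOWS const *pState)
{
    uint64_t bmGstSeen = 0;
    for (unsigned idxHst = 0; idxHst < SKETCH_CHSTREGS; idxHst++)
        if (pState->bmHstRegsWithGstShadow & (UINT32_C(1) << idxHst))
        {
            assert(pState->afGstRegShadows[idxHst] != 0);            /* flagged host regs must shadow something */
            for (unsigned idxGst = 0; idxGst < SKETCH_CGSTREGS; idxGst++)
                if (pState->afGstRegShadows[idxHst] & (UINT64_C(1) << idxGst))
                    assert(pState->aidxGstRegShadows[idxGst] == idxHst); /* reverse mapping must agree */
            bmGstSeen |= pState->afGstRegShadows[idxHst];
        }
        else
            assert(pState->afGstRegShadows[idxHst] == 0);            /* unflagged host regs shadow nothing */
    assert(bmGstSeen == pState->bmGstRegShadows);                    /* union of per-host masks == global mask */
}
#endif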
5225
5226/*********************************************************************************************************************************
5227* Code Emitters (larger snippets) *
5228*********************************************************************************************************************************/
5229
5230/**
5231 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5232 * extending to 64-bit width.
5233 *
5234 * @returns New code buffer offset on success; throws VBox status code on failure.
5235 * @param pReNative The native recompile state.
5236 * @param off The current code buffer position.
5237 * @param idxHstReg The host register to load the guest register value into.
5238 * @param enmGstReg The guest register to load.
5239 *
5240 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5241 * that is something the caller needs to do if applicable.
5242 */
5243DECL_HIDDEN_THROW(uint32_t)
5244iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5245{
5246 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
5247 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5248
5249 switch (g_aGstShadowInfo[enmGstReg].cb)
5250 {
5251 case sizeof(uint64_t):
5252 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5253 case sizeof(uint32_t):
5254 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5255 case sizeof(uint16_t):
5256 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5257#if 0 /* not present in the table. */
5258 case sizeof(uint8_t):
5259 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5260#endif
5261 default:
5262 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5263 }
5264}
5265
5266
5267#ifdef VBOX_STRICT
5268/**
5269 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5270 *
5271 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5272 * Trashes EFLAGS on AMD64.
5273 */
5274static uint32_t
5275iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5276{
5277# ifdef RT_ARCH_AMD64
5278 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5279
5280 /* rol reg64, 32 */
5281 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5282 pbCodeBuf[off++] = 0xc1;
5283 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5284 pbCodeBuf[off++] = 32;
5285
5286 /* test reg32, ffffffffh */
5287 if (idxReg >= 8)
5288 pbCodeBuf[off++] = X86_OP_REX_B;
5289 pbCodeBuf[off++] = 0xf7;
5290 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5291 pbCodeBuf[off++] = 0xff;
5292 pbCodeBuf[off++] = 0xff;
5293 pbCodeBuf[off++] = 0xff;
5294 pbCodeBuf[off++] = 0xff;
5295
5296 /* je/jz +1 */
5297 pbCodeBuf[off++] = 0x74;
5298 pbCodeBuf[off++] = 0x01;
5299
5300 /* int3 */
5301 pbCodeBuf[off++] = 0xcc;
5302
5303 /* rol reg64, 32 */
5304 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5305 pbCodeBuf[off++] = 0xc1;
5306 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5307 pbCodeBuf[off++] = 32;
5308
5309# elif defined(RT_ARCH_ARM64)
5310 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5311 /* lsr tmp0, reg64, #32 */
5312 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5313 /* cbz tmp0, +1 */
5314 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5315 /* brk #0x1100 */
5316 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5317
5318# else
5319# error "Port me!"
5320# endif
5321 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5322 return off;
5323}
5324#endif /* VBOX_STRICT */
5325
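/*
 * Illustrative sketch (not part of the recompiler): the C-level condition that
 * the rol/test/jz/int3 (AMD64) and lsr/cbz/brk (ARM64) sequences emitted above
 * verify at runtime.  The function name is hypothetical.
 */
#if 0
# include <stdint.h>
static bool iemNativeSketchTop32BitsAreClear(uint64_t uHstRegValue)
{
    return (uHstRegValue >> 32) == 0; /* the emitted code hits a breakpoint when this is false */
}
#endif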
5326
5327#ifdef VBOX_STRICT
5328/**
5329 * Emits code that checks that the content of register @a idxReg is the same
5330 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5331 * instruction if that's not the case.
5332 *
5333 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5334 * Trashes EFLAGS on AMD64.
5335 */
5336static uint32_t
5337iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5338{
5339# ifdef RT_ARCH_AMD64
5340 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5341
5342 /* cmp reg, [mem] */
5343 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5344 {
5345 if (idxReg >= 8)
5346 pbCodeBuf[off++] = X86_OP_REX_R;
5347 pbCodeBuf[off++] = 0x38;
5348 }
5349 else
5350 {
5351 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5352 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5353 else
5354 {
5355 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5356 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5357 else
5358 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5360 if (idxReg >= 8)
5361 pbCodeBuf[off++] = X86_OP_REX_R;
5362 }
5363 pbCodeBuf[off++] = 0x39;
5364 }
5365 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5366
5367 /* je/jz +1 */
5368 pbCodeBuf[off++] = 0x74;
5369 pbCodeBuf[off++] = 0x01;
5370
5371 /* int3 */
5372 pbCodeBuf[off++] = 0xcc;
5373
5374 /* For values smaller than the register size, we must check that the rest
5375 of the register is all zeros. */
5376 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5377 {
5378 /* test reg64, imm32 */
5379 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5380 pbCodeBuf[off++] = 0xf7;
5381 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5382 pbCodeBuf[off++] = 0;
5383 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5384 pbCodeBuf[off++] = 0xff;
5385 pbCodeBuf[off++] = 0xff;
5386
5387 /* je/jz +1 */
5388 pbCodeBuf[off++] = 0x74;
5389 pbCodeBuf[off++] = 0x01;
5390
5391 /* int3 */
5392 pbCodeBuf[off++] = 0xcc;
5393 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5394 }
5395 else
5396 {
5397 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5398 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5399 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5400 }
5401
5402# elif defined(RT_ARCH_ARM64)
5403 /* mov TMP0, [gstreg] */
5404 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5405
5406 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5407 /* sub tmp0, tmp0, idxReg */
5408 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5409 /* cbz tmp0, +1 */
5410 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5411 /* brk #0x1000+enmGstReg */
5412 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5413 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5414
5415# else
5416# error "Port me!"
5417# endif
5418 return off;
5419}
5420#endif /* VBOX_STRICT */
5421
5422
5423#ifdef VBOX_STRICT
5424/**
5425 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
5426 * important bits.
5427 *
5428 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5429 * Trashes EFLAGS on AMD64.
5430 */
5431static uint32_t
5432iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5433{
5434 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5435 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5436 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5437 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5438
5439# ifdef RT_ARCH_AMD64
5440 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5441
5442 /* je/jz +1 */
5443 pbCodeBuf[off++] = 0x74;
5444 pbCodeBuf[off++] = 0x01;
5445
5446 /* int3 */
5447 pbCodeBuf[off++] = 0xcc;
5448
5449# elif defined(RT_ARCH_ARM64)
5450 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5451
5452 /* b.eq +1 */
5453 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5454 /* brk #0x2000 */
5455 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5456
5457# else
5458# error "Port me!"
5459# endif
5460 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5461
5462 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5463 return off;
5464}
5465#endif /* VBOX_STRICT */
5466
5467
5468/**
5469 * Emits a code for checking the return code of a call and rcPassUp, returning
5470 * from the code if either are non-zero.
5471 */
5472DECL_HIDDEN_THROW(uint32_t)
5473iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5474{
5475#ifdef RT_ARCH_AMD64
5476 /*
5477 * AMD64: eax = call status code.
5478 */
5479
5480 /* edx = rcPassUp */
5481 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5482# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5483 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5484# endif
5485
5486 /* edx = eax | rcPassUp */
5487 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5488 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5489 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5490 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5491
5492 /* Jump to non-zero status return path. */
5493 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5494
5495 /* done. */
5496
5497#elif defined(RT_ARCH_ARM64)
5498 /*
5499 * ARM64: w0 = call status code.
5500 */
5501# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5502 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5503# endif
5504 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5505
5506 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5507
5508 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5509
5510 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5511 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5512 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5513
5514#else
5515# error "port me"
5516#endif
5517 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5518 RT_NOREF_PV(idxInstr);
5519 return off;
5520}
5521
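/*
 * Illustrative sketch (not part of the recompiler): the C-level equivalent of
 * the OR-and-branch sequence emitted above.  The function name is hypothetical.
 */
#if 0
# include <stdint.h>
static bool iemNativeSketchMustLeaveTb(int32_t rcCallStatus, int32_t rcPassUp)
{
    /* The emitted code ORs the call status with VMCPU::iem.s.rcPassUp and
       jumps to the non-zero-return path unless both are VINF_SUCCESS (0). */
    return (rcCallStatus | rcPassUp) != 0;
}
#endif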
5522
5523/**
5524 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5525 * raising a \#GP(0) if it isn't.
5526 *
5527 * @returns New code buffer offset; throws VBox status code on failure.
5528 * @param pReNative The native recompile state.
5529 * @param off The code buffer offset.
5530 * @param idxAddrReg The host register with the address to check.
5531 * @param idxInstr The current instruction.
5532 */
5533DECL_HIDDEN_THROW(uint32_t)
5534iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5535{
5536 /*
5537 * Make sure we don't have any outstanding guest register writes as we may
5538 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5539 */
5540 off = iemNativeRegFlushPendingWrites(pReNative, off);
5541
5542#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5543 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5544#else
5545 RT_NOREF(idxInstr);
5546#endif
5547
5548#ifdef RT_ARCH_AMD64
5549 /*
5550 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5551 * return raisexcpt();
5552 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5553 */
5554 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5555
5556 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5557 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5558 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5559 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5560 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5561
5562 iemNativeRegFreeTmp(pReNative, iTmpReg);
5563
5564#elif defined(RT_ARCH_ARM64)
5565 /*
5566 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5567 * return raisexcpt();
5568 * ----
5569 * mov x1, 0x800000000000
5570 * add x1, x0, x1
5571 * cmp xzr, x1, lsr 48
5572 * b.ne .Lraisexcpt
5573 */
5574 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5575
5576 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5577 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5578 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5579 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5580
5581 iemNativeRegFreeTmp(pReNative, iTmpReg);
5582
5583#else
5584# error "Port me"
5585#endif
5586 return off;
5587}
5588
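/*
 * Illustrative sketch (not part of the recompiler): shows that the AMD64 and
 * ARM64 tricks emitted above agree with the straightforward canonical-address
 * definition (bits 63:47 all zero or all one).  Names are hypothetical.
 */
#if 0
# include <stdint.h>
# include <cassert>
static bool iemNativeSketchIsCanonical(uint64_t uAddr)
{
    uint64_t const uTopBits = uAddr >> 47;                      /* bits 63:47 */
    return uTopBits == 0 || uTopBits == UINT64_C(0x1ffff);
}

static bool iemNativeSketchIsCanonicalAmd64Trick(uint64_t uAddr)
{
    /* Only the high dword matters; the +0x8000 rotates both canonical ranges below 0x10000. */
    return ((uint32_t)((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
}

static bool iemNativeSketchIsCanonicalArm64Trick(uint64_t uAddr)
{
    /* Adding half the 48-bit range maps both canonical ranges below 2^48. */
    return ((uAddr + UINT64_C(0x800000000000)) >> 48) == 0;
}

static void iemNativeSketchCheckCanonicalTricks(void)
{
    static uint64_t const s_auTests[] =
    {
        UINT64_C(0x0000000000000000), UINT64_C(0x00007fffffffffff), UINT64_C(0x0000800000000000),
        UINT64_C(0xffff800000000000), UINT64_C(0xffff7fffffffffff), UINT64_C(0xffffffffffffffff),
    };
    for (unsigned i = 0; i < sizeof(s_auTests) / sizeof(s_auTests[0]); i++)
    {
        assert(iemNativeSketchIsCanonicalAmd64Trick(s_auTests[i]) == iemNativeSketchIsCanonical(s_auTests[i]));
        assert(iemNativeSketchIsCanonicalArm64Trick(s_auTests[i]) == iemNativeSketchIsCanonical(s_auTests[i]));
    }
}
#endif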
5589
5590/**
5591 * Emits code to check that the content of @a idxAddrReg is within the limit
5592 * of CS, raising a \#GP(0) if it isn't.
5593 *
5594 * @returns New code buffer offset; throws VBox status code on error.
5595 * @param pReNative The native recompile state.
5596 * @param off The code buffer offset.
5597 * @param idxAddrReg The host register (32-bit) with the address to
5598 * check.
5599 * @param idxInstr The current instruction.
5600 */
5601DECL_HIDDEN_THROW(uint32_t)
5602iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5603 uint8_t idxAddrReg, uint8_t idxInstr)
5604{
5605 /*
5606 * Make sure we don't have any outstanding guest register writes as we may
5607 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5608 */
5609 off = iemNativeRegFlushPendingWrites(pReNative, off);
5610
5611#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5612 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5613#else
5614 RT_NOREF(idxInstr);
5615#endif
5616
5617 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5618 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
5619 kIemNativeGstRegUse_ReadOnly);
5620
5621 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
5622 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5623
5624 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
5625 return off;
5626}
5627
5628
5629/**
5630 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5631 *
5632 * @returns The flush mask.
5633 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5634 * @param fGstShwFlush The starting flush mask.
5635 */
5636DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5637{
5638 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5639 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5640 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5641 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5642 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5643 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5644 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5645 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5646 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5647 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5648 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5649 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5650 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5651 return fGstShwFlush;
5652}
5653
5654
5655/**
5656 * Emits a call to a CImpl function or something similar.
5657 */
5658DECL_HIDDEN_THROW(uint32_t)
5659iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5660 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5661{
5662 /* Writeback everything. */
5663 off = iemNativeRegFlushPendingWrites(pReNative, off);
5664
5665 /*
5666 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5667 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5668 */
5669 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5670 fGstShwFlush
5671 | RT_BIT_64(kIemNativeGstReg_Pc)
5672 | RT_BIT_64(kIemNativeGstReg_EFlags));
5673 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5674
5675 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5676
5677 /*
5678 * Load the parameters.
5679 */
5680#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
5681 /* Special-case the hidden VBOXSTRICTRC pointer. */
5682 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5683 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5684 if (cAddParams > 0)
5685 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5686 if (cAddParams > 1)
5687 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5688 if (cAddParams > 2)
5689 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5690 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5691
5692#else
5693 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5694 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5695 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5696 if (cAddParams > 0)
5697 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5698 if (cAddParams > 1)
5699 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5700 if (cAddParams > 2)
5701# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5702 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5703# else
5704 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5705# endif
5706#endif
5707
5708 /*
5709 * Make the call.
5710 */
5711 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5712
5713#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5714 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5715#endif
5716
5717 /*
5718 * Check the status code.
5719 */
5720 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5721}
5722
5723
5724/**
5725 * Emits a call to a threaded worker function.
5726 */
5727DECL_HIDDEN_THROW(uint32_t)
5728iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5729{
5730 /* We don't know what the threaded function is doing so we must flush all pending writes. */
5731 off = iemNativeRegFlushPendingWrites(pReNative, off);
5732
5733 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5734 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5735
5736#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5737 /* The threaded function may throw / long jmp, so set current instruction
5738 number if we're counting. */
5739 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5740#endif
5741
5742 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
5743
5744#ifdef RT_ARCH_AMD64
5745 /* Load the parameters and emit the call. */
5746# ifdef RT_OS_WINDOWS
5747# ifndef VBOXSTRICTRC_STRICT_ENABLED
5748 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5749 if (cParams > 0)
5750 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5751 if (cParams > 1)
5752 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5753 if (cParams > 2)
5754 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5755# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5756 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5757 if (cParams > 0)
5758 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5759 if (cParams > 1)
5760 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5761 if (cParams > 2)
5762 {
5763 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5764 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5765 }
5766 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5767# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5768# else
5769 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5770 if (cParams > 0)
5771 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5772 if (cParams > 1)
5773 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5774 if (cParams > 2)
5775 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5776# endif
5777
5778 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5779
5780# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5781 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5782# endif
5783
5784#elif defined(RT_ARCH_ARM64)
5785 /*
5786 * ARM64:
5787 */
5788 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5789 if (cParams > 0)
5790 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5791 if (cParams > 1)
5792 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5793 if (cParams > 2)
5794 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5795
5796 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5797
5798#else
5799# error "port me"
5800#endif
5801
5802 /*
5803 * Check the status code.
5804 */
5805 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5806
5807 return off;
5808}
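/*
 * Rough sketch (for illustration only) of the sequence emitted above for a
 * three parameter threaded call on a SysV AMD64 host, assuming
 * IEMNATIVE_REG_FIXED_PVMCPU is rbx and the parameters are loaded as 64-bit
 * immediates:
 *
 *      mov     rdi, rbx                    ; pVCpu
 *      mov     rsi, <auParams[0]>
 *      mov     rdx, <auParams[1]>
 *      mov     rcx, <auParams[2]>
 *      call    g_apfnIemThreadedFunctions[enmFunction]
 *      ; followed by iemNativeEmitCheckCallRetAndPassUp, which examines the
 *      ; status in eax and branches to the NonZeroRetOrPassUp label if needed.
 */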
5809
5810#ifdef VBOX_WITH_STATISTICS
5811/**
5812 * Emits code to update the thread call statistics.
5813 */
5814DECL_INLINE_THROW(uint32_t)
5815iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5816{
5817 /*
5818 * Update threaded function stats.
5819 */
5820 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
5821 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
5822# if defined(RT_ARCH_ARM64)
5823 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
5824 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
5825 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
5826 iemNativeRegFreeTmp(pReNative, idxTmp1);
5827 iemNativeRegFreeTmp(pReNative, idxTmp2);
5828# else
5829 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
5830# endif
5831 return off;
5832}
5833#endif /* VBOX_WITH_STATISTICS */
5834
5835
5836/**
5837 * Emits the code at the CheckBranchMiss label.
5838 */
5839static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5840{
5841 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5842 if (idxLabel != UINT32_MAX)
5843 {
5844 iemNativeLabelDefine(pReNative, idxLabel, off);
5845
5846 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5847 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5848 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5849
5850 /* jump back to the return sequence. */
5851 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5852 }
5853 return off;
5854}
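/*
 * The tail label emitters below all follow the same pattern as the one above;
 * roughly, in host agnostic pseudo code:
 *
 *      if (the label was requested somewhere in the TB)
 *      {
 *          <TheLabel>:
 *              IEMNATIVE_CALL_ARG0_GREG = pVCpu
 *              call the corresponding iemNativeHlpXxxx helper (returns a strict status code)
 *              jmp  <Return>                       ; the common return sequence
 *      }
 */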
5855
5856
5857/**
5858 * Emits the code at the NeedCsLimChecking label.
5859 */
5860static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5861{
5862 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5863 if (idxLabel != UINT32_MAX)
5864 {
5865 iemNativeLabelDefine(pReNative, idxLabel, off);
5866
5867 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5868 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5869 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5870
5871 /* jump back to the return sequence. */
5872 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5873 }
5874 return off;
5875}
5876
5877
5878/**
5879 * Emits the code at the ObsoleteTb label.
5880 */
5881static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5882{
5883 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5884 if (idxLabel != UINT32_MAX)
5885 {
5886 iemNativeLabelDefine(pReNative, idxLabel, off);
5887
5888 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5889 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5890 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5891
5892 /* jump back to the return sequence. */
5893 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5894 }
5895 return off;
5896}
5897
5898
5899/**
5900 * Emits the code at the RaiseGP0 label.
5901 */
5902static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5903{
5904 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5905 if (idxLabel != UINT32_MAX)
5906 {
5907 iemNativeLabelDefine(pReNative, idxLabel, off);
5908
5909 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5910 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5911 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5912
5913 /* jump back to the return sequence. */
5914 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5915 }
5916 return off;
5917}
5918
5919
5920/**
5921 * Emits the code at the RaiseNm label.
5922 */
5923static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5924{
5925 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
5926 if (idxLabel != UINT32_MAX)
5927 {
5928 iemNativeLabelDefine(pReNative, idxLabel, off);
5929
5930 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
5931 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5932 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
5933
5934 /* jump back to the return sequence. */
5935 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5936 }
5937 return off;
5938}
5939
5940
5941/**
5942 * Emits the code at the RaiseUd label.
5943 */
5944static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5945{
5946 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
5947 if (idxLabel != UINT32_MAX)
5948 {
5949 iemNativeLabelDefine(pReNative, idxLabel, off);
5950
5951 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
5952 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5953 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
5954
5955 /* jump back to the return sequence. */
5956 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5957 }
5958 return off;
5959}
5960
5961
5962/**
5963 * Emits the code at the RaiseMf label.
5964 */
5965static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5966{
5967 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
5968 if (idxLabel != UINT32_MAX)
5969 {
5970 iemNativeLabelDefine(pReNative, idxLabel, off);
5971
5972 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
5973 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5974 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
5975
5976 /* jump back to the return sequence. */
5977 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5978 }
5979 return off;
5980}
5981
5982
5983/**
5984 * Emits the code at the ReturnWithFlags label (returns
5985 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5986 */
5987static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5988{
5989 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5990 if (idxLabel != UINT32_MAX)
5991 {
5992 iemNativeLabelDefine(pReNative, idxLabel, off);
5993
5994 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5995
5996 /* jump back to the return sequence. */
5997 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5998 }
5999 return off;
6000}
6001
6002
6003/**
6004 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6005 */
6006static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6007{
6008 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6009 if (idxLabel != UINT32_MAX)
6010 {
6011 iemNativeLabelDefine(pReNative, idxLabel, off);
6012
6013 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6014
6015 /* jump back to the return sequence. */
6016 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6017 }
6018 return off;
6019}
6020
6021
6022/**
6023 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6024 */
6025static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6026{
6027 /*
6028 * Generate the rc + rcPassUp fiddling code if needed.
6029 */
6030 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6031 if (idxLabel != UINT32_MAX)
6032 {
6033 iemNativeLabelDefine(pReNative, idxLabel, off);
6034
6035 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6036#ifdef RT_ARCH_AMD64
6037# ifdef RT_OS_WINDOWS
6038# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6039 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6040# endif
6041 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6042 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6043# else
6044 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6045 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6046# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6047 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6048# endif
6049# endif
6050# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6051 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6052# endif
6053
6054#else
6055 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6056 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6057 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6058#endif
6059
6060 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6061 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6062 }
6063 return off;
6064}
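/*
 * In other words, the register shuffling above simply realizes this call on
 * whatever host calling convention is in use (a sketch, not literal code):
 *
 *      rc = iemNativeHlpExecStatusCodeFiddling(pVCpu, rc (in eax / w0), idxInstr);
 *      goto Return;
 *
 * The values are already sitting in registers when we get here; the #ifdef
 * maze only moves them into the right argument registers.
 */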
6065
6066
6067/**
6068 * Emits a standard epilog.
6069 */
6070static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6071{
6072 *pidxReturnLabel = UINT32_MAX;
6073
6074 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6075 off = iemNativeRegFlushPendingWrites(pReNative, off);
6076
6077 /*
6078 * Successful return, so clear the return register (eax, w0).
6079 */
6080 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6081
6082 /*
6083 * Define label for common return point.
6084 */
6085 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6086 *pidxReturnLabel = idxReturn;
6087
6088 /*
6089 * Restore registers and return.
6090 */
6091#ifdef RT_ARCH_AMD64
6092 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6093
6094 /* Reposition rsp at the r15 restore point. */
6095 pbCodeBuf[off++] = X86_OP_REX_W;
6096 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6097 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6098 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6099
6100 /* Pop non-volatile registers and return */
6101 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6102 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6103 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6104 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6105 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6106 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6107 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6108 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6109# ifdef RT_OS_WINDOWS
6110 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6111 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6112# endif
6113 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6114 pbCodeBuf[off++] = 0xc9; /* leave */
6115 pbCodeBuf[off++] = 0xc3; /* ret */
6116 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6117
6118#elif defined(RT_ARCH_ARM64)
6119 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6120
6121 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
6122 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6123 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6124 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6125 IEMNATIVE_FRAME_VAR_SIZE / 8);
6126 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6127 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6128 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6129 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6130 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6131 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6132 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6133 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6134 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6135 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6136 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6137 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6138
6139 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6140 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6141 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6142 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6143
6144 /* retab / ret */
6145# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6146 if (1)
6147 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6148 else
6149# endif
6150 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6151
6152#else
6153# error "port me"
6154#endif
6155 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6156
6157 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6158}
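/*
 * For reference, a rough sketch of the AMD64 epilog emitted above on a
 * linux/darwin host (a windows host additionally pops rdi and rsi):
 *
 *      lea     rsp, [rbp - 5*8]            ; IEMNATIVE_FP_OFF_LAST_PUSH
 *      pop     r15
 *      pop     r14
 *      pop     r13
 *      pop     r12
 *      pop     rbx
 *      leave
 *      ret
 *      int3                                ; poison
 */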
6159
6160
6161/**
6162 * Emits a standard prolog.
6163 */
6164static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6165{
6166#ifdef RT_ARCH_AMD64
6167 /*
6168 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6169 * reserving 64 bytes for stack variables plus 4 non-register argument
6170 * slots. Fixed register assignment: xBX = pVCpu;
6171 *
6172 * Since we always do the same register spilling, we can use the same
6173 * unwind description for all the code.
6174 */
6175 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6176 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6177 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6178 pbCodeBuf[off++] = 0x8b;
6179 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6180 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6181 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6182# ifdef RT_OS_WINDOWS
6183 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6184 pbCodeBuf[off++] = 0x8b;
6185 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6186 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6187 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6188# else
6189 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6190 pbCodeBuf[off++] = 0x8b;
6191 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6192# endif
6193 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6194 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6195 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6196 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6197 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6198 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6199 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6200 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6201
6202# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6203 /* Save the frame pointer. */
6204 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6205# endif
6206
6207 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6208 X86_GREG_xSP,
6209 IEMNATIVE_FRAME_ALIGN_SIZE
6210 + IEMNATIVE_FRAME_VAR_SIZE
6211 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6212 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6213 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6214 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6215 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6216
6217#elif defined(RT_ARCH_ARM64)
6218 /*
6219 * We set up a stack frame exactly like on x86, only we have to push the
6220 * return address ourselves here. We save all non-volatile registers.
6221 */
6222 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6223
6224 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement of libunwind for JIT FDEs. Investigate further; we have been
6225 * unable to figure out where the BRK following the AUTHB*+XPACB* stuff in libunwind comes from. It's
6226 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether
6227 * it's in any way conditional, so just emit this instruction now and hope for the best... */
6228 /* pacibsp */
6229 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6230# endif
6231
6232 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
6233 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6234 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6235 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6236 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6237 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6238 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6239 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6240 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6241 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6242 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6243 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6244 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6245 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6246 /* Save the BP and LR (ret address) registers at the top of the frame. */
6247 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6248 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6249 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6250 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6251 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6252 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6253
6254 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6255 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6256
6257 /* mov r28, r0 */
6258 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6259 /* mov r27, r1 */
6260 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6261
6262# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6263 /* Save the frame pointer. */
6264 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6265 ARMV8_A64_REG_X2);
6266# endif
6267
6268#else
6269# error "port me"
6270#endif
6271 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6272 return off;
6273}
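/*
 * Rough sketch of the AMD64 prolog emitted above on a linux/darwin host (a
 * windows host takes pVCpu in rcx and additionally pushes rsi and rdi):
 *
 *      push    rbp
 *      mov     rbp, rsp
 *      push    rbx
 *      mov     rbx, rdi                    ; rbx = pVCpu (IEMNATIVE_REG_FIXED_PVMCPU)
 *      push    r12
 *      push    r13
 *      push    r14
 *      push    r15
 *      sub     rsp, <variable area + stack/shadow argument slots>
 */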
6274
6275
6276
6277
6278/*********************************************************************************************************************************
6279* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
6280*********************************************************************************************************************************/
6281
6282#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
6283 { \
6284 Assert(pReNative->Core.bmVars == 0); \
6285 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
6286 Assert(pReNative->Core.bmStack == 0); \
6287 pReNative->fMc = (a_fMcFlags); \
6288 pReNative->fCImpl = (a_fCImplFlags); \
6289 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
6290
6291/** We have to get to the end in recompilation mode, as otherwise we won't
6292 * generate code for all the IEM_MC_IF_XXX branches. */
6293#define IEM_MC_END() \
6294 iemNativeVarFreeAll(pReNative); \
6295 } return off
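/*
 * For illustration, a recompiler function body bracketed by the two macros
 * above expands to roughly this (placeholders kept as in the macro):
 *
 *      {
 *          Assert(pReNative->Core.bmVars == 0);
 *          Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
 *          Assert(pReNative->Core.bmStack == 0);
 *          pReNative->fMc    = (a_fMcFlags);
 *          pReNative->fCImpl = (a_fCImplFlags);
 *          pReNative->cArgs  = (a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative);
 *          ... the IEM_MC_XXX statements of the instruction body ...
 *          iemNativeVarFreeAll(pReNative);
 *      } return off;
 */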
6296
6297
6298
6299/*********************************************************************************************************************************
6300* Native Emitter Support. *
6301*********************************************************************************************************************************/
6302
6303
6304#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
6305
6306#define IEM_MC_NATIVE_ELSE() } else {
6307
6308#define IEM_MC_NATIVE_ENDIF() } ((void)0)
6309
6310
6311#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
6312 off = a_fnEmitter(pReNative, off)
6313
6314#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
6315 off = a_fnEmitter(pReNative, off, (a0))
6316
6317#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
6318 off = a_fnEmitter(pReNative, off, (a0), (a1))
6319
6320#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
6321 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
6322
6323#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
6324 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
6325
6326#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
6327 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
6328
6329#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
6330 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
6331
6332#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
6333 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
6334
6335#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
6336 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
6337
6338
6339
6340/*********************************************************************************************************************************
6341* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
6342*********************************************************************************************************************************/
6343
6344#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
6345 pReNative->fMc = 0; \
6346 pReNative->fCImpl = (a_fFlags); \
6347 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
6348
6349
6350#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6351 pReNative->fMc = 0; \
6352 pReNative->fCImpl = (a_fFlags); \
6353 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
6354
6355DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6356 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6357 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
6358{
6359 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
6360}
6361
6362
6363#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6364 pReNative->fMc = 0; \
6365 pReNative->fCImpl = (a_fFlags); \
6366 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6367 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
6368
6369DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6370 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6371 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
6372{
6373 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
6374}
6375
6376
6377#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6378 pReNative->fMc = 0; \
6379 pReNative->fCImpl = (a_fFlags); \
6380 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6381 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
6382
6383DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6384 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6385 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
6386 uint64_t uArg2)
6387{
6388 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
6389}
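/*
 * Usage sketch: a two argument IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED thus ends
 * up as roughly the following in the generated emitter (angle brackets mark
 * the macro arguments):
 *
 *      pReNative->fMc    = 0;
 *      pReNative->fCImpl = <a_fFlags>;
 *      return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, <a_fGstShwFlush>,
 *                                    (uintptr_t)<a_pfnCImpl>, <a_cbInstr>, 2 (cAddParams),
 *                                    <a0>, <a1>, 0);
 *
 * i.e. the wrappers above only exist to pin down the parameter count before
 * handing things to the common iemNativeEmitCImplCall worker.
 */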
6390
6391
6392
6393/*********************************************************************************************************************************
6394* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
6395*********************************************************************************************************************************/
6396
6397/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
6398 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
6399DECL_INLINE_THROW(uint32_t)
6400iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6401{
6402 /*
6403 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
6404 * return with a special status code and make the execution loop deal with
6405 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
6406 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
6407 * could continue w/o interruption, it will probably drop into the
6408 * debugger, so it is not worth the effort of trying to service it here; we
6409 * just lump it in with the handling of the others.
6410 *
6411 * To simplify the code and the register state management even more (wrt the
6412 * immediate in the AND operation), we always update the flags and skip the
6413 * conditional jump associated with the extra check.
6414 */
6415 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
6416 <= UINT32_MAX);
6417#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6418 AssertMsg( pReNative->idxCurCall == 0
6419 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
6420 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
6421#endif
6422
6423 /*
6424 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
6425 * any pending register writes must be flushed.
6426 */
6427 off = iemNativeRegFlushPendingWrites(pReNative, off);
6428
6429 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6430 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
6431 true /*fSkipLivenessAssert*/);
6432 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
6433 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
6434 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
6435 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
6436 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6437
6438 /* Free but don't flush the EFLAGS register. */
6439 iemNativeRegFreeTmp(pReNative, idxEflReg);
6440
6441 return off;
6442}
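/*
 * Pseudo C sketch of what the code emitted above does at runtime:
 *
 *      uint32_t fEfl = pVCpu->cpum.GstCtx.eflags.u;
 *      if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;           // via the ReturnWithFlags label
 *      fEfl &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *      pVCpu->cpum.GstCtx.eflags.u = fEfl;
 */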
6443
6444
6445/** Emits the instruction-finishing status handling: a no-op for VINF_SUCCESS, otherwise flush pending writes and jump to the ReturnBreak label. */
6446template<int const a_rcNormal>
6447DECL_FORCE_INLINE(uint32_t)
6448iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6449{
6450 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
6451 if (a_rcNormal != VINF_SUCCESS)
6452 {
6453#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6454 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6455#else
6456 RT_NOREF_PV(idxInstr);
6457#endif
6458
6459 /* As this code returns from the TB any pending register writes must be flushed. */
6460 off = iemNativeRegFlushPendingWrites(pReNative, off);
6461
6462 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
6463 }
6464 return off;
6465}
6466
6467
6468#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
6469 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6470 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6471
6472#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6473 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6474 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6475 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6476
6477/** Same as iemRegAddToRip64AndFinishingNoFlags. */
6478DECL_INLINE_THROW(uint32_t)
6479iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6480{
6481#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6482# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6483 if (!pReNative->Core.offPc)
6484 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6485# endif
6486
6487 /* Allocate a temporary PC register. */
6488 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6489
6490 /* Perform the addition and store the result. */
6491 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
6492 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6493
6494 /* Free but don't flush the PC register. */
6495 iemNativeRegFreeTmp(pReNative, idxPcReg);
6496#endif
6497
6498#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6499 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6500
6501 pReNative->Core.offPc += cbInstr;
6502# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6503 off = iemNativePcAdjustCheck(pReNative, off);
6504# endif
6505 if (pReNative->cCondDepth)
6506 off = iemNativeEmitPcWriteback(pReNative, off);
6507 else
6508 pReNative->Core.cInstrPcUpdateSkipped++;
6509#endif
6510
6511 return off;
6512}
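/*
 * Without delayed PC updating the above is just the native version of:
 *
 *      pVCpu->cpum.GstCtx.rip += cbInstr;
 *
 * With IEMNATIVE_WITH_DELAYED_PC_UPDATING the increment is instead accumulated
 * in pReNative->Core.offPc and only written back when it has to be (inside
 * conditionals and before TB exits), saving an add + store per instruction in
 * the common case.
 */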
6513
6514
6515#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
6516 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6517 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6518
6519#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6520 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6521 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6522 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6523
6524/** Same as iemRegAddToEip32AndFinishingNoFlags. */
6525DECL_INLINE_THROW(uint32_t)
6526iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6527{
6528#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6529# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6530 if (!pReNative->Core.offPc)
6531 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6532# endif
6533
6534 /* Allocate a temporary PC register. */
6535 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6536
6537 /* Perform the addition and store the result. */
6538 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6539 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6540
6541 /* Free but don't flush the PC register. */
6542 iemNativeRegFreeTmp(pReNative, idxPcReg);
6543#endif
6544
6545#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6546 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6547
6548 pReNative->Core.offPc += cbInstr;
6549# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6550 off = iemNativePcAdjustCheck(pReNative, off);
6551# endif
6552 if (pReNative->cCondDepth)
6553 off = iemNativeEmitPcWriteback(pReNative, off);
6554 else
6555 pReNative->Core.cInstrPcUpdateSkipped++;
6556#endif
6557
6558 return off;
6559}
6560
6561
6562#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
6563 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6564 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6565
6566#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6567 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6568 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6569 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6570
6571/** Same as iemRegAddToIp16AndFinishingNoFlags. */
6572DECL_INLINE_THROW(uint32_t)
6573iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6574{
6575#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6576# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6577 if (!pReNative->Core.offPc)
6578 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6579# endif
6580
6581 /* Allocate a temporary PC register. */
6582 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6583
6584 /* Perform the addition and store the result. */
6585 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6586 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6587 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6588
6589 /* Free but don't flush the PC register. */
6590 iemNativeRegFreeTmp(pReNative, idxPcReg);
6591#endif
6592
6593#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6594 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6595
6596 pReNative->Core.offPc += cbInstr;
6597# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6598 off = iemNativePcAdjustCheck(pReNative, off);
6599# endif
6600 if (pReNative->cCondDepth)
6601 off = iemNativeEmitPcWriteback(pReNative, off);
6602 else
6603 pReNative->Core.cInstrPcUpdateSkipped++;
6604#endif
6605
6606 return off;
6607}
6608
6609
6610
6611/*********************************************************************************************************************************
6612* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
6613*********************************************************************************************************************************/
6614
6615#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6616 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6617 (a_enmEffOpSize), pCallEntry->idxInstr); \
6618 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6619
6620#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6621 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6622 (a_enmEffOpSize), pCallEntry->idxInstr); \
6623 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6624 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6625
6626#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
6627 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6628 IEMMODE_16BIT, pCallEntry->idxInstr); \
6629 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6630
6631#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6632 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6633 IEMMODE_16BIT, pCallEntry->idxInstr); \
6634 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6635 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6636
6637#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
6638 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6639 IEMMODE_64BIT, pCallEntry->idxInstr); \
6640 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6641
6642#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6643 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6644 IEMMODE_64BIT, pCallEntry->idxInstr); \
6645 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6646 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6647
6648/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
6649 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
6650 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
6651DECL_INLINE_THROW(uint32_t)
6652iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6653 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6654{
6655 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
6656
6657 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6658 off = iemNativeRegFlushPendingWrites(pReNative, off);
6659
6660#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6661 Assert(pReNative->Core.offPc == 0);
6662
6663 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6664#endif
6665
6666 /* Allocate a temporary PC register. */
6667 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6668
6669 /* Perform the addition. */
6670 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
6671
6672 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
6673 {
6674 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6675 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6676 }
6677 else
6678 {
6679 /* Just truncate the result to 16-bit IP. */
6680 Assert(enmEffOpSize == IEMMODE_16BIT);
6681 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6682 }
6683 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6684
6685 /* Free but don't flush the PC register. */
6686 iemNativeRegFreeTmp(pReNative, idxPcReg);
6687
6688 return off;
6689}
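/*
 * Pseudo C sketch of the emitted code (names as used on the interpreter side):
 *
 *      uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
 *      if (enmEffOpSize == IEMMODE_64BIT)
 *      {
 *          if (!IEM_IS_CANONICAL(uNewRip))
 *              return iemRaiseGeneralProtectionFault0(pVCpu);  // RaiseGp0 label + TB exit
 *      }
 *      else
 *          uNewRip &= UINT16_MAX;                              // 16-bit operand size
 *      pVCpu->cpum.GstCtx.rip = uNewRip;
 */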
6690
6691
6692#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6693 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6694 (a_enmEffOpSize), pCallEntry->idxInstr); \
6695 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6696
6697#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6698 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6699 (a_enmEffOpSize), pCallEntry->idxInstr); \
6700 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6701 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6702
6703#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
6704 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6705 IEMMODE_16BIT, pCallEntry->idxInstr); \
6706 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6707
6708#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6709 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6710 IEMMODE_16BIT, pCallEntry->idxInstr); \
6711 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6712 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6713
6714#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
6715 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6716 IEMMODE_32BIT, pCallEntry->idxInstr); \
6717 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6718
6719#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6720 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6721 IEMMODE_32BIT, pCallEntry->idxInstr); \
6722 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6723 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6724
6725/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
6726 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
6727 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
6728DECL_INLINE_THROW(uint32_t)
6729iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6730 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6731{
6732 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
6733
6734 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6735 off = iemNativeRegFlushPendingWrites(pReNative, off);
6736
6737#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6738 Assert(pReNative->Core.offPc == 0);
6739
6740 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6741#endif
6742
6743 /* Allocate a temporary PC register. */
6744 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6745
6746 /* Perform the addition. */
6747 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6748
6749 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6750 if (enmEffOpSize == IEMMODE_16BIT)
6751 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6752
6753 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
6754/** @todo we can skip this in 32-bit FLAT mode. */
6755 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6756
6757 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6758
6759 /* Free but don't flush the PC register. */
6760 iemNativeRegFreeTmp(pReNative, idxPcReg);
6761
6762 return off;
6763}
6764
6765
6766#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
6767 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6768 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6769
6770#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
6771 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6772 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6773 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6774
6775#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
6776 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6777 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6778
6779#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6780 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6781 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6782 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6783
6784#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
6785 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6786 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6787
6788#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6789 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6790 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6791 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6792
6793/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6794DECL_INLINE_THROW(uint32_t)
6795iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6796 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6797{
6798 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6799 off = iemNativeRegFlushPendingWrites(pReNative, off);
6800
6801#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6802 Assert(pReNative->Core.offPc == 0);
6803
6804 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6805#endif
6806
6807 /* Allocate a temporary PC register. */
6808 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6809
6810 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6811 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6812 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6813 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6814 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6815
6816 /* Free but don't flush the PC register. */
6817 iemNativeRegFreeTmp(pReNative, idxPcReg);
6818
6819 return off;
6820}
6821
6822
6823
6824/*********************************************************************************************************************************
6825* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
6826*********************************************************************************************************************************/
6827
6828/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6829#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6830 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6831
6832/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6833#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6834 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6835
6836/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6837#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6838 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6839
6840/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6841 * clears flags. */
6842#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6843 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6844 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6845
6846/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6847 * clears flags. */
6848#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6849 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6850 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6851
6852/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6853 * clears flags. */
6854#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6855 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6856 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6857
6858#undef IEM_MC_SET_RIP_U16_AND_FINISH
6859
6860
6861/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6862#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6863 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6864
6865/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6866#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6867 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6868
6869/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6870 * clears flags. */
6871#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6872 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6873 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6874
6875/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6876 * and clears flags. */
6877#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6878 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6879 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6880
6881#undef IEM_MC_SET_RIP_U32_AND_FINISH
6882
6883
6884/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6885#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6886 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6887
6888/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6889 * and clears flags. */
6890#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6891 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6892 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6893
6894#undef IEM_MC_SET_RIP_U64_AND_FINISH
6895
6896
6897/** Same as iemRegRipJumpU16AndFinishNoFlags,
6898 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6899DECL_INLINE_THROW(uint32_t)
6900iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6901 uint8_t idxInstr, uint8_t cbVar)
6902{
6903 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6904 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
6905
6906 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6907 off = iemNativeRegFlushPendingWrites(pReNative, off);
6908
6909#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6910 Assert(pReNative->Core.offPc == 0);
6911
6912 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6913#endif
6914
6915 /* Get a register with the new PC loaded from idxVarPc.
6916 Note! This ASSUMES that the high bits of the GPR are zeroed. */
6917 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6918
6919 /* Check limit (may #GP(0) + exit TB). */
6920 if (!f64Bit)
6921/** @todo we can skip this test in FLAT 32-bit mode. */
6922 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6923 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6924 else if (cbVar > sizeof(uint32_t))
6925 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6926
6927 /* Store the result. */
6928 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6929
6930 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6931 /** @todo implicitly free the variable? */
6932
6933 return off;
6934}
6935
6936
6937
6938/*********************************************************************************************************************************
6939* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
6940*********************************************************************************************************************************/
6941
6942#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
6943 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
6944
6945/**
6946 * Emits code to check if a \#NM exception should be raised.
6947 *
6948 * @returns New code buffer offset, UINT32_MAX on failure.
6949 * @param pReNative The native recompile state.
6950 * @param off The code buffer offset.
6951 * @param idxInstr The current instruction.
6952 */
6953DECL_INLINE_THROW(uint32_t)
6954iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6955{
6956 /*
6957 * Make sure we don't have any outstanding guest register writes as we may
6958 * raise an #NM and all guest registers must be up to date in CPUMCTX.
6959 *
6960 * @todo r=aeichner Can we postpone this to the RaiseNm path?
6961 */
6962 off = iemNativeRegFlushPendingWrites(pReNative, off);
6963
6964#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6965 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6966#else
6967 RT_NOREF(idxInstr);
6968#endif
6969
6970 /* Allocate a temporary CR0 register. */
6971 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
6972 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
6973
6974 /*
6975 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
6976 * return raisexcpt();
6977 */
6978 /* Test and jump. */
6979 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
6980
6981 /* Free but don't flush the CR0 register. */
6982 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
6983
6984 return off;
6985}
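
/* In plain C the condition encoded by the emitter above is simply the
   following; the helper is a made-up illustration (compiled out), the real
   code branches to the shared RaiseNm label instead of returning a bool. */
#if 0
static bool iemExampleShouldRaiseNm(uint64_t uCr0)
{
    /* #NM is due whenever CR0.EM or CR0.TS is set. */
    return (uCr0 & (X86_CR0_EM | X86_CR0_TS)) != 0;
}
#endif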
6986
6987
6988#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
6989 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
6990
6991/**
6992 * Emits code to check if a \#MF exception should be raised.
6993 *
6994 * @returns New code buffer offset, UINT32_MAX on failure.
6995 * @param pReNative The native recompile state.
6996 * @param off The code buffer offset.
6997 * @param idxInstr The current instruction.
6998 */
6999DECL_INLINE_THROW(uint32_t)
7000iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7001{
7002 /*
7003 * Make sure we don't have any outstanding guest register writes as we may
7004 * raise an #MF and all guest registers must be up to date in CPUMCTX.
7005 *
7006 * @todo r=aeichner Can we postpone this to the RaiseMf path?
7007 */
7008 off = iemNativeRegFlushPendingWrites(pReNative, off);
7009
7010#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7011 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7012#else
7013 RT_NOREF(idxInstr);
7014#endif
7015
7016 /* Allocate a temporary FSW register. */
7017 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
7018 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
7019
7020 /*
7021 * if ((FSW & X86_FSW_ES) != 0)
7022 * return raisexcpt();
7023 */
7024 /* Test and jump. */
7025 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
7026
7027 /* Free but don't flush the FSW register. */
7028 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
7029
7030 return off;
7031}
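
/* The equivalent C predicate for the test above; made-up helper name,
   compiled out.  The real code branches to the shared RaiseMf label. */
#if 0
static bool iemExampleShouldRaiseMf(uint16_t uFsw)
{
    /* A pending x87 exception summary (FSW.ES) means #MF on the next waiting instruction. */
    return (uFsw & X86_FSW_ES) != 0;
}
#endif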
7032
7033
7034#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
7035 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
7036
7037/**
7038 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
7039 *
7040 * @returns New code buffer offset, UINT32_MAX on failure.
7041 * @param pReNative The native recompile state.
7042 * @param off The code buffer offset.
7043 * @param idxInstr The current instruction.
7044 */
7045DECL_INLINE_THROW(uint32_t)
7046iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7047{
7048 /*
7049 * Make sure we don't have any outstanding guest register writes as we may
7050 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
7051 *
7052 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
7053 */
7054 off = iemNativeRegFlushPendingWrites(pReNative, off);
7055
7056#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7057 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7058#else
7059 RT_NOREF(idxInstr);
7060#endif
7061
7062 /* Allocate a temporary CR0 and CR4 register. */
7063 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
7064 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
7065 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
7066 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
7067
7068 /** @todo r=aeichner Optimize this more later to have less compares and branches,
7069 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
7070 * actual performance benefit first). */
7071 /*
7072 * if (cr0 & X86_CR0_EM)
7073 * return raisexcpt();
7074 */
7075 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM, idxLabelRaiseUd);
7076 /*
7077 * if (!(cr4 & X86_CR4_OSFXSR))
7078 * return raisexcpt();
7079 */
7080 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR, idxLabelRaiseUd);
7081 /*
7082 * if (cr0 & X86_CR0_TS)
7083 * return raisexcpt();
7084 */
7085 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_TS, idxLabelRaiseNm);
7086
7087 /* Free but don't flush the CR0 and CR4 register. */
7088 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
7089 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
7090
7091 return off;
7092}
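
/* For clarity, the three tests emitted above implement the following decision
   order in plain C; the helper and its return convention are made up for
   illustration (compiled out), the real code jumps to the shared RaiseUd and
   RaiseNm labels instead. */
#if 0
static int iemExampleSseXcptToRaise(uint64_t uCr0, uint64_t uCr4)
{
    if (uCr0 & X86_CR0_EM)          /* no SSE at all -> #UD */
        return X86_XCPT_UD;
    if (!(uCr4 & X86_CR4_OSFXSR))   /* OS hasn't enabled FXSAVE/FXRSTOR -> #UD */
        return X86_XCPT_UD;
    if (uCr0 & X86_CR0_TS)          /* task switched -> #NM (lazy FPU state switching) */
        return X86_XCPT_NM;
    return -1;                      /* no exception */
}
#endif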
7093
7094
7095#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
7096 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
7097
7098/**
7099 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
7100 *
7101 * @returns New code buffer offset, UINT32_MAX on failure.
7102 * @param pReNative The native recompile state.
7103 * @param off The code buffer offset.
7104 * @param idxInstr The current instruction.
7105 */
7106DECL_INLINE_THROW(uint32_t)
7107iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7108{
7109 /*
7110 * Make sure we don't have any outstanding guest register writes as we may
7111 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
7112 *
7113 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
7114 */
7115 off = iemNativeRegFlushPendingWrites(pReNative, off);
7116
7117#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7118 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7119#else
7120 RT_NOREF(idxInstr);
7121#endif
7122
7123 /* Allocate a temporary CR0, CR4 and XCR0 register. */
7124 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
7125 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
7126 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
7127 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
7128 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
7129
7130#if 1
7131 off = iemNativeEmitBrk(pReNative, off, 0x4223); /** @todo Test this when AVX actually becomes available. */
7132#endif
7133
7134 /** @todo r=aeichner Optimize this more later to have less compares and branches,
7135 * (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h but check that it has some
7136 * actual performance benefit first). */
7137 /*
7138 * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
7139 * return raisexcpt();
7140 */
7141 const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
7142 off = iemNativeEmitOrGprByGpr(pReNative, off, idxRegTmp, idxXcr0Reg);
7143 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
7144 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7145
7146 /*
7147 * if (!(cr4 & X86_CR4_OSXSAVE))
7148 * return raisexcpt();
7149 */
7150 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE, idxLabelRaiseUd);
7151 /*
7152 * if (cr0 & X86_CR0_TS)
7153 * return raisexcpt();
7154 */
7155 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_TS, idxLabelRaiseNm);
7156
7157 /* Free but don't flush the CR0, CR4 and XCR0 register. */
7158 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
7159 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
7160 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
7161
7162 return off;
7163}
7164
7165
7166
7167/*********************************************************************************************************************************
7168* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
7169*********************************************************************************************************************************/
7170
7171/**
7172 * Pushes an IEM_MC_IF_XXX onto the condition stack.
7173 *
7174 * @returns Pointer to the condition stack entry.
7175 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting is too deep.
7176 */
7177DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
7178{
7179#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7180 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
7181#endif
7182
7183 uint32_t const idxStack = pReNative->cCondDepth;
7184 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
7185
7186 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
7187 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
7188
7189 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
7190 pEntry->fInElse = false;
7191 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
7192 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
7193
7194 return pEntry;
7195}
7196
7197
7198/**
7199 * Start of the if-block, snapshotting the register and variable state.
7200 */
7201DECL_INLINE_THROW(void)
7202iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
7203{
7204 Assert(offIfBlock != UINT32_MAX);
7205 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7206 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7207 Assert(!pEntry->fInElse);
7208
7209 /* Define the start of the IF block if requested or for disassembly purposes. */
7210 if (idxLabelIf != UINT32_MAX)
7211 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
7212#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7213 else
7214 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
7215#else
7216 RT_NOREF(offIfBlock);
7217#endif
7218
7219#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7220 Assert(pReNative->Core.offPc == 0);
7221#endif
7222
7223 /* Copy the initial state so we can restore it in the 'else' block. */
7224 pEntry->InitialState = pReNative->Core;
7225}
7226
7227
7228#define IEM_MC_ELSE() } while (0); \
7229 off = iemNativeEmitElse(pReNative, off); \
7230 do {
7231
7232/** Emits code related to IEM_MC_ELSE. */
7233DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7234{
7235 /* Check sanity and get the conditional stack entry. */
7236 Assert(off != UINT32_MAX);
7237 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7238 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7239 Assert(!pEntry->fInElse);
7240
7241 /* Jump to the endif */
7242 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
7243
7244 /* Define the else label and enter the else part of the condition. */
7245 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
7246 pEntry->fInElse = true;
7247
7248#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7249 Assert(pReNative->Core.offPc == 0);
7250#endif
7251
7252 /* Snapshot the core state so we can do a merge at the endif and restore
7253 the snapshot we took at the start of the if-block. */
7254 pEntry->IfFinalState = pReNative->Core;
7255 pReNative->Core = pEntry->InitialState;
7256
7257 return off;
7258}
7259
7260
7261#define IEM_MC_ENDIF() } while (0); \
7262 off = iemNativeEmitEndIf(pReNative, off)
7263
7264/** Emits code related to IEM_MC_ENDIF. */
7265DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7266{
7267 /* Check sanity and get the conditional stack entry. */
7268 Assert(off != UINT32_MAX);
7269 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7270 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7271
7272#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7273 Assert(pReNative->Core.offPc == 0);
7274#endif
7275
7276 /*
7277 * Now we have to find common ground with the core state at the end of the
7278 * if-block (the final if-state). Use the smallest common denominator and
7279 * just drop anything that isn't the same in both states.
7280 */
7281 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
7282 * which is why we're doing this at the end of the else-block.
7283 * But we'd need more info about the future for that to be worth the effort. */
7284 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
7285 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
7286 {
7287 /* shadow guest stuff first. */
7288 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
7289 if (fGstRegs)
7290 {
7291 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
7292 do
7293 {
7294 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
7295 fGstRegs &= ~RT_BIT_64(idxGstReg);
7296
7297 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
7298 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
7299 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
7300 {
7301 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
7302 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
7303 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
7304 }
7305 } while (fGstRegs);
7306 }
7307 else
7308 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
7309
7310 /* Check variables next. For now we must require them to be identical
7311 or stuff we can recreate. */
7312 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
7313 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
7314 if (fVars)
7315 {
7316 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
7317 do
7318 {
7319 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
7320 fVars &= ~RT_BIT_32(idxVar);
7321
7322 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
7323 {
7324 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
7325 continue;
7326 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
7327 {
7328 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7329 if (idxHstReg != UINT8_MAX)
7330 {
7331 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7332 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7333 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
7334 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7335 }
7336 continue;
7337 }
7338 }
7339 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
7340 continue;
7341
7342 /* Irreconcilable, so drop it. */
7343 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7344 if (idxHstReg != UINT8_MAX)
7345 {
7346 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7347 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7348 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
7349 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7350 }
7351 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7352 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7353 } while (fVars);
7354 }
7355
7356 /* Finally, check that the host register allocations match. */
7357 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
7358 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
7359 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
7360 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
7361 }
7362
7363 /*
7364 * Define the endif label and maybe the else one if we're still in the 'if' part.
7365 */
7366 if (!pEntry->fInElse)
7367 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
7368 else
7369 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
7370 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
7371
7372 /* Pop the conditional stack.*/
7373 pReNative->cCondDepth -= 1;
7374
7375 return off;
7376}
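
/* Taken together, the IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF defines above
   and below expand inside a recompiled MC block to roughly the shape sketched
   here; this is an illustrative expansion only (compiled out), assuming the
   usual pReNative/off locals of a threaded-function body. */
#if 0
    off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF); /* push cond entry, test + jump to 'else' */
    do {
        /* if-block statements are recompiled here against the snapshotted state */
    } while (0);
    off = iemNativeEmitElse(pReNative, off);    /* jmp to 'endif', define 'else', restore the snapshot */
    do {
        /* else-block statements */
    } while (0);
    off = iemNativeEmitEndIf(pReNative, off);   /* reconcile the two states, define 'endif' (and 'else' if absent) */
#endif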
7377
7378
7379#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
7380 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
7381 do {
7382
7383/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
7384DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
7385{
7386 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7387
7388 /* Get the eflags. */
7389 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7390 kIemNativeGstRegUse_ReadOnly);
7391
7392 /* Test and jump. */
7393 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
7394
7395 /* Free but don't flush the EFlags register. */
7396 iemNativeRegFreeTmp(pReNative, idxEflReg);
7397
7398 /* Make a copy of the core state now as we start the if-block. */
7399 iemNativeCondStartIfBlock(pReNative, off);
7400
7401 return off;
7402}
7403
7404
7405#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
7406 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
7407 do {
7408
7409/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
7410DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
7411{
7412 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7413
7414 /* Get the eflags. */
7415 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7416 kIemNativeGstRegUse_ReadOnly);
7417
7418 /* Test and jump. */
7419 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
7420
7421 /* Free but don't flush the EFlags register. */
7422 iemNativeRegFreeTmp(pReNative, idxEflReg);
7423
7424 /* Make a copy of the core state now as we start the if-block. */
7425 iemNativeCondStartIfBlock(pReNative, off);
7426
7427 return off;
7428}
7429
7430
7431#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
7432 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
7433 do {
7434
7435/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
7436DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7437{
7438 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7439
7440 /* Get the eflags. */
7441 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7442 kIemNativeGstRegUse_ReadOnly);
7443
7444 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7445 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7446
7447 /* Test and jump. */
7448 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7449
7450 /* Free but don't flush the EFlags register. */
7451 iemNativeRegFreeTmp(pReNative, idxEflReg);
7452
7453 /* Make a copy of the core state now as we start the if-block. */
7454 iemNativeCondStartIfBlock(pReNative, off);
7455
7456 return off;
7457}
7458
7459
7460#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
7461 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
7462 do {
7463
7464/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
7465DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7466{
7467 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7468
7469 /* Get the eflags. */
7470 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7471 kIemNativeGstRegUse_ReadOnly);
7472
7473 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7474 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7475
7476 /* Test and jump. */
7477 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7478
7479 /* Free but don't flush the EFlags register. */
7480 iemNativeRegFreeTmp(pReNative, idxEflReg);
7481
7482 /* Make a copy of the core state now as we start the if-block. */
7483 iemNativeCondStartIfBlock(pReNative, off);
7484
7485 return off;
7486}
7487
7488
7489#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
7490 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
7491 do {
7492
7493#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
7494 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
7495 do {
7496
7497/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
7498DECL_INLINE_THROW(uint32_t)
7499iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7500 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7501{
7502 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7503
7504 /* Get the eflags. */
7505 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7506 kIemNativeGstRegUse_ReadOnly);
7507
7508 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7509 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7510
7511 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7512 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7513 Assert(iBitNo1 != iBitNo2);
7514
7515#ifdef RT_ARCH_AMD64
7516 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
7517
7518 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7519 if (iBitNo1 > iBitNo2)
7520 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7521 else
7522 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7523 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7524
7525#elif defined(RT_ARCH_ARM64)
7526 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7527 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7528
7529 /* and tmpreg, eflreg, #1<<iBitNo1 */
7530 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7531
7532 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7533 if (iBitNo1 > iBitNo2)
7534 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7535 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7536 else
7537 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7538 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7539
7540 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7541
7542#else
7543# error "Port me"
7544#endif
7545
7546 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7547 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7548 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7549
7550 /* Free but don't flush the EFlags and tmp registers. */
7551 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7552 iemNativeRegFreeTmp(pReNative, idxEflReg);
7553
7554 /* Make a copy of the core state now as we start the if-block. */
7555 iemNativeCondStartIfBlock(pReNative, off);
7556
7557 return off;
7558}
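
/* The AND + shift + EOR sequence above boils down to "are these two EFLAGS
   bits equal".  A standalone C model of the same arithmetic (made-up helper,
   compiled out): IEM_MC_IF_EFL_BITS_EQ enters its if-block when this returns
   false, IEM_MC_IF_EFL_BITS_NE when it returns true. */
#if 0
static bool iemExampleEflBitsDiffer(uint32_t fEfl, unsigned iBit1, unsigned iBit2)
{
    uint32_t uTmp = fEfl & RT_BIT_32(iBit1);                                    /* isolate bit 1 */
    uTmp = iBit1 > iBit2 ? uTmp >> (iBit1 - iBit2) : uTmp << (iBit2 - iBit1);   /* move it onto bit 2 */
    uTmp ^= fEfl;                                                               /* XOR with the original flags */
    return (uTmp & RT_BIT_32(iBit2)) != 0;                                      /* bit 2 set <=> the two bits differ */
}
#endif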
7559
7560
7561#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
7562 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
7563 do {
7564
7565#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
7566 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
7567 do {
7568
7569/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
7570 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
7571DECL_INLINE_THROW(uint32_t)
7572iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
7573 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7574{
7575 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7576
7577 /* We need an explicit if-block label for the inverted variant. */
7578 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
7579 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
7580
7581 /* Get the eflags. */
7582 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7583 kIemNativeGstRegUse_ReadOnly);
7584
7585 /* Translate the flag masks to bit numbers. */
7586 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7587 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7588
7589 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7590 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7591 Assert(iBitNo1 != iBitNo);
7592
7593 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7594 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7595 Assert(iBitNo2 != iBitNo);
7596 Assert(iBitNo2 != iBitNo1);
7597
7598#ifdef RT_ARCH_AMD64
7599 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
7600#elif defined(RT_ARCH_ARM64)
7601 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7602#endif
7603
7604 /* Check for the lone bit first. */
7605 if (!fInverted)
7606 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7607 else
7608 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
7609
7610 /* Then extract and compare the other two bits. */
7611#ifdef RT_ARCH_AMD64
7612 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7613 if (iBitNo1 > iBitNo2)
7614 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7615 else
7616 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7617 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7618
7619#elif defined(RT_ARCH_ARM64)
7620 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7621
7622 /* and tmpreg, eflreg, #1<<iBitNo1 */
7623 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7624
7625 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7626 if (iBitNo1 > iBitNo2)
7627 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7628 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7629 else
7630 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7631 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7632
7633 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7634
7635#else
7636# error "Port me"
7637#endif
7638
7639 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7640 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7641 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7642
7643 /* Free but don't flush the EFlags and tmp registers. */
7644 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7645 iemNativeRegFreeTmp(pReNative, idxEflReg);
7646
7647 /* Make a copy of the core state now as we start the if-block. */
7648 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
7649
7650 return off;
7651}
7652
7653
7654#define IEM_MC_IF_CX_IS_NZ() \
7655 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
7656 do {
7657
7658/** Emits code for IEM_MC_IF_CX_IS_NZ. */
7659DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7660{
7661 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7662
7663 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7664 kIemNativeGstRegUse_ReadOnly);
7665 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
7666 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7667
7668 iemNativeCondStartIfBlock(pReNative, off);
7669 return off;
7670}
7671
7672
7673#define IEM_MC_IF_ECX_IS_NZ() \
7674 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
7675 do {
7676
7677#define IEM_MC_IF_RCX_IS_NZ() \
7678 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
7679 do {
7680
7681/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
7682DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7683{
7684 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7685
7686 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7687 kIemNativeGstRegUse_ReadOnly);
7688 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
7689 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7690
7691 iemNativeCondStartIfBlock(pReNative, off);
7692 return off;
7693}
7694
7695
7696#define IEM_MC_IF_CX_IS_NOT_ONE() \
7697 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
7698 do {
7699
7700/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
7701DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7702{
7703 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7704
7705 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7706 kIemNativeGstRegUse_ReadOnly);
7707#ifdef RT_ARCH_AMD64
7708 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7709#else
7710 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7711 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7712 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7713#endif
7714 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7715
7716 iemNativeCondStartIfBlock(pReNative, off);
7717 return off;
7718}
7719
7720
7721#define IEM_MC_IF_ECX_IS_NOT_ONE() \
7722 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
7723 do {
7724
7725#define IEM_MC_IF_RCX_IS_NOT_ONE() \
7726 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
7727 do {
7728
7729/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
7730DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7731{
7732 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7733
7734 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7735 kIemNativeGstRegUse_ReadOnly);
7736 if (f64Bit)
7737 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7738 else
7739 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7740 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7741
7742 iemNativeCondStartIfBlock(pReNative, off);
7743 return off;
7744}
7745
7746
7747#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7748 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
7749 do {
7750
7751#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7752 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
7753 do {
7754
7755/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
7756 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7757DECL_INLINE_THROW(uint32_t)
7758iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
7759{
7760 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7761
7762 /* We have to load both RCX and EFLAGS before we can start branching,
7763 otherwise we'll end up in the else-block with an inconsistent
7764 register allocator state.
7765 Doing EFLAGS first as it's more likely to be loaded, right? */
7766 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7767 kIemNativeGstRegUse_ReadOnly);
7768 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7769 kIemNativeGstRegUse_ReadOnly);
7770
7771 /** @todo we could reduce this to a single branch instruction by spending a
7772 * temporary register and some setnz stuff. Not sure if loops are
7773 * worth it. */
7774 /* Check CX. */
7775#ifdef RT_ARCH_AMD64
7776 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7777#else
7778 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7779 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7780 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7781#endif
7782
7783 /* Check the EFlags bit. */
7784 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7785 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7786 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7787 !fCheckIfSet /*fJmpIfSet*/);
7788
7789 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7790 iemNativeRegFreeTmp(pReNative, idxEflReg);
7791
7792 iemNativeCondStartIfBlock(pReNative, off);
7793 return off;
7794}
7795
7796
7797#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7798 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
7799 do {
7800
7801#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7802 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
7803 do {
7804
7805#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7806 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
7807 do {
7808
7809#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7810 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
7811 do {
7812
7813/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
7814 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
7815 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
7816 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7817DECL_INLINE_THROW(uint32_t)
7818iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7819 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
7820{
7821 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7822
7823 /* We have to load both RCX and EFLAGS before we can start branching,
7824 otherwise we'll end up in the else-block with an inconsistent
7825 register allocator state.
7826 Doing EFLAGS first as it's more likely to be loaded, right? */
7827 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7828 kIemNativeGstRegUse_ReadOnly);
7829 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7830 kIemNativeGstRegUse_ReadOnly);
7831
7832 /** @todo we could reduce this to a single branch instruction by spending a
7833 * temporary register and some setnz stuff. Not sure if loops are
7834 * worth it. */
7835 /* Check RCX/ECX. */
7836 if (f64Bit)
7837 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7838 else
7839 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7840
7841 /* Check the EFlags bit. */
7842 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7843 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7844 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7845 !fCheckIfSet /*fJmpIfSet*/);
7846
7847 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7848 iemNativeRegFreeTmp(pReNative, idxEflReg);
7849
7850 iemNativeCondStartIfBlock(pReNative, off);
7851 return off;
7852}
7853
7854
7855
7856/*********************************************************************************************************************************
7857* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7858*********************************************************************************************************************************/
7859/** Number of hidden arguments for CIMPL calls.
7860 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
7861#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7862# define IEM_CIMPL_HIDDEN_ARGS 3
7863#else
7864# define IEM_CIMPL_HIDDEN_ARGS 2
7865#endif
7866
7867#define IEM_MC_NOREF(a_Name) \
7868 RT_NOREF_PV(a_Name)
7869
7870#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
7871 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
7872
7873#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
7874 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
7875
7876#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
7877 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
7878
7879#define IEM_MC_LOCAL(a_Type, a_Name) \
7880 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
7881
7882#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
7883 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
7884
7885
7886/**
7887 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
7888 */
7889DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
7890{
7891 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
7892 return IEM_CIMPL_HIDDEN_ARGS;
7893 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
7894 return 1;
7895 return 0;
7896}
7897
7898
7899/**
7900 * Internal work that allocates a variable with kind set to
7901 * kIemNativeVarKind_Invalid and no current stack allocation.
7902 *
7903 * The kind will either be set by the caller or later when the variable is first
7904 * assigned a value.
7905 *
7906 * @returns Unpacked index.
7907 * @internal
7908 */
7909static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7910{
7911 Assert(cbType > 0 && cbType <= 64);
7912 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7913 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7914 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7915 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7916 pReNative->Core.aVars[idxVar].cbVar = cbType;
7917 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7918 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7919 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7920 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7921 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7922 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7923 pReNative->Core.aVars[idxVar].u.uValue = 0;
7924 return idxVar;
7925}
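
/* The allocation above is a find-first-free-bit scan over Core.bmVars.  As a
   standalone model (made-up helper, compiled out, using a GCC/Clang builtin
   in place of ASMBitFirstSetU32): */
#if 0
static int iemExampleAllocBitmapIndex(uint32_t *pbmInUse)
{
    uint32_t const bmFree = ~*pbmInUse;
    if (!bmFree)
        return -1;                          /* all 32 slots taken */
    int const idx = __builtin_ctz(bmFree);  /* lowest set bit of the inverted mask == first free slot */
    *pbmInUse |= UINT32_C(1) << idx;
    return idx;
}
#endif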
7926
7927
7928/**
7929 * Internal work that allocates an argument variable w/o setting enmKind.
7930 *
7931 * @returns Unpacked index.
7932 * @internal
7933 */
7934static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7935{
7936 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7937 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7938 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7939
7940 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7941 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7942 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7943 return idxVar;
7944}
7945
7946
7947/**
7948 * Gets the stack slot for a stack variable, allocating one if necessary.
7949 *
7950 * Calling this function implies that the stack slot will contain a valid
7951 * variable value. The caller deals with any register currently assigned to the
7952 * variable, typically by spilling it into the stack slot.
7953 *
7954 * @returns The stack slot number.
7955 * @param pReNative The recompiler state.
7956 * @param idxVar The variable.
7957 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7958 */
7959DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7960{
7961 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7962 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7963 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7964
7965 /* Already got a slot? */
7966 uint8_t const idxStackSlot = pVar->idxStackSlot;
7967 if (idxStackSlot != UINT8_MAX)
7968 {
7969 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7970 return idxStackSlot;
7971 }
7972
7973 /*
7974 * A single slot is easy to allocate.
7975 * Allocate them from the top end, closest to BP, to reduce the displacement.
7976 */
7977 if (pVar->cbVar <= sizeof(uint64_t))
7978 {
7979 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7980 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7981 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7982 pVar->idxStackSlot = (uint8_t)iSlot;
7983 Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7984 return (uint8_t)iSlot;
7985 }
7986
7987 /*
7988 * We need more than one stack slot.
7989 *
7990 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7991 */
7992 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7993 Assert(pVar->cbVar <= 64);
7994 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7995 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7996 uint32_t bmStack = ~pReNative->Core.bmStack;
7997 while (bmStack != UINT32_MAX)
7998 {
7999/** @todo allocate from the top to reduce BP displacement. */
8000 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
8001 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8002 if (!(iSlot & fBitAlignMask))
8003 {
8004 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
8005 {
8006 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
8007 pVar->idxStackSlot = (uint8_t)iSlot;
8008 Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8009 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
8010 return (uint8_t)iSlot;
8011 }
8012 }
8013 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
8014 }
8015 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8016}
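
/* The multi-slot path above looks for a naturally aligned run of free slots in
   the 32-bit stack bitmap.  A simplified standalone model of that search
   (made-up helper, compiled out; cSlots is 2, 4 or 8 as implied by the masks
   above, and unlike the real loop it simply walks the aligned bases): */
#if 0
static int iemExampleAllocAlignedSlots(uint32_t *pbmUsed, unsigned cSlots)
{
    uint32_t const fRunMask = (UINT32_C(1) << cSlots) - 1;          /* e.g. cSlots=4 -> 0xf */
    for (unsigned iSlot = 0; iSlot + cSlots <= 32; iSlot += cSlots)  /* naturally aligned bases only */
        if (!(*pbmUsed & (fRunMask << iSlot)))
        {
            *pbmUsed |= fRunMask << iSlot;
            return (int)iSlot;
        }
    return -1;
}
#endif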
8017
8018
8019/**
8020 * Changes the variable to a stack variable.
8021 *
8022 * Currently this is only possible to do the first time the variable is used;
8023 * switching later can be implemented but hasn't been done.
8024 *
8025 * @param pReNative The recompiler state.
8026 * @param idxVar The variable.
8027 * @throws VERR_IEM_VAR_IPE_2
8028 */
8029static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8030{
8031 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8032 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8033 if (pVar->enmKind != kIemNativeVarKind_Stack)
8034 {
8035 /* We could in theory transition from immediate to stack as well, but it
8036 would involve the caller doing work storing the value on the stack. So,
8037 till that's required we only allow transition from invalid. */
8038 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8039 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8040 pVar->enmKind = kIemNativeVarKind_Stack;
8041
8042 /* Note! We don't allocate a stack slot here, that's only done when a
8043 slot is actually needed to hold a variable value. */
8044 }
8045}
8046
8047
8048/**
8049 * Sets the variable to a constant (immediate) value.
8050 *
8051 * This does not require stack storage as we know the value and can always
8052 * reload it, unless of course it's referenced.
8053 *
8054 * @param pReNative The recompiler state.
8055 * @param idxVar The variable.
8056 * @param uValue The immediate value.
8057 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8058 */
8059static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
8060{
8061 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8062 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8063 if (pVar->enmKind != kIemNativeVarKind_Immediate)
8064 {
8065 /* Only simple transitions for now. */
8066 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8067 pVar->enmKind = kIemNativeVarKind_Immediate;
8068 }
8069 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8070
8071 pVar->u.uValue = uValue;
8072 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
8073 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
8074 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
8075}
8076
8077
8078/**
8079 * Sets the variable to a reference (pointer) to @a idxOtherVar.
8080 *
8081 * This does not require stack storage as we know the value and can always
8082 * reload it. Loading is postponed till needed.
8083 *
8084 * @param pReNative The recompiler state.
8085 * @param idxVar The variable. Unpacked.
8086 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
8087 *
8088 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8089 * @internal
8090 */
8091static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
8092{
8093 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
8094 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
8095
8096 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
8097 {
8098 /* Only simple transitions for now. */
8099 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
8100 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8101 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
8102 }
8103 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8104
8105 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
8106
8107 /* Update the other variable, ensure it's a stack variable. */
8108 /** @todo handle variables with const values... that'll go boom now. */
8109 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
8110 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8111}
8112
8113
8114/**
8115 * Sets the variable to a reference (pointer) to a guest register reference.
8116 *
8117 * This does not require stack storage as we know the value and can always
8118 * reload it. Loading is postponed till needed.
8119 *
8120 * @param pReNative The recompiler state.
8121 * @param idxVar The variable.
8122 * @param enmRegClass The class of guest registers to reference.
8123 * @param idxReg The register within @a enmRegClass to reference.
8124 *
8125 * @throws VERR_IEM_VAR_IPE_2
8126 */
8127static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8128 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
8129{
8130 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8131 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8132
8133 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
8134 {
8135 /* Only simple transitions for now. */
8136 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8137 pVar->enmKind = kIemNativeVarKind_GstRegRef;
8138 }
8139 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8140
8141 pVar->u.GstRegRef.enmClass = enmRegClass;
8142 pVar->u.GstRegRef.idx = idxReg;
8143}
8144
8145
8146DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
8147{
8148 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8149}
8150
8151
8152DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
8153{
8154 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8155
8156 /* Since we're using a generic uint64_t value type, we must truncate it if
8157 the variable is smaller, otherwise we may end up with too large a value when
8158 scaling up an imm8 w/ sign-extension.
8159
8160 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
8161 in the bios, bx=1) when running on arm, because clang expects 16-bit
8162 register parameters to have bits 16 and up set to zero. Instead of
8163 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
8164 CF value in the result. */
8165 switch (cbType)
8166 {
8167 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8168 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8169 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8170 }
8171 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8172 return idxVar;
8173}
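
/* Why the truncation above matters: a sign-extended imm8 arrives as a full
   uint64_t, so without masking a 16-bit argument would carry set bits above
   bit 15 into the native call.  A standalone illustration (made-up helper,
   compiled out): */
#if 0
static uint64_t iemExampleTruncateConst(uint64_t uValue, uint8_t cbType)
{
    switch (cbType)
    {
        case sizeof(uint8_t):  return uValue & UINT64_C(0xff);
        case sizeof(uint16_t): return uValue & UINT64_C(0xffff);
        case sizeof(uint32_t): return uValue & UINT64_C(0xffffffff);
        default:               return uValue;
    }
}
/* iemExampleTruncateConst((uint64_t)(int64_t)(int8_t)0xff, sizeof(uint16_t)) == UINT64_C(0xffff) */
#endif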
8174
8175
8176DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
8177{
8178 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
8179 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
8180 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
8181 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
8182 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
8183 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
8184
8185 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
8186 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
8187 return idxArgVar;
8188}
8189
8190
8191DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
8192{
8193 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8194 /* Don't set to stack now, leave that to the first use as for instance
8195 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
8196 return idxVar;
8197}
8198
8199
8200DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
8201{
8202 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8203
8204 /* Since we're using a generic uint64_t value type, we must truncate it if
8205 the variable is smaller, otherwise we may end up with too large a value when
8206 scaling up an imm8 w/ sign-extension. */
8207 switch (cbType)
8208 {
8209 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8210 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8211 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8212 }
8213 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8214 return idxVar;
8215}
8216
8217
8218/**
8219 * Makes sure variable @a idxVar has a register assigned to it and that it stays
8220 * fixed till we call iemNativeVarRegisterRelease.
8221 *
8222 * @returns The host register number.
8223 * @param pReNative The recompiler state.
8224 * @param idxVar The variable.
8225 * @param poff Pointer to the instruction buffer offset.
8226 * In case a register needs to be freed up or the value
8227 * loaded off the stack.
8228 * @param fInitialized Set if the variable must already have been initialized.
8229 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8230 * the case.
8231 * @param idxRegPref Preferred register number or UINT8_MAX.
8232 */
8233DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8234 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8235{
8236 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8237 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8238 Assert(pVar->cbVar <= 8);
8239 Assert(!pVar->fRegAcquired);
8240
8241 uint8_t idxReg = pVar->idxReg;
8242 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8243 {
8244 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8245 && pVar->enmKind < kIemNativeVarKind_End);
8246 pVar->fRegAcquired = true;
8247 return idxReg;
8248 }
8249
8250 /*
8251 * If the kind of variable has not yet been set, default to 'stack'.
8252 */
8253 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8254 && pVar->enmKind < kIemNativeVarKind_End);
8255 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8256 iemNativeVarSetKindToStack(pReNative, idxVar);
8257
8258 /*
8259 * We have to allocate a register for the variable, even if it's a stack one,
8260 * as we don't know if there are modifications being made to it before it's
8261 * finalized (todo: analyze and insert hints about that?).
8262 *
8263 * If we can, we try to get the correct register for argument variables. This
8264 * is assuming that most argument variables are fetched as close as possible
8265 * to the actual call, so that there aren't any interfering hidden calls
8266 * (memory accesses, etc.) in between.
8267 *
8268 * If we cannot, or it's a local (non-argument) variable, we make sure no
8269 * argument registers that will be used by this MC block will be allocated
8270 * here, and we always prefer non-volatile registers to avoid needing to spill
8271 * stuff for internal calls.
8272 */
8273 /** @todo Detect too early argument value fetches and warn about hidden
8274 * calls causing less optimal code to be generated in the python script. */
8275
8276 uint8_t const uArgNo = pVar->uArgNo;
8277 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
8278 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
8279 {
8280 idxReg = g_aidxIemNativeCallRegs[uArgNo];
8281 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8282 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
8283 }
8284 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
8285 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
8286 {
8287 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8288 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
8289 & ~pReNative->Core.bmHstRegsWithGstShadow
8290 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
8291 & fNotArgsMask;
8292 if (fRegs)
8293 {
8294             /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
8295 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
8296 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
8297 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
8298 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
8299 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8300 }
8301 else
8302 {
8303 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8304 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
8305 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8306 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8307 }
8308 }
8309 else
8310 {
8311 idxReg = idxRegPref;
8312 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8313 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8314 }
8315 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8316 pVar->idxReg = idxReg;
8317
8318 /*
8319 * Load it off the stack if we've got a stack slot.
8320 */
8321 uint8_t const idxStackSlot = pVar->idxStackSlot;
8322 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8323 {
8324 Assert(fInitialized);
8325 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8326 switch (pVar->cbVar)
8327 {
8328 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
8329 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
8330 case 3: AssertFailed(); RT_FALL_THRU();
8331 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
8332 default: AssertFailed(); RT_FALL_THRU();
8333 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
8334 }
8335 }
8336 else
8337 {
8338 Assert(idxStackSlot == UINT8_MAX);
8339 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8340 }
8341 pVar->fRegAcquired = true;
8342 return idxReg;
8343}
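
/*
 * Illustrative sketch (not part of the original source): the typical
 * acquire/use/release pattern around iemNativeVarRegisterAcquire.  The
 * variable index idxVarFoo is hypothetical and assumed to be initialized.
 */
#if 0 /* example only */
uint8_t const idxRegFoo = iemNativeVarRegisterAcquire(pReNative, idxVarFoo, &off, true /*fInitialized*/);
/* ... emit code that uses idxRegFoo while it stays fixed, e.g. a 32-bit zero-extending copy onto itself: */
off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegFoo, idxRegFoo);
iemNativeVarRegisterRelease(pReNative, idxVarFoo); /* unfix the register again */
#endif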
8344
8345
8346/**
8347 * The value of variable @a idxVar will be written in full to the @a enmGstReg
8348 * guest register.
8349 *
8350 * This function makes sure there is a register for it and sets it to be the
8351 * current shadow copy of @a enmGstReg.
8352 *
8353 * @returns The host register number.
8354 * @param pReNative The recompiler state.
8355 * @param idxVar The variable.
8356 * @param enmGstReg The guest register this variable will be written to
8357 * after this call.
8358 * @param poff Pointer to the instruction buffer offset.
8359 * In case a register needs to be freed up or if the
8360 * variable content needs to be loaded off the stack.
8361 *
8362 * @note We DO NOT expect @a idxVar to be an argument variable,
8363 *       because this function is only used in the commit stage of an
8364 *       instruction.
8365 */
8366DECL_HIDDEN_THROW(uint8_t)
8367iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
8368{
8369 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8370 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8371 Assert(!pVar->fRegAcquired);
8372 AssertMsgStmt( pVar->cbVar <= 8
8373 && ( pVar->enmKind == kIemNativeVarKind_Immediate
8374 || pVar->enmKind == kIemNativeVarKind_Stack),
8375 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
8376 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
8377 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8378
8379 /*
8380 * This shouldn't ever be used for arguments, unless it's in a weird else
8381 * branch that doesn't do any calling and even then it's questionable.
8382 *
8383 * However, in case someone writes crazy wrong MC code and does register
8384 * updates before making calls, just use the regular register allocator to
8385 * ensure we get a register suitable for the intended argument number.
8386 */
8387 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
8388
8389 /*
8390 * If there is already a register for the variable, we transfer/set the
8391 * guest shadow copy assignment to it.
8392 */
8393 uint8_t idxReg = pVar->idxReg;
8394 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8395 {
8396 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
8397 {
8398 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
8399 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
8400 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
8401 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
8402 }
8403 else
8404 {
8405 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
8406 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
8407 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
8408 }
8409 /** @todo figure this one out. We need some way of making sure the register isn't
8410 * modified after this point, just in case we start writing crappy MC code. */
8411 pVar->enmGstReg = enmGstReg;
8412 pVar->fRegAcquired = true;
8413 return idxReg;
8414 }
8415 Assert(pVar->uArgNo == UINT8_MAX);
8416
8417 /*
8418     * Because this is supposed to be the commit stage, we just tag along with the
8419     * temporary register allocator and upgrade the allocation to a variable register.
8420 */
8421 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
8422 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
8423 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
8424 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
8425 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
8426 pVar->idxReg = idxReg;
8427
8428 /*
8429 * Now we need to load the register value.
8430 */
8431 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8432 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
8433 else
8434 {
8435 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8436 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8437 switch (pVar->cbVar)
8438 {
8439 case sizeof(uint64_t):
8440 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
8441 break;
8442 case sizeof(uint32_t):
8443 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
8444 break;
8445 case sizeof(uint16_t):
8446 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
8447 break;
8448 case sizeof(uint8_t):
8449 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
8450 break;
8451 default:
8452 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8453 }
8454 }
8455
8456 pVar->fRegAcquired = true;
8457 return idxReg;
8458}
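
/*
 * Illustrative sketch (not from the original source): typical use of
 * iemNativeVarRegisterAcquireForGuestReg when a variable is about to be
 * written in full to a guest GPR (rAX here); idxVarSrc is hypothetical.
 */
#if 0 /* example only */
uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarSrc,
                                                                 IEMNATIVEGSTREG_GPR(X86_GREG_xAX), &off);
/* ... emit the actual store of idxVarReg into the guest context here ... */
iemNativeVarRegisterRelease(pReNative, idxVarSrc);
#endif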
8459
8460
8461/**
8462 * Sets the host register for @a idxVarRc to @a idxReg.
8463 *
8464 * The register must not be allocated. Any guest register shadowing will be
8465 * implicitly dropped by this call.
8466 *
8467 * The variable must not have any register associated with it (causes
8468 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
8469 * implied.
8470 *
8471 * @returns idxReg
8472 * @param pReNative The recompiler state.
8473 * @param idxVar The variable.
8474 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
8475 * @param off For recording in debug info.
8476 *
8477 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
8478 */
8479DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
8480{
8481 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8482 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8483 Assert(!pVar->fRegAcquired);
8484 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
8485 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
8486 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
8487
8488 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
8489 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8490
8491 iemNativeVarSetKindToStack(pReNative, idxVar);
8492 pVar->idxReg = idxReg;
8493
8494 return idxReg;
8495}
8496
8497
8498/**
8499 * A convenient helper function.
8500 */
8501DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8502 uint8_t idxReg, uint32_t *poff)
8503{
8504 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
8505 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
8506 return idxReg;
8507}
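
/*
 * Illustrative sketch (not from the original source): binding a return value
 * variable to the call return register right after emitting a call, which is
 * what iemNativeEmitCallAImplCommon below does; idxVarRc and pfnAImpl are
 * placeholders here.
 */
#if 0 /* example only */
off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off); /* idxVarRc now lives in the return register */
#endif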
8508
8509
8510/**
8511 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
8512 *
8513 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
8514 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
8515 * requirement of flushing anything in volatile host registers when making a
8516 * call.
8517 *
8518 * @returns New @a off value.
8519 * @param pReNative The recompiler state.
8520 * @param off The code buffer position.
8521 * @param fHstRegsNotToSave Set of registers not to save & restore.
8522 */
8523DECL_HIDDEN_THROW(uint32_t)
8524iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8525{
8526 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8527 if (fHstRegs)
8528 {
8529 do
8530 {
8531 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8532 fHstRegs &= ~RT_BIT_32(idxHstReg);
8533
8534 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8535 {
8536 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8537 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8538 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8539 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8540 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8541 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8542 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8543 {
8544 case kIemNativeVarKind_Stack:
8545 {
8546 /* Temporarily spill the variable register. */
8547 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8548 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8549 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8550 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8551 continue;
8552 }
8553
8554 case kIemNativeVarKind_Immediate:
8555 case kIemNativeVarKind_VarRef:
8556 case kIemNativeVarKind_GstRegRef:
8557 /* It is weird to have any of these loaded at this point. */
8558 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8559 continue;
8560
8561 case kIemNativeVarKind_End:
8562 case kIemNativeVarKind_Invalid:
8563 break;
8564 }
8565 AssertFailed();
8566 }
8567 else
8568 {
8569 /*
8570 * Allocate a temporary stack slot and spill the register to it.
8571 */
8572 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8573 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
8574 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8575 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
8576 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
8577 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8578 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8579 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8580 }
8581 } while (fHstRegs);
8582 }
8583 return off;
8584}
8585
8586
8587/**
8588 * Emit code to restore volatile registers after a call to a helper.
8589 *
8590 * @returns New @a off value.
8591 * @param pReNative The recompiler state.
8592 * @param off The code buffer position.
8593 * @param fHstRegsNotToSave Set of registers not to save & restore.
8594 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8595 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8596 */
8597DECL_HIDDEN_THROW(uint32_t)
8598iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8599{
8600 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8601 if (fHstRegs)
8602 {
8603 do
8604 {
8605 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8606 fHstRegs &= ~RT_BIT_32(idxHstReg);
8607
8608 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8609 {
8610 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8611 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8612 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8613 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8614 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8615 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8616 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8617 {
8618 case kIemNativeVarKind_Stack:
8619 {
8620 /* Unspill the variable register. */
8621 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8622 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8623 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8624 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8625 continue;
8626 }
8627
8628 case kIemNativeVarKind_Immediate:
8629 case kIemNativeVarKind_VarRef:
8630 case kIemNativeVarKind_GstRegRef:
8631 /* It is weird to have any of these loaded at this point. */
8632 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8633 continue;
8634
8635 case kIemNativeVarKind_End:
8636 case kIemNativeVarKind_Invalid:
8637 break;
8638 }
8639 AssertFailed();
8640 }
8641 else
8642 {
8643 /*
8644 * Restore from temporary stack slot.
8645 */
8646 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8647 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8648 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8649 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8650
8651 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8652 }
8653 } while (fHstRegs);
8654 }
8655 return off;
8656}
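
/*
 * Illustrative sketch (not from the original source): the save/restore pair
 * as typically used around a helper call (e.g. on a TLB miss), with a zero
 * fHstRegsNotToSave mask meaning the caller doesn't manage any registers
 * itself.
 */
#if 0 /* example only */
off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);
/* ... load the helper arguments and emit the helper call here ... */
off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);
#endif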
8657
8658
8659/**
8660 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8661 *
8662 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8663 *
8664 * ASSUMES that @a idxVar is valid and unpacked.
8665 */
8666DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8667{
8668 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8669 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8670 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8671 {
8672 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8673 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8674 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8675 Assert(cSlots > 0);
8676 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8677 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8678 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8679 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8680 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8681 }
8682 else
8683 Assert(idxStackSlot == UINT8_MAX);
8684}
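
/*
 * Worked example (not from the original source) of the slot mask math above:
 * a hypothetical 16-byte variable gives cSlots = (16 + 7) / 8 = 2, so
 * fAllocMask = RT_BIT_32(2) - 1 = 0x3 and the two bits starting at
 * idxStackSlot are cleared from Core.bmStack when the variable is freed.
 */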
8685
8686
8687/**
8688 * Worker that frees a single variable.
8689 *
8690 * ASSUMES that @a idxVar is valid and unpacked.
8691 */
8692DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8693{
8694 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8695 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8696 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8697
8698 /* Free the host register first if any assigned. */
8699 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8700 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8701 {
8702 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8703 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8704 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8705 }
8706
8707 /* Free argument mapping. */
8708 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8709 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8710 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8711
8712 /* Free the stack slots. */
8713 iemNativeVarFreeStackSlots(pReNative, idxVar);
8714
8715 /* Free the actual variable. */
8716 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8717 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8718}
8719
8720
8721/**
8722 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8723 */
8724DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8725{
8726 while (bmVars != 0)
8727 {
8728 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8729 bmVars &= ~RT_BIT_32(idxVar);
8730
8731#if 1 /** @todo optimize by simplifying this later... */
8732 iemNativeVarFreeOneWorker(pReNative, idxVar);
8733#else
8734 /* Only need to free the host register, the rest is done as bulk updates below. */
8735 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8736 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8737 {
8738 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8739 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8740 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8741 }
8742#endif
8743 }
8744#if 0 /** @todo optimize by simplifying this later... */
8745 pReNative->Core.bmVars = 0;
8746 pReNative->Core.bmStack = 0;
8747 pReNative->Core.u64ArgVars = UINT64_MAX;
8748#endif
8749}
8750
8751
8752/**
8753 * This is called by IEM_MC_END() to clean up all variables.
8754 */
8755DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
8756{
8757 uint32_t const bmVars = pReNative->Core.bmVars;
8758 if (bmVars != 0)
8759 iemNativeVarFreeAllSlow(pReNative, bmVars);
8760 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8761 Assert(pReNative->Core.bmStack == 0);
8762}
8763
8764
8765#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
8766
8767/**
8768 * This is called by IEM_MC_FREE_LOCAL.
8769 */
8770DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8771{
8772 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8773 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
8774 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
8775}
8776
8777
8778#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
8779
8780/**
8781 * This is called by IEM_MC_FREE_ARG.
8782 */
8783DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8784{
8785 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8786 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
8787 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
8788}
8789
8790
8791#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
8792
8793/**
8794 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
8795 */
8796DECL_INLINE_THROW(uint32_t)
8797iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
8798{
8799 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
8800 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
8801 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8802 Assert( pVarDst->cbVar == sizeof(uint16_t)
8803 || pVarDst->cbVar == sizeof(uint32_t));
8804
8805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
8806 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
8807 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
8808 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
8809 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8810
8811 Assert(pVarDst->cbVar < pVarSrc->cbVar);
8812
8813 /*
8814 * Special case for immediates.
8815 */
8816 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
8817 {
8818 switch (pVarDst->cbVar)
8819 {
8820 case sizeof(uint16_t):
8821 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
8822 break;
8823 case sizeof(uint32_t):
8824 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
8825 break;
8826 default: AssertFailed(); break;
8827 }
8828 }
8829 else
8830 {
8831 /*
8832 * The generic solution for now.
8833 */
8834 /** @todo optimize this by having the python script make sure the source
8835 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
8836 * statement. Then we could just transfer the register assignments. */
8837 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
8838 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
8839 switch (pVarDst->cbVar)
8840 {
8841 case sizeof(uint16_t):
8842 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
8843 break;
8844 case sizeof(uint32_t):
8845 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
8846 break;
8847 default: AssertFailed(); break;
8848 }
8849 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
8850 iemNativeVarRegisterRelease(pReNative, idxVarDst);
8851 }
8852 return off;
8853}
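
/*
 * Illustrative sketch (not from the original source): IEM_MC_ASSIGN_TO_SMALLER
 * as it might appear in a microcode block, assigning a 32-bit local from a
 * 64-bit one (local names are hypothetical).
 */
#if 0 /* example only */
IEM_MC_LOCAL(uint64_t, u64Value);
IEM_MC_LOCAL(uint32_t, u32Value);
/* ... u64Value gets initialized here ... */
IEM_MC_ASSIGN_TO_SMALLER(u32Value, u64Value);
#endif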
8854
8855
8856
8857/*********************************************************************************************************************************
8858* Emitters for IEM_MC_CALL_CIMPL_XXX *
8859*********************************************************************************************************************************/
8860
8861/**
8862 * Emits code to load a reference to the given guest register into @a idxGprDst.
8863 */
8864DECL_INLINE_THROW(uint32_t)
8865iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8866 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8867{
8868#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8869    /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
8870#endif
8871
8872 /*
8873 * Get the offset relative to the CPUMCTX structure.
8874 */
8875 uint32_t offCpumCtx;
8876 switch (enmClass)
8877 {
8878 case kIemNativeGstRegRef_Gpr:
8879 Assert(idxRegInClass < 16);
8880 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8881 break;
8882
8883 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8884 Assert(idxRegInClass < 4);
8885 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8886 break;
8887
8888 case kIemNativeGstRegRef_EFlags:
8889 Assert(idxRegInClass == 0);
8890 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8891 break;
8892
8893 case kIemNativeGstRegRef_MxCsr:
8894 Assert(idxRegInClass == 0);
8895 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8896 break;
8897
8898 case kIemNativeGstRegRef_FpuReg:
8899 Assert(idxRegInClass < 8);
8900 AssertFailed(); /** @todo what kind of indexing? */
8901 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8902 break;
8903
8904 case kIemNativeGstRegRef_MReg:
8905 Assert(idxRegInClass < 8);
8906 AssertFailed(); /** @todo what kind of indexing? */
8907 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8908 break;
8909
8910 case kIemNativeGstRegRef_XReg:
8911 Assert(idxRegInClass < 16);
8912 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8913 break;
8914
8915 default:
8916 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8917 }
8918
8919 /*
8920 * Load the value into the destination register.
8921 */
8922#ifdef RT_ARCH_AMD64
8923 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8924
8925#elif defined(RT_ARCH_ARM64)
8926 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8927 Assert(offCpumCtx < 4096);
8928 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8929
8930#else
8931# error "Port me!"
8932#endif
8933
8934 return off;
8935}
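
/*
 * Illustrative sketch (not from the original source): loading the address of
 * the guest MXCSR field into the first call argument register, e.g. for an
 * IEM_MC_REF_MXCSR style reference (the register choice is just for
 * illustration).
 */
#if 0 /* example only */
off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
                                     kIemNativeGstRegRef_MxCsr, 0 /*idxRegInClass*/);
#endif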
8936
8937
8938/**
8939 * Common code for CIMPL and AIMPL calls.
8940 *
8941 * These are calls that use argument variables and such. They should not be
8942 * confused with internal calls required to implement an MC operation,
8943 * like a TLB load and similar.
8944 *
8945 * Upon return all that is left to do is to load any hidden arguments and
8946 * perform the call. All argument variables are freed.
8947 *
8948 * @returns New code buffer offset; throws VBox status code on error.
8949 * @param pReNative The native recompile state.
8950 * @param off The code buffer offset.
8951 * @param   cArgs           The total number of arguments (includes hidden
8952 * count).
8953 * @param cHiddenArgs The number of hidden arguments. The hidden
8954 * arguments must not have any variable declared for
8955 * them, whereas all the regular arguments must
8956 * (tstIEMCheckMc ensures this).
8957 */
8958DECL_HIDDEN_THROW(uint32_t)
8959iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8960{
8961#ifdef VBOX_STRICT
8962 /*
8963 * Assert sanity.
8964 */
8965 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8966 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8967 for (unsigned i = 0; i < cHiddenArgs; i++)
8968 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8969 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8970 {
8971 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8972 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8973 }
8974 iemNativeRegAssertSanity(pReNative);
8975#endif
8976
8977 /* We don't know what the called function makes use of, so flush any pending register writes. */
8978 off = iemNativeRegFlushPendingWrites(pReNative, off);
8979
8980 /*
8981 * Before we do anything else, go over variables that are referenced and
8982 * make sure they are not in a register.
8983 */
8984 uint32_t bmVars = pReNative->Core.bmVars;
8985 if (bmVars)
8986 {
8987 do
8988 {
8989 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8990 bmVars &= ~RT_BIT_32(idxVar);
8991
8992 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8993 {
8994 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8995 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8996 {
8997 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8998 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8999 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
9000 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9001 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9002
9003 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9004 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
9005 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
9006 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
9007 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
9008 }
9009 }
9010 } while (bmVars != 0);
9011#if 0 //def VBOX_STRICT
9012 iemNativeRegAssertSanity(pReNative);
9013#endif
9014 }
9015
9016 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
9017
9018 /*
9019 * First, go over the host registers that will be used for arguments and make
9020 * sure they either hold the desired argument or are free.
9021 */
9022 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
9023 {
9024 for (uint32_t i = 0; i < cRegArgs; i++)
9025 {
9026 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9027 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9028 {
9029 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
9030 {
9031 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
9032 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9033 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9034 Assert(pVar->idxReg == idxArgReg);
9035 uint8_t const uArgNo = pVar->uArgNo;
9036 if (uArgNo == i)
9037                    { /* perfect */ }
9038 /* The variable allocator logic should make sure this is impossible,
9039 except for when the return register is used as a parameter (ARM,
9040 but not x86). */
9041#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
9042 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
9043 {
9044# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9045# error "Implement this"
9046# endif
9047 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
9048 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
9049 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
9050 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9051 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
9052 }
9053#endif
9054 else
9055 {
9056 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9057
9058 if (pVar->enmKind == kIemNativeVarKind_Stack)
9059 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
9060 else
9061 {
9062 /* just free it, can be reloaded if used again */
9063 pVar->idxReg = UINT8_MAX;
9064 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
9065 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
9066 }
9067 }
9068 }
9069 else
9070 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
9071 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
9072 }
9073 }
9074#if 0 //def VBOX_STRICT
9075 iemNativeRegAssertSanity(pReNative);
9076#endif
9077 }
9078
9079 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
9080
9081#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9082 /*
9083 * If there are any stack arguments, make sure they are in their place as well.
9084 *
9085     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
9086     * the caller) will be loading it later and it must be free (see the first loop).
9087 */
9088 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
9089 {
9090 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
9091 {
9092 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9093 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
9094 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9095 {
9096 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
9097 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
9098 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
9099 pVar->idxReg = UINT8_MAX;
9100 }
9101 else
9102 {
9103 /* Use ARG0 as temp for stuff we need registers for. */
9104 switch (pVar->enmKind)
9105 {
9106 case kIemNativeVarKind_Stack:
9107 {
9108 uint8_t const idxStackSlot = pVar->idxStackSlot;
9109 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9110 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
9111 iemNativeStackCalcBpDisp(idxStackSlot));
9112 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9113 continue;
9114 }
9115
9116 case kIemNativeVarKind_Immediate:
9117 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
9118 continue;
9119
9120 case kIemNativeVarKind_VarRef:
9121 {
9122 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9123 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9124 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9125 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9126 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9127 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9128 {
9129 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9130 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9131 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9132 }
9133 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9134 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9135 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
9136 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9137 continue;
9138 }
9139
9140 case kIemNativeVarKind_GstRegRef:
9141 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
9142 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9143 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9144 continue;
9145
9146 case kIemNativeVarKind_Invalid:
9147 case kIemNativeVarKind_End:
9148 break;
9149 }
9150 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9151 }
9152 }
9153# if 0 //def VBOX_STRICT
9154 iemNativeRegAssertSanity(pReNative);
9155# endif
9156 }
9157#else
9158 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
9159#endif
9160
9161 /*
9162 * Make sure the argument variables are loaded into their respective registers.
9163 *
9164 * We can optimize this by ASSUMING that any register allocations are for
9165     * registers that have already been loaded and are ready. The previous step
9166 * saw to that.
9167 */
9168 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
9169 {
9170 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9171 {
9172 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9173 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9174 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
9175 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
9176 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
9177 else
9178 {
9179 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9180 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9181 {
9182 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9183 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
9184 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
9185 | RT_BIT_32(idxArgReg);
9186 pVar->idxReg = idxArgReg;
9187 }
9188 else
9189 {
9190 /* Use ARG0 as temp for stuff we need registers for. */
9191 switch (pVar->enmKind)
9192 {
9193 case kIemNativeVarKind_Stack:
9194 {
9195 uint8_t const idxStackSlot = pVar->idxStackSlot;
9196 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9197 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
9198 continue;
9199 }
9200
9201 case kIemNativeVarKind_Immediate:
9202 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
9203 continue;
9204
9205 case kIemNativeVarKind_VarRef:
9206 {
9207 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9208 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9209 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
9210 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9211 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9212 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9213 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9214 {
9215 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9216 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9217 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9218 }
9219 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9220 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9221 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
9222 continue;
9223 }
9224
9225 case kIemNativeVarKind_GstRegRef:
9226 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
9227 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9228 continue;
9229
9230 case kIemNativeVarKind_Invalid:
9231 case kIemNativeVarKind_End:
9232 break;
9233 }
9234 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9235 }
9236 }
9237 }
9238#if 0 //def VBOX_STRICT
9239 iemNativeRegAssertSanity(pReNative);
9240#endif
9241 }
9242#ifdef VBOX_STRICT
9243 else
9244 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9245 {
9246 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
9247 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
9248 }
9249#endif
9250
9251 /*
9252 * Free all argument variables (simplified).
9253 * Their lifetime always expires with the call they are for.
9254 */
9255 /** @todo Make the python script check that arguments aren't used after
9256 * IEM_MC_CALL_XXXX. */
9257    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
9258     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
9259     *        an argument value. There is also some FPU stuff. */
9260 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
9261 {
9262 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
9263 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
9264
9265 /* no need to free registers: */
9266 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
9267 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
9268 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
9269 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
9270 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
9271 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
9272
9273 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
9274 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9275 iemNativeVarFreeStackSlots(pReNative, idxVar);
9276 }
9277 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9278
9279 /*
9280 * Flush volatile registers as we make the call.
9281 */
9282 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
9283
9284 return off;
9285}
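
/*
 * Illustrative sketch (not from the original source): what a caller of
 * iemNativeEmitCallCommon is left to do afterwards - load any hidden
 * arguments and emit the actual call (compare iemNativeEmitCallCImplCommon
 * below); pfnWorker is a placeholder.
 */
#if 0 /* example only */
off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnWorker);
#endif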
9286
9287
9288/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
9289DECL_HIDDEN_THROW(uint32_t)
9290iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
9291 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
9292
9293{
9294 /*
9295 * Do all the call setup and cleanup.
9296 */
9297 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
9298
9299 /*
9300 * Load the two or three hidden arguments.
9301 */
9302#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9303 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
9304 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9305 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
9306#else
9307 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9308 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
9309#endif
9310
9311 /*
9312 * Make the call and check the return code.
9313 *
9314 * Shadow PC copies are always flushed here, other stuff depends on flags.
9315     * Segment and general purpose registers are explicitly flushed via the
9316 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
9317 * macros.
9318 */
9319 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
9320#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9321 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
9322#endif
9323 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
9324 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
9325 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
9326 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
9327
9328 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
9329}
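
/*
 * Illustrative sketch (not from the original source): composing a
 * fGstShwFlush mask for a cImpl worker that modifies RIP and RSP before
 * handing it to the common emitter; the worker and instruction details are
 * placeholders.
 */
#if 0 /* example only */
uint64_t const fGstShwFlush = RT_BIT_64(kIemNativeGstReg_Pc)
                            | RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP));
off = iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, 1 /*cArgs*/);
#endif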
9330
9331
9332#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
9333 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
9334
9335/** Emits code for IEM_MC_CALL_CIMPL_1. */
9336DECL_INLINE_THROW(uint32_t)
9337iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9338 uintptr_t pfnCImpl, uint8_t idxArg0)
9339{
9340 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9341 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
9342}
9343
9344
9345#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
9346 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
9347
9348/** Emits code for IEM_MC_CALL_CIMPL_2. */
9349DECL_INLINE_THROW(uint32_t)
9350iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9351 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
9352{
9353 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9354 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9355 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
9356}
9357
9358
9359#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
9360 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9361 (uintptr_t)a_pfnCImpl, a0, a1, a2)
9362
9363/** Emits code for IEM_MC_CALL_CIMPL_3. */
9364DECL_INLINE_THROW(uint32_t)
9365iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9366 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9367{
9368 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9369 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9370 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9371 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
9372}
9373
9374
9375#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
9376 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9377 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
9378
9379/** Emits code for IEM_MC_CALL_CIMPL_4. */
9380DECL_INLINE_THROW(uint32_t)
9381iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9382 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
9383{
9384 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9385 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9386 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9387 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
9388 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
9389}
9390
9391
9392#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
9393 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9394 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
9395
9396/** Emits code for IEM_MC_CALL_CIMPL_5. */
9397DECL_INLINE_THROW(uint32_t)
9398iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9399 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
9400{
9401 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9402 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9403 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9404 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
9405 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
9406 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
9407}
9408
9409
9410/** Recompiler debugging: Flush guest register shadow copies. */
9411#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
9412
9413
9414
9415/*********************************************************************************************************************************
9416* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
9417*********************************************************************************************************************************/
9418
9419/**
9420 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
9421 */
9422DECL_INLINE_THROW(uint32_t)
9423iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9424 uintptr_t pfnAImpl, uint8_t cArgs)
9425{
9426 if (idxVarRc != UINT8_MAX)
9427 {
9428 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
9429 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
9430 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
9431 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
9432 }
9433
9434 /*
9435 * Do all the call setup and cleanup.
9436 */
9437 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
9438
9439 /*
9440 * Make the call and update the return code variable if we've got one.
9441 */
9442 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
9443 if (idxVarRc != UINT8_MAX)
9444 {
9445        off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
9446 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
9447 }
9448
9449 return off;
9450}
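
/*
 * Illustrative sketch (not from the original source): an AIMPL call in a
 * microcode block as handled by the emitters below; the worker name and
 * argument locals are hypothetical.
 */
#if 0 /* example only */
IEM_MC_ARG(uint64_t *, pu64Dst, 0);
IEM_MC_ARG(uint32_t *, pEFlags, 1);
IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_someWorker_u64, pu64Dst, pEFlags);
#endif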
9451
9452
9453
9454#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
9455 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
9456
9457#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
9458 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
9459
9460/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
9461DECL_INLINE_THROW(uint32_t)
9462iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
9463{
9464 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
9465}
9466
9467
9468#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
9469 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
9470
9471#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
9472 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
9473
9474/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
9475DECL_INLINE_THROW(uint32_t)
9476iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
9477{
9478 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9479 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
9480}
9481
9482
9483#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
9484 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
9485
9486#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
9487 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
9488
9489/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
9490DECL_INLINE_THROW(uint32_t)
9491iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9492 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9493{
9494 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9495 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9496 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
9497}
9498
9499
9500#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
9501 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
9502
9503#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
9504 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
9505
9506/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
9507DECL_INLINE_THROW(uint32_t)
9508iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9509 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9510{
9511 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9512 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9513 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9514 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
9515}
9516
9517
9518#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
9519 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9520
9521#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
9522 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9523
9524/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
9525DECL_INLINE_THROW(uint32_t)
9526iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9527 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
9528{
9529 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9530 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9531 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9532 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
9533 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
9534}
9535
9536
9537
9538/*********************************************************************************************************************************
9539* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
9540*********************************************************************************************************************************/
9541
9542#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
9543 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
9544
9545#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9546 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
9547
9548#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9549 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
9550
9551#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9552 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
9553
9554
9555/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
9556 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
9557DECL_INLINE_THROW(uint32_t)
9558iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
9559{
9560 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9561 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9562 Assert(iGRegEx < 20);
9563
9564 /* Same discussion as in iemNativeEmitFetchGregU16 */
9565 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9566 kIemNativeGstRegUse_ReadOnly);
9567
9568 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9569 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9570
9571 /* The value is zero-extended to the full 64-bit host register width. */
9572 if (iGRegEx < 16)
9573 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9574 else
9575 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9576
9577 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9578 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9579 return off;
9580}
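
/*
 * Illustrative sketch (not from the original source): how the threaded fetch
 * above is driven from a microcode block; iGRegEx is 0..15 for the regular
 * byte registers and 16..19 for AH..BH, and the local name is hypothetical.
 */
#if 0 /* example only */
IEM_MC_LOCAL(uint8_t, u8Value);
IEM_MC_FETCH_GREG_U8_THREADED(u8Value, iGRegEx);
#endif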
9581
9582
9583#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9584 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
9585
9586#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9587 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
9588
9589#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9590 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
9591
9592/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
9593DECL_INLINE_THROW(uint32_t)
9594iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
9595{
9596 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9597 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
9598 Assert(iGRegEx < 20);
9599
9600 /* Same discussion as in iemNativeEmitFetchGregU16 */
9601 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9602 kIemNativeGstRegUse_ReadOnly);
9603
9604 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9605 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9606
9607 if (iGRegEx < 16)
9608 {
9609 switch (cbSignExtended)
9610 {
9611 case sizeof(uint16_t):
9612 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9613 break;
9614 case sizeof(uint32_t):
9615 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9616 break;
9617 case sizeof(uint64_t):
9618 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9619 break;
9620 default: AssertFailed(); break;
9621 }
9622 }
9623 else
9624 {
9625 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9626 switch (cbSignExtended)
9627 {
9628 case sizeof(uint16_t):
9629 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9630 break;
9631 case sizeof(uint32_t):
9632 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9633 break;
9634 case sizeof(uint64_t):
9635 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9636 break;
9637 default: AssertFailed(); break;
9638 }
9639 }
9640
9641 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9642 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9643 return off;
9644}
9645
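/* Illustrative only, not used by the recompiler: the sign extension performed by the
   emitter above expressed in plain C; the helper name is made up and the block is
   compiled out. Bits above the destination variable size are don't-care here. */
#if 0
DECLINLINE(uint64_t) iemNativeSketchFetchGReg8SxU64(uint64_t uGstFullReg, uint8_t iGRegEx, uint8_t cbSignExtended)
{
    uint8_t const u8Value = iGRegEx < 16 ? (uint8_t)uGstFullReg : (uint8_t)(uGstFullReg >> 8);
    switch (cbSignExtended)
    {
        case sizeof(uint16_t): return (uint16_t)(int16_t)(int8_t)u8Value;
        case sizeof(uint32_t): return (uint32_t)(int32_t)(int8_t)u8Value;
        default:               return (uint64_t)(int64_t)(int8_t)u8Value;
    }
}
#endif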
9646
9647
9648#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
9649 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
9650
9651#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
9652 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9653
9654#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
9655 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9656
9657/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
9658DECL_INLINE_THROW(uint32_t)
9659iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9660{
9661 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9662 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9663 Assert(iGReg < 16);
9664
9665 /*
9666 * We can either just load the low 16-bit of the GPR into a host register
9667 * for the variable, or we can do so via a shadow copy host register. The
9668 * latter will avoid having to reload it if it's being stored later, but
9669 * will waste a host register if it isn't touched again. Since we don't
9670     * know what's going to happen, we choose the latter for now.
9671 */
9672 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9673 kIemNativeGstRegUse_ReadOnly);
9674
9675 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9676 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9677 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9678 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9679
9680 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9681 return off;
9682}
9683
9684
9685#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
9686 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9687
9688#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
9689 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9690
9691/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
9692DECL_INLINE_THROW(uint32_t)
9693iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
9694{
9695 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9696 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
9697 Assert(iGReg < 16);
9698
9699 /*
9700 * We can either just load the low 16-bit of the GPR into a host register
9701 * for the variable, or we can do so via a shadow copy host register. The
9702 * latter will avoid having to reload it if it's being stored later, but
9703 * will waste a host register if it isn't touched again. Since we don't
9704     * know what's going to happen, we choose the latter for now.
9705 */
9706 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9707 kIemNativeGstRegUse_ReadOnly);
9708
9709 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9710 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9711 if (cbSignExtended == sizeof(uint32_t))
9712 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9713 else
9714 {
9715 Assert(cbSignExtended == sizeof(uint64_t));
9716 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9717 }
9718 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9719
9720 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9721 return off;
9722}
9723
9724
9725#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
9726 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
9727
9728#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
9729 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
9730
9731/** Emits code for IEM_MC_FETCH_GREG_U32. */
9732DECL_INLINE_THROW(uint32_t)
9733iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9734{
9735 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9736 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9737 Assert(iGReg < 16);
9738
9739 /*
9740     * We can either just load the low 32-bit of the GPR into a host register
9741     * for the variable, or we can do so via a shadow copy host register. The
9742     * latter will avoid having to reload it if it's being stored later, but
9743     * will waste a host register if it isn't touched again. Since we don't
9744     * know what's going to happen, we choose the latter for now.
9745 */
9746 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9747 kIemNativeGstRegUse_ReadOnly);
9748
9749 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9750 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9751 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9752 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9753
9754 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9755 return off;
9756}
9757
9758
9759#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
9760 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
9761
9762/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
9763DECL_INLINE_THROW(uint32_t)
9764iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9765{
9766 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9767 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9768 Assert(iGReg < 16);
9769
9770 /*
9771 * We can either just load the low 32-bit of the GPR into a host register
9772 * for the variable, or we can do so via a shadow copy host register. The
9773 * latter will avoid having to reload it if it's being stored later, but
9774 * will waste a host register if it isn't touched again. Since we don't
9775     * know what's going to happen, we choose the latter for now.
9776 */
9777 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9778 kIemNativeGstRegUse_ReadOnly);
9779
9780 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9781 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9782 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9783 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9784
9785 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9786 return off;
9787}
9788
9789
9790#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
9791 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9792
9793#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
9794 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9795
9796/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
9797 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
9798DECL_INLINE_THROW(uint32_t)
9799iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9800{
9801 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9802 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9803 Assert(iGReg < 16);
9804
9805 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9806 kIemNativeGstRegUse_ReadOnly);
9807
9808 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9809 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9810 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
9811 /** @todo name the register a shadow one already? */
9812 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9813
9814 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9815 return off;
9816}
9817
9818
9819
9820/*********************************************************************************************************************************
9821* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
9822*********************************************************************************************************************************/
9823
9824#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
9825 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
9826
9827/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
9828DECL_INLINE_THROW(uint32_t)
9829iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
9830{
9831 Assert(iGRegEx < 20);
9832 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9833 kIemNativeGstRegUse_ForUpdate);
9834#ifdef RT_ARCH_AMD64
9835 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9836
9837 /* To the lowest byte of the register: mov r8, imm8 */
9838 if (iGRegEx < 16)
9839 {
9840 if (idxGstTmpReg >= 8)
9841 pbCodeBuf[off++] = X86_OP_REX_B;
9842 else if (idxGstTmpReg >= 4)
9843 pbCodeBuf[off++] = X86_OP_REX;
9844 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9845 pbCodeBuf[off++] = u8Value;
9846 }
9847    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
9848 else if (idxGstTmpReg < 4)
9849 {
9850 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
9851 pbCodeBuf[off++] = u8Value;
9852 }
9853 else
9854 {
9855 /* ror reg64, 8 */
9856 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9857 pbCodeBuf[off++] = 0xc1;
9858 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9859 pbCodeBuf[off++] = 8;
9860
9861 /* mov reg8, imm8 */
9862 if (idxGstTmpReg >= 8)
9863 pbCodeBuf[off++] = X86_OP_REX_B;
9864 else if (idxGstTmpReg >= 4)
9865 pbCodeBuf[off++] = X86_OP_REX;
9866 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9867 pbCodeBuf[off++] = u8Value;
9868
9869 /* rol reg64, 8 */
9870 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9871 pbCodeBuf[off++] = 0xc1;
9872 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9873 pbCodeBuf[off++] = 8;
9874 }
9875
9876#elif defined(RT_ARCH_ARM64)
9877 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
9878 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9879 if (iGRegEx < 16)
9880 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
9881 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
9882 else
9883 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
9884 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
9885 iemNativeRegFreeTmp(pReNative, idxImmReg);
9886
9887#else
9888# error "Port me!"
9889#endif
9890
9891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9892
9893 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9894
9895 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9896 return off;
9897}
9898
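/* Illustrative only, not used by the recompiler: what the ror/mov/rol (AMD64) resp. bfi
   (ARM64) sequence above boils down to for the AH/CH/DH/BH case, expressed in plain C
   with a made-up helper name (compiled out). */
#if 0
DECLINLINE(uint64_t) iemNativeSketchStoreGReg8HiConst(uint64_t uGstFullReg, uint8_t u8Value)
{
    /* Replace bits 15:8 of the 64-bit guest register and leave all other bits untouched. */
    return (uGstFullReg & ~UINT64_C(0xff00)) | ((uint64_t)u8Value << 8);
}
#endif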
9899
9900#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
9901 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
9902
9903/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
9904DECL_INLINE_THROW(uint32_t)
9905iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
9906{
9907 Assert(iGRegEx < 20);
9908 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9909
9910 /*
9911     * If it's a constant value (unlikely) we treat this as an
9912 * IEM_MC_STORE_GREG_U8_CONST statement.
9913 */
9914 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
9915 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
9916 { /* likely */ }
9917 else
9918 {
9919 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
9920 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9921 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
9922 }
9923
9924 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9925 kIemNativeGstRegUse_ForUpdate);
9926 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9927
9928#ifdef RT_ARCH_AMD64
9929 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
9930 if (iGRegEx < 16)
9931 {
9932 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
9933 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9934 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9935 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9936 pbCodeBuf[off++] = X86_OP_REX;
9937 pbCodeBuf[off++] = 0x8a;
9938 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9939 }
9940    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
9941 else if (idxGstTmpReg < 4 && idxVarReg < 4)
9942 {
9943 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
9944 pbCodeBuf[off++] = 0x8a;
9945 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
9946 }
9947 else
9948 {
9949 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
9950
9951 /* ror reg64, 8 */
9952 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9953 pbCodeBuf[off++] = 0xc1;
9954 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9955 pbCodeBuf[off++] = 8;
9956
9957 /* mov reg8, reg8(r/m) */
9958 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9959 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9960 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9961 pbCodeBuf[off++] = X86_OP_REX;
9962 pbCodeBuf[off++] = 0x8a;
9963 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9964
9965 /* rol reg64, 8 */
9966 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9967 pbCodeBuf[off++] = 0xc1;
9968 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9969 pbCodeBuf[off++] = 8;
9970 }
9971
9972#elif defined(RT_ARCH_ARM64)
9973 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
9974 or
9975 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
9976 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9977 if (iGRegEx < 16)
9978 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
9979 else
9980 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
9981
9982#else
9983# error "Port me!"
9984#endif
9985 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9986
9987 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9988
9989 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9990 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9991 return off;
9992}
9993
9994
9995
9996#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
9997 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
9998
9999/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
10000DECL_INLINE_THROW(uint32_t)
10001iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
10002{
10003 Assert(iGReg < 16);
10004 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10005 kIemNativeGstRegUse_ForUpdate);
10006#ifdef RT_ARCH_AMD64
10007 /* mov reg16, imm16 */
10008 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
10009 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10010 if (idxGstTmpReg >= 8)
10011 pbCodeBuf[off++] = X86_OP_REX_B;
10012 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
10013 pbCodeBuf[off++] = RT_BYTE1(uValue);
10014 pbCodeBuf[off++] = RT_BYTE2(uValue);
10015
10016#elif defined(RT_ARCH_ARM64)
10017 /* movk xdst, #uValue, lsl #0 */
10018 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10019 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
10020
10021#else
10022# error "Port me!"
10023#endif
10024
10025 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10026
10027 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10028 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10029 return off;
10030}
10031
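/* Illustrative only, not used by the recompiler: both the AMD64 'mov r16, imm16' and the
   ARM64 'movk' emitted above merge the constant into bits 15:0 and leave bits 63:16 of
   the guest register untouched; plain C sketch with a made-up helper name (compiled out). */
#if 0
DECLINLINE(uint64_t) iemNativeSketchStoreGReg16Const(uint64_t uGstFullReg, uint16_t uValue)
{
    return (uGstFullReg & ~UINT64_C(0xffff)) | uValue;
}
#endif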
10032
10033#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
10034 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
10035
10036/** Emits code for IEM_MC_STORE_GREG_U16. */
10037DECL_INLINE_THROW(uint32_t)
10038iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
10039{
10040 Assert(iGReg < 16);
10041 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10042
10043 /*
10044     * If it's a constant value (unlikely) we treat this as an
10045 * IEM_MC_STORE_GREG_U16_CONST statement.
10046 */
10047 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
10048 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
10049 { /* likely */ }
10050 else
10051 {
10052 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10053 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10054 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
10055 }
10056
10057 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10058 kIemNativeGstRegUse_ForUpdate);
10059
10060#ifdef RT_ARCH_AMD64
10061 /* mov reg16, reg16 or [mem16] */
10062 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
10063 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10064 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10065 {
10066 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
10067 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
10068 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
10069 pbCodeBuf[off++] = 0x8b;
10070 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
10071 }
10072 else
10073 {
10074 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
10075 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10076 if (idxGstTmpReg >= 8)
10077 pbCodeBuf[off++] = X86_OP_REX_R;
10078 pbCodeBuf[off++] = 0x8b;
10079 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
10080 }
10081
10082#elif defined(RT_ARCH_ARM64)
10083 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
10084 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
10085 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10086 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
10087 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10088
10089#else
10090# error "Port me!"
10091#endif
10092
10093 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10094
10095 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10096 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10097 return off;
10098}
10099
10100
10101#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
10102 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
10103
10104/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
10105DECL_INLINE_THROW(uint32_t)
10106iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
10107{
10108 Assert(iGReg < 16);
10109 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10110 kIemNativeGstRegUse_ForFullWrite);
10111 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
10112 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10113 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10114 return off;
10115}
10116
10117
10118#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
10119 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
10120
10121/** Emits code for IEM_MC_STORE_GREG_U32. */
10122DECL_INLINE_THROW(uint32_t)
10123iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
10124{
10125 Assert(iGReg < 16);
10126 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10127
10128 /*
10129     * If it's a constant value (unlikely) we treat this as an
10130 * IEM_MC_STORE_GREG_U32_CONST statement.
10131 */
10132 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
10133 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
10134 { /* likely */ }
10135 else
10136 {
10137 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10138 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10139 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
10140 }
10141
10142 /*
10143     * For the rest we allocate a guest register for the variable and write
10144     * it to the CPUMCTX structure.
10145 */
10146 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
10147 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10148#ifdef VBOX_STRICT
10149 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
10150#endif
10151 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10152 return off;
10153}
10154
10155
10156#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
10157 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
10158
10159/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
10160DECL_INLINE_THROW(uint32_t)
10161iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
10162{
10163 Assert(iGReg < 16);
10164 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10165 kIemNativeGstRegUse_ForFullWrite);
10166 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
10167 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10168 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10169 return off;
10170}
10171
10172
10173#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
10174 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
10175
10176/** Emits code for IEM_MC_STORE_GREG_U64. */
10177DECL_INLINE_THROW(uint32_t)
10178iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
10179{
10180 Assert(iGReg < 16);
10181 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10182
10183 /*
10184     * If it's a constant value (unlikely) we treat this as an
10185 * IEM_MC_STORE_GREG_U64_CONST statement.
10186 */
10187 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
10188 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
10189 { /* likely */ }
10190 else
10191 {
10192 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10193 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10194 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
10195 }
10196
10197 /*
10198     * For the rest we allocate a guest register for the variable and write
10199     * it to the CPUMCTX structure.
10200 */
10201 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
10202 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10203 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10204 return off;
10205}
10206
10207
10208#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
10209 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
10210
10211/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
10212DECL_INLINE_THROW(uint32_t)
10213iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
10214{
10215 Assert(iGReg < 16);
10216 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10217 kIemNativeGstRegUse_ForUpdate);
10218 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
10219 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10220 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10221 return off;
10222}
10223
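/* Illustrative only, not used by the recompiler: the 32-bit self-move emitted above relies
   on 32-bit register writes zero extending on both AMD64 and ARM64, i.e. in plain C terms
   (made-up helper name, compiled out): */
#if 0
DECLINLINE(uint64_t) iemNativeSketchClearHighGReg64(uint64_t uGstFullReg)
{
    return (uint32_t)uGstFullReg; /* bits 63:32 become zero, bits 31:0 are preserved */
}
#endif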
10224
10225/*********************************************************************************************************************************
10226* General purpose register manipulation (add, sub). *
10227*********************************************************************************************************************************/
10228
10229#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
10230 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
10231
10232/** Emits code for IEM_MC_ADD_GREG_U16. */
10233DECL_INLINE_THROW(uint32_t)
10234iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
10235{
10236 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10237 kIemNativeGstRegUse_ForUpdate);
10238
10239#ifdef RT_ARCH_AMD64
10240 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
10241 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10242 if (idxGstTmpReg >= 8)
10243 pbCodeBuf[off++] = X86_OP_REX_B;
10244 if (uAddend == 1)
10245 {
10246 pbCodeBuf[off++] = 0xff; /* inc */
10247 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10248 }
10249 else
10250 {
10251 pbCodeBuf[off++] = 0x81;
10252 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10253 pbCodeBuf[off++] = uAddend;
10254 pbCodeBuf[off++] = 0;
10255 }
10256
10257#else
10258 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
10259 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10260
10261    /* add tmp, gstgrp, uAddend */
10262 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
10263
10264 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
10265 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
10266
10267 iemNativeRegFreeTmp(pReNative, idxTmpReg);
10268#endif
10269
10270 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10271
10272 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10273
10274 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10275 return off;
10276}
10277
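/* Illustrative only, not used by the recompiler: the 16-bit add emitted above (AMD64
   'inc'/'add' with operand size prefix, ARM64 add+bfi) only updates bits 15:0 of the
   guest register and never carries into bit 16; plain C sketch, made-up helper name,
   compiled out. */
#if 0
DECLINLINE(uint64_t) iemNativeSketchAddGReg16(uint64_t uGstFullReg, uint8_t uAddend)
{
    return (uGstFullReg & ~UINT64_C(0xffff)) | (uint16_t)((uint16_t)uGstFullReg + uAddend);
}
#endif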
10278
10279#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
10280 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
10281
10282#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
10283 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
10284
10285/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
10286DECL_INLINE_THROW(uint32_t)
10287iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
10288{
10289 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10290 kIemNativeGstRegUse_ForUpdate);
10291
10292#ifdef RT_ARCH_AMD64
10293 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
10294 if (f64Bit)
10295 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
10296 else if (idxGstTmpReg >= 8)
10297 pbCodeBuf[off++] = X86_OP_REX_B;
10298 if (uAddend == 1)
10299 {
10300 pbCodeBuf[off++] = 0xff; /* inc */
10301 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10302 }
10303 else if (uAddend < 128)
10304 {
10305 pbCodeBuf[off++] = 0x83; /* add */
10306 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10307 pbCodeBuf[off++] = RT_BYTE1(uAddend);
10308 }
10309 else
10310 {
10311 pbCodeBuf[off++] = 0x81; /* add */
10312 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10313 pbCodeBuf[off++] = RT_BYTE1(uAddend);
10314 pbCodeBuf[off++] = 0;
10315 pbCodeBuf[off++] = 0;
10316 pbCodeBuf[off++] = 0;
10317 }
10318
10319#else
10320    /* add gstgrp, gstgrp, uAddend */
10321 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10322 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
10323
10324#endif
10325
10326 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10327
10328 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10329
10330 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10331 return off;
10332}
10333
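/* Illustrative only, not used by the recompiler: for the 32-bit variant both the AMD64
   'add r32, imm' and the ARM64 32-bit 'add' zero extend the result into bits 63:32, while
   the 64-bit variant is a plain 64-bit addition; sketch with a made-up helper name,
   compiled out. */
#if 0
DECLINLINE(uint64_t) iemNativeSketchAddGReg32U64(uint64_t uGstFullReg, uint8_t uAddend, bool f64Bit)
{
    if (f64Bit)
        return uGstFullReg + uAddend;
    return (uint32_t)((uint32_t)uGstFullReg + uAddend);
}
#endif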
10334
10335
10336#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
10337 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
10338
10339/** Emits code for IEM_MC_SUB_GREG_U16. */
10340DECL_INLINE_THROW(uint32_t)
10341iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
10342{
10343 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10344 kIemNativeGstRegUse_ForUpdate);
10345
10346#ifdef RT_ARCH_AMD64
10347 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
10348 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10349 if (idxGstTmpReg >= 8)
10350 pbCodeBuf[off++] = X86_OP_REX_B;
10351 if (uSubtrahend == 1)
10352 {
10353 pbCodeBuf[off++] = 0xff; /* dec */
10354 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10355 }
10356 else
10357 {
10358 pbCodeBuf[off++] = 0x81;
10359 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10360 pbCodeBuf[off++] = uSubtrahend;
10361 pbCodeBuf[off++] = 0;
10362 }
10363
10364#else
10365 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
10366 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10367
10368 /* sub tmp, gstgrp, uSubtrahend */
10369 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
10370
10371 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
10372 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
10373
10374 iemNativeRegFreeTmp(pReNative, idxTmpReg);
10375#endif
10376
10377 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10378
10379 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10380
10381 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10382 return off;
10383}
10384
10385
10386#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
10387 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
10388
10389#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
10390 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
10391
10392/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
10393DECL_INLINE_THROW(uint32_t)
10394iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
10395{
10396 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10397 kIemNativeGstRegUse_ForUpdate);
10398
10399#ifdef RT_ARCH_AMD64
10400 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
10401 if (f64Bit)
10402 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
10403 else if (idxGstTmpReg >= 8)
10404 pbCodeBuf[off++] = X86_OP_REX_B;
10405 if (uSubtrahend == 1)
10406 {
10407 pbCodeBuf[off++] = 0xff; /* dec */
10408 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10409 }
10410 else if (uSubtrahend < 128)
10411 {
10412 pbCodeBuf[off++] = 0x83; /* sub */
10413 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10414 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
10415 }
10416 else
10417 {
10418 pbCodeBuf[off++] = 0x81; /* sub */
10419 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10420 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
10421 pbCodeBuf[off++] = 0;
10422 pbCodeBuf[off++] = 0;
10423 pbCodeBuf[off++] = 0;
10424 }
10425
10426#else
10427 /* sub tmp, gstgrp, uSubtrahend */
10428 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10429 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
10430
10431#endif
10432
10433 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10434
10435 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10436
10437 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10438 return off;
10439}
10440
10441
10442/*********************************************************************************************************************************
10443* Local variable manipulation (add, sub, and, or). *
10444*********************************************************************************************************************************/
10445
10446#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
10447 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10448
10449#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
10450 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10451
10452#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
10453 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10454
10455#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
10456 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10457
10458/** Emits code for AND'ing a local and a constant value. */
10459DECL_INLINE_THROW(uint32_t)
10460iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10461{
10462#ifdef VBOX_STRICT
10463 switch (cbMask)
10464 {
10465 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10466 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10467 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10468 case sizeof(uint64_t): break;
10469 default: AssertFailedBreak();
10470 }
10471#endif
10472
10473 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10474 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
10475
10476 if (cbMask <= sizeof(uint32_t))
10477 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
10478 else
10479 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
10480
10481 iemNativeVarRegisterRelease(pReNative, idxVar);
10482 return off;
10483}
10484
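/* Illustrative only, not used by the recompiler: for locals of 32 bits or less the 32-bit
   AND emitted above is sufficient because the host register bits above the variable size
   are don't-care; 64-bit locals get the full-width AND. Plain C sketch, made-up helper
   name, compiled out. */
#if 0
DECLINLINE(uint64_t) iemNativeSketchAndLocal(uint64_t uLocal, uint64_t uMask, uint8_t cbMask)
{
    if (cbMask <= sizeof(uint32_t))
        return (uint32_t)uLocal & (uint32_t)uMask;
    return uLocal & uMask;
}
#endif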
10485
10486#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
10487 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10488
10489#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
10490 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10491
10492#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
10493 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10494
10495#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
10496 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10497
10498/** Emits code for OR'ing a local and a constant value. */
10499DECL_INLINE_THROW(uint32_t)
10500iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10501{
10502#ifdef VBOX_STRICT
10503 switch (cbMask)
10504 {
10505 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10506 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10507 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10508 case sizeof(uint64_t): break;
10509 default: AssertFailedBreak();
10510 }
10511#endif
10512
10513 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10514 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
10515
10516 if (cbMask <= sizeof(uint32_t))
10517 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
10518 else
10519 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
10520
10521 iemNativeVarRegisterRelease(pReNative, idxVar);
10522 return off;
10523}
10524
10525
10526#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
10527 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
10528
10529#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
10530 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
10531
10532#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
10533 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
10534
10535/** Emits code for reversing the byte order in a local value. */
10536DECL_INLINE_THROW(uint32_t)
10537iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
10538{
10539 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10540 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
10541
10542 switch (cbLocal)
10543 {
10544 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
10545 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
10546 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
10547 default: AssertFailedBreak();
10548 }
10549
10550 iemNativeVarRegisterRelease(pReNative, idxVar);
10551 return off;
10552}
10553
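/* Illustrative only, not used by the recompiler: the value transformation requested by the
   three BSWAP variants, reversing the low cbLocal bytes; for the 16- and 32-bit cases the
   host register bits above the variable size are don't-care. Plain C sketch, made-up
   helper name, compiled out. */
#if 0
DECLINLINE(uint64_t) iemNativeSketchBswapLocal(uint64_t uLocal, uint8_t cbLocal)
{
    uint64_t uResult = 0;
    for (uint8_t iByte = 0; iByte < cbLocal; iByte++)
        uResult |= ((uLocal >> (iByte * 8)) & 0xff) << ((cbLocal - 1 - iByte) * 8);
    return uResult;
}
#endif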
10554
10555
10556/*********************************************************************************************************************************
10557* EFLAGS *
10558*********************************************************************************************************************************/
10559
10560#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10561# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
10562#else
10563# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
10564 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
10565
10566DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
10567{
10568 if (fEflOutput)
10569 {
10570 PVMCPUCC const pVCpu = pReNative->pVCpu;
10571# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10572 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
10573 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
10574 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
10575# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10576 if (fEflOutput & (a_fEfl)) \
10577 { \
10578 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
10579 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10580 else \
10581 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10582 } else do { } while (0)
10583# else
10584 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
10585 IEMLIVENESSBIT const LivenessClobbered =
10586 {
10587 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10588 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10589 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10590 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10591 };
10592 IEMLIVENESSBIT const LivenessDelayable =
10593 {
10594 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10595 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10596 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10597 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10598 };
10599# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10600 if (fEflOutput & (a_fEfl)) \
10601 { \
10602 if (LivenessClobbered.a_fLivenessMember) \
10603 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10604 else if (LivenessDelayable.a_fLivenessMember) \
10605 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
10606 else \
10607 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10608 } else do { } while (0)
10609# endif
10610 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
10611 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
10612 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
10613 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
10614 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
10615 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
10616 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
10617# undef CHECK_FLAG_AND_UPDATE_STATS
10618 }
10619 RT_NOREF(fEflInput);
10620}
10621#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
10622
10623#undef IEM_MC_FETCH_EFLAGS /* should not be used */
10624#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10625 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
10626
10627/** Handles IEM_MC_FETCH_EFLAGS_EX. */
10628DECL_INLINE_THROW(uint32_t)
10629iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
10630 uint32_t fEflInput, uint32_t fEflOutput)
10631{
10632 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
10633 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
10634 RT_NOREF(fEflInput, fEflOutput);
10635
10636#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10637# ifdef VBOX_STRICT
10638 if ( pReNative->idxCurCall != 0
10639 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
10640 {
10641 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
10642 uint32_t const fBoth = fEflInput | fEflOutput;
10643# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
10644 AssertMsg( !(fBoth & (a_fElfConst)) \
10645 || (!(fEflInput & (a_fElfConst)) \
10646 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10647 : !(fEflOutput & (a_fElfConst)) \
10648 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10649 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
10650 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
10651 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
10652 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
10653 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
10654 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
10655 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
10656 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
10657 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
10658# undef ASSERT_ONE_EFL
10659 }
10660# endif
10661#endif
10662
10663    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
10664 * the existing shadow copy. */
10665 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
10666 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10667 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
10668 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10669 return off;
10670}
10671
10672
10673
10674/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
10675 * start using it with custom native code emission (inlining assembly
10676 * instruction helpers). */
10677#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
10678#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10679 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10680 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
10681
10682/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
10683DECL_INLINE_THROW(uint32_t)
10684iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
10685{
10686 RT_NOREF(fEflOutput);
10687 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
10688 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
10689
10690#ifdef VBOX_STRICT
10691 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
10692 uint32_t offFixup = off;
10693 off = iemNativeEmitJnzToFixed(pReNative, off, off);
10694 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
10695 iemNativeFixupFixedJump(pReNative, offFixup, off);
10696
10697 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
10698 offFixup = off;
10699 off = iemNativeEmitJzToFixed(pReNative, off, off);
10700 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
10701 iemNativeFixupFixedJump(pReNative, offFixup, off);
10702
10703    /** @todo validate that only bits in the fEflOutput mask changed. */
10704#endif
10705
10706 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10707 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
10708 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10709 return off;
10710}
10711
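/* Illustrative only, not used by the recompiler: the invariants the VBOX_STRICT
   breakpoints above check on the committed EFLAGS value - the reserved always-one bit
   must be set and the reserved always-zero hardware bits must be clear. Plain C sketch,
   made-up helper name, compiled out. */
#if 0
DECLINLINE(void) iemNativeSketchAssertCommittedEFlags(uint32_t fEfl)
{
    Assert(fEfl & X86_EFL_RA1_MASK);
    Assert(!(fEfl & (X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32)));
}
#endif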
10712
10713
10714/*********************************************************************************************************************************
10715* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
10716*********************************************************************************************************************************/
10717
10718#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
10719 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
10720
10721#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
10722 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
10723
10724#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
10725 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
10726
10727
10728/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
10729 * IEM_MC_FETCH_SREG_ZX_U64. */
10730DECL_INLINE_THROW(uint32_t)
10731iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
10732{
10733 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10734 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
10735 Assert(iSReg < X86_SREG_COUNT);
10736
10737 /*
10738     * For now, we will not create a shadow copy of a selector. The rationale
10739     * is that since we do not recompile the popping and loading of segment
10740     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
10741     * and moving to registers, there is only a small chance that the shadow
10742     * copy will be accessed again before the register is reloaded. One
10743     * scenario would be nested calls in 16-bit code, but I doubt it's worth
10744     * the extra register pressure atm.
10745     *
10746     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
10747     * and iemNativeVarRegisterAcquire for a load scenario. We only got the
10748     * store scenario covered at present (r160730).
10749 */
10750 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10751 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10752 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
10753 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10754 return off;
10755}
10756
10757
10758
10759/*********************************************************************************************************************************
10760* Register references. *
10761*********************************************************************************************************************************/
10762
10763#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
10764 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
10765
10766#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
10767 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
10768
10769/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
10770DECL_INLINE_THROW(uint32_t)
10771iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
10772{
10773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10774 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10775 Assert(iGRegEx < 20);
10776
10777 if (iGRegEx < 16)
10778 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10779 else
10780 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
10781
10782 /* If we've delayed writing back the register value, flush it now. */
10783 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10784
10785 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10786 if (!fConst)
10787 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
10788
10789 return off;
10790}
10791
10792#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
10793 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
10794
10795#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
10796 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
10797
10798#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
10799 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
10800
10801#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
10802 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
10803
10804#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
10805 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
10806
10807#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
10808 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
10809
10810#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
10811 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
10812
10813#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
10814 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
10815
10816#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
10817 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
10818
10819#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
10820 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
10821
10822/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
10823DECL_INLINE_THROW(uint32_t)
10824iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
10825{
10826 Assert(iGReg < 16);
10827 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
10828 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10829
10830 /* If we've delayed writing back the register value, flush it now. */
10831 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
10832
10833 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10834 if (!fConst)
10835 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
10836
10837 return off;
10838}
10839
10840
10841#undef IEM_MC_REF_EFLAGS /* should not be used. */
10842#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
10843 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10844 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
10845
10846/** Handles IEM_MC_REF_EFLAGS. */
10847DECL_INLINE_THROW(uint32_t)
10848iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
10849{
10850 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
10851 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10852
10853 /* If we've delayed writing back the register value, flush it now. */
10854 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
10855
10856 /* If there is a shadow copy of guest EFLAGS, flush it now. */
10857 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
10858
10859 return off;
10860}
10861
10862
10863/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
10864 * different code from the threaded recompiler, maybe it would be helpful. For now
10865 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
10866#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
10867
10868
10869#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
10870 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
10871
10872#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
10873 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
10874
10875#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
10876 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
10877
10878/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
10879DECL_INLINE_THROW(uint32_t)
10880iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
10881{
10882 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10883 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10884 Assert(iXReg < 16);
10885
10886 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
10887
10888 /* If we've delayed writing back the register value, flush it now. */
10889 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
10890
10891    /** @todo r=aeichner This needs to be done as soon as we shadow SSE registers in host registers;
10892     * we still need to figure out the semantics of how this is tracked.
10893 * For now this is safe though as the reference will directly operate on the CPUMCTX
10894 * structure so the value can't get out of sync.
10895 */
10896#if 0
10897 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10898 if (!fConst)
10899 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_XREG(iXReg)));
10900#else
10901 RT_NOREF(fConst);
10902#endif
10903
10904 return off;
10905}
10906
10907
10908#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
10909 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
10910
10911/** Handles IEM_MC_REF_MXCSR. */
10912DECL_INLINE_THROW(uint32_t)
10913iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
10914{
10915 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
10916 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10917
10918 /* If we've delayed writing back the register value, flush it now. */
10919 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
10920
10921 /* If there is a shadow copy of guest MXCSR, flush it now. */
10922 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
10923
10924 return off;
10925}
10926
10927
10928
10929/*********************************************************************************************************************************
10930* Effective Address Calculation *
10931*********************************************************************************************************************************/
10932#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
10933 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
10934
10935/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
10936 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
10937DECL_INLINE_THROW(uint32_t)
10938iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10939 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
10940{
10941 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10942
10943 /*
10944 * Handle the disp16 form with no registers first.
10945 *
10946 * Convert to an immediate value, as that'll delay the register allocation
10947 * and assignment till the memory access / call / whatever and we can use
10948 * a more appropriate register (or none at all).
10949 */
10950 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
10951 {
10952 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
10953 return off;
10954 }
10955
10956 /* Determine the displacement. */
10957 uint16_t u16EffAddr;
10958 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10959 {
10960 case 0: u16EffAddr = 0; break;
10961 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
10962 case 2: u16EffAddr = u16Disp; break;
10963 default: AssertFailedStmt(u16EffAddr = 0);
10964 }
10965
10966 /* Determine the registers involved. */
10967 uint8_t idxGstRegBase;
10968 uint8_t idxGstRegIndex;
10969 switch (bRm & X86_MODRM_RM_MASK)
10970 {
10971 case 0:
10972 idxGstRegBase = X86_GREG_xBX;
10973 idxGstRegIndex = X86_GREG_xSI;
10974 break;
10975 case 1:
10976 idxGstRegBase = X86_GREG_xBX;
10977 idxGstRegIndex = X86_GREG_xDI;
10978 break;
10979 case 2:
10980 idxGstRegBase = X86_GREG_xBP;
10981 idxGstRegIndex = X86_GREG_xSI;
10982 break;
10983 case 3:
10984 idxGstRegBase = X86_GREG_xBP;
10985 idxGstRegIndex = X86_GREG_xDI;
10986 break;
10987 case 4:
10988 idxGstRegBase = X86_GREG_xSI;
10989 idxGstRegIndex = UINT8_MAX;
10990 break;
10991 case 5:
10992 idxGstRegBase = X86_GREG_xDI;
10993 idxGstRegIndex = UINT8_MAX;
10994 break;
10995 case 6:
10996 idxGstRegBase = X86_GREG_xBP;
10997 idxGstRegIndex = UINT8_MAX;
10998 break;
10999#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
11000 default:
11001#endif
11002 case 7:
11003 idxGstRegBase = X86_GREG_xBX;
11004 idxGstRegIndex = UINT8_MAX;
11005 break;
11006 }
11007
11008 /*
11009 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
11010 */
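/* Worked example (for illustration): bRm=0x42 (mod=1, r/m=2) with u16Disp=0x0020 decodes as
 * [bp+si+0x20], so the switches above yield u16EffAddr=0x0020, idxGstRegBase=xBP and
 * idxGstRegIndex=xSI, and the code below computes (uint16_t)(BP + SI + 0x20). */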
11011 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11012 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11013 kIemNativeGstRegUse_ReadOnly);
11014 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
11015 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11016 kIemNativeGstRegUse_ReadOnly)
11017 : UINT8_MAX;
11018#ifdef RT_ARCH_AMD64
11019 if (idxRegIndex == UINT8_MAX)
11020 {
11021 if (u16EffAddr == 0)
11022 {
11023 /* movxz ret, base */
11024 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
11025 }
11026 else
11027 {
11028 /* lea ret32, [base64 + disp32] */
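/* E.g. (for illustration) with idxRegRet=0 (eax), idxRegBase=3 (rbx) and u16EffAddr=0x1234 this
 * emits 8D 83 34 12 00 00 (lea eax, [rbx+0x1234]), followed by the 16-bit zero-extend below. */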
11029 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11030 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11031 if (idxRegRet >= 8 || idxRegBase >= 8)
11032 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
11033 pbCodeBuf[off++] = 0x8d;
11034 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11035 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
11036 else
11037 {
11038 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
11039 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11040 }
11041 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
11042 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
11043 pbCodeBuf[off++] = 0;
11044 pbCodeBuf[off++] = 0;
11045 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11046
11047 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
11048 }
11049 }
11050 else
11051 {
11052 /* lea ret32, [index64 + base64 (+ disp32)] */
11053 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11054 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11055 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11056 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11057 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11058 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
11059 pbCodeBuf[off++] = 0x8d;
11060 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
11061 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11062 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
11063 if (bMod == X86_MOD_MEM4)
11064 {
11065 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
11066 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
11067 pbCodeBuf[off++] = 0;
11068 pbCodeBuf[off++] = 0;
11069 }
11070 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11071 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
11072 }
11073
11074#elif defined(RT_ARCH_ARM64)
11075 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
11076 if (u16EffAddr == 0)
11077 {
11078 if (idxRegIndex == UINT8_MAX)
11079 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
11080 else
11081 {
11082 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
11083 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
11084 }
11085 }
11086 else
11087 {
11088 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
11089 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
11090 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
11091 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
11092 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
11093 else
11094 {
11095 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
11096 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
11097 }
11098 if (idxRegIndex != UINT8_MAX)
11099 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
11100 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
11101 }
11102
11103#else
11104# error "port me"
11105#endif
11106
11107 if (idxRegIndex != UINT8_MAX)
11108 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11109 iemNativeRegFreeTmp(pReNative, idxRegBase);
11110 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11111 return off;
11112}
11113
11114
11115#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
11116 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
11117
11118/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
11119 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
11120DECL_INLINE_THROW(uint32_t)
11121iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
11122 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
11123{
11124 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
11125
11126 /*
11127 * Handle the disp32 form with no registers first.
11128 *
11129 * Convert to an immediate value, as that'll delay the register allocation
11130 * and assignment till the memory access / call / whatever and we can use
11131 * a more appropriate register (or none at all).
11132 */
11133 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
11134 {
11135 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
11136 return off;
11137 }
11138
11139 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
11140 uint32_t u32EffAddr = 0;
11141 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
11142 {
11143 case 0: break;
11144 case 1: u32EffAddr = (int8_t)u32Disp; break;
11145 case 2: u32EffAddr = u32Disp; break;
11146 default: AssertFailed();
11147 }
11148
11149 /* Get the register (or SIB) value. */
11150 uint8_t idxGstRegBase = UINT8_MAX;
11151 uint8_t idxGstRegIndex = UINT8_MAX;
11152 uint8_t cShiftIndex = 0;
11153 switch (bRm & X86_MODRM_RM_MASK)
11154 {
11155 case 0: idxGstRegBase = X86_GREG_xAX; break;
11156 case 1: idxGstRegBase = X86_GREG_xCX; break;
11157 case 2: idxGstRegBase = X86_GREG_xDX; break;
11158 case 3: idxGstRegBase = X86_GREG_xBX; break;
11159 case 4: /* SIB */
11160 {
11161 /* index w/ scaling. */
11162 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
11163 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
11164 {
11165 case 0: idxGstRegIndex = X86_GREG_xAX; break;
11166 case 1: idxGstRegIndex = X86_GREG_xCX; break;
11167 case 2: idxGstRegIndex = X86_GREG_xDX; break;
11168 case 3: idxGstRegIndex = X86_GREG_xBX; break;
11169 case 4: cShiftIndex = 0; /*no index*/ break;
11170 case 5: idxGstRegIndex = X86_GREG_xBP; break;
11171 case 6: idxGstRegIndex = X86_GREG_xSI; break;
11172 case 7: idxGstRegIndex = X86_GREG_xDI; break;
11173 }
11174
11175 /* base */
11176 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
11177 {
11178 case 0: idxGstRegBase = X86_GREG_xAX; break;
11179 case 1: idxGstRegBase = X86_GREG_xCX; break;
11180 case 2: idxGstRegBase = X86_GREG_xDX; break;
11181 case 3: idxGstRegBase = X86_GREG_xBX; break;
11182 case 4:
11183 idxGstRegBase = X86_GREG_xSP;
11184 u32EffAddr += uSibAndRspOffset >> 8;
11185 break;
11186 case 5:
11187 if ((bRm & X86_MODRM_MOD_MASK) != 0)
11188 idxGstRegBase = X86_GREG_xBP;
11189 else
11190 {
11191 Assert(u32EffAddr == 0);
11192 u32EffAddr = u32Disp;
11193 }
11194 break;
11195 case 6: idxGstRegBase = X86_GREG_xSI; break;
11196 case 7: idxGstRegBase = X86_GREG_xDI; break;
11197 }
11198 break;
11199 }
11200 case 5: idxGstRegBase = X86_GREG_xBP; break;
11201 case 6: idxGstRegBase = X86_GREG_xSI; break;
11202 case 7: idxGstRegBase = X86_GREG_xDI; break;
11203 }
11204
11205 /*
11206 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
11207 * the start of the function.
11208 */
11209 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
11210 {
11211 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
11212 return off;
11213 }
11214
11215 /*
11216 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11217 */
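/* Worked example (for illustration): bRm=0x44 (mod=1, r/m=4) with SIB byte 0x88 in the low bits of
 * uSibAndRspOffset (scale=2, index=1, base=0) and u32Disp=0x10 decodes as [eax + ecx*4 + 0x10], so
 * the code above yields u32EffAddr=0x10, idxGstRegBase=xAX, idxGstRegIndex=xCX and cShiftIndex=2. */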
11218 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11219 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
11220 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11221 kIemNativeGstRegUse_ReadOnly);
11222 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
11223 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11224 kIemNativeGstRegUse_ReadOnly);
11225
11226 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
11227 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
11228 {
11229 idxRegBase = idxRegIndex;
11230 idxRegIndex = UINT8_MAX;
11231 }
11232
11233#ifdef RT_ARCH_AMD64
11234 if (idxRegIndex == UINT8_MAX)
11235 {
11236 if (u32EffAddr == 0)
11237 {
11238 /* mov ret, base */
11239 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11240 }
11241 else
11242 {
11243 /* lea ret32, [base64 + disp32] */
11244 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11245 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11246 if (idxRegRet >= 8 || idxRegBase >= 8)
11247 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
11248 pbCodeBuf[off++] = 0x8d;
11249 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11250 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11251 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
11252 else
11253 {
11254 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11255 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11256 }
11257 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11258 if (bMod == X86_MOD_MEM4)
11259 {
11260 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11261 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11262 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11263 }
11264 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11265 }
11266 }
11267 else
11268 {
11269 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11270 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11271 if (idxRegBase == UINT8_MAX)
11272 {
11273 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
11274 if (idxRegRet >= 8 || idxRegIndex >= 8)
11275 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11276 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
11277 pbCodeBuf[off++] = 0x8d;
11278 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
11279 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
11280 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11281 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11282 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11283 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11284 }
11285 else
11286 {
11287 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
11288 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11289 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11290 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11291 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
11292 pbCodeBuf[off++] = 0x8d;
11293 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
11294 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11295 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11296 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
11297 if (bMod != X86_MOD_MEM0)
11298 {
11299 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11300 if (bMod == X86_MOD_MEM4)
11301 {
11302 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11303 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11304 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11305 }
11306 }
11307 }
11308 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11309 }
11310
11311#elif defined(RT_ARCH_ARM64)
11312 if (u32EffAddr == 0)
11313 {
11314 if (idxRegIndex == UINT8_MAX)
11315 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11316 else if (idxRegBase == UINT8_MAX)
11317 {
11318 if (cShiftIndex == 0)
11319 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
11320 else
11321 {
11322 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11323 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
11324 }
11325 }
11326 else
11327 {
11328 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11329 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
11330 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
11331 }
11332 }
11333 else
11334 {
11335 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
11336 {
11337 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11338 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
11339 }
11340 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
11341 {
11342 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11343 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
11344 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
11345 }
11346 else
11347 {
11348 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
11349 if (idxRegBase != UINT8_MAX)
11350 {
11351 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11352 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
11353 }
11354 }
11355 if (idxRegIndex != UINT8_MAX)
11356 {
11357 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11358 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
11359 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
11360 }
11361 }
11362
11363#else
11364# error "port me"
11365#endif
11366
11367 if (idxRegIndex != UINT8_MAX)
11368 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11369 if (idxRegBase != UINT8_MAX)
11370 iemNativeRegFreeTmp(pReNative, idxRegBase);
11371 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11372 return off;
11373}
11374
11375
11376#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11377 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11378 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
11379
11380#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11381 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11382 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
11383
11384#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11385 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11386 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
11387
11388/**
11389 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
11390 *
11391 * @returns New off.
11392 * @param pReNative The native recompile state.
11393 * @param off The code buffer offset.
11394 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
11395 * bit 4 to REX.X. The two bits are part of the
11396 * REG sub-field, which isn't needed in this
11397 * function.
11398 * @param uSibAndRspOffset Two parts:
11399 * - The first 8 bits make up the SIB byte.
11400 * - The next 8 bits are the fixed RSP/ESP offset
11401 * in case of a pop [xSP].
11402 * @param u32Disp The displacement byte/word/dword, if any.
11403 * @param cbInstr The size of the fully decoded instruction. Used
11404 * for RIP relative addressing.
11405 * @param idxVarRet The result variable number.
11406 * @param f64Bit Whether to use a 64-bit or 32-bit address size
11407 * when calculating the address.
11408 *
11409 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
11410 */
11411DECL_INLINE_THROW(uint32_t)
11412iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
11413 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
11414{
11415 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
11416
11417 /*
11418 * Special case the rip + disp32 form first.
11419 */
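/* In 64-bit mode, mod=0 with r/m=5 means RIP-relative addressing (regardless of REX.B): the
 * effective address is the RIP of the *next* instruction plus disp32, which is why cbInstr is
 * added to the PC value below. */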
11420 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
11421 {
11422#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
11423 /* Need to take the current PC offset into account for the displacement; no need to flush here
11424 * as the PC is only accessed read-only and no branching or helper calls are involved. */
11425 u32Disp += pReNative->Core.offPc;
11426#endif
11427
11428 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11429 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
11430 kIemNativeGstRegUse_ReadOnly);
11431#ifdef RT_ARCH_AMD64
11432 if (f64Bit)
11433 {
11434 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
11435 if ((int32_t)offFinalDisp == offFinalDisp)
11436 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
11437 else
11438 {
11439 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
11440 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
11441 }
11442 }
11443 else
11444 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
11445
11446#elif defined(RT_ARCH_ARM64)
11447 if (f64Bit)
11448 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
11449 (int64_t)(int32_t)u32Disp + cbInstr);
11450 else
11451 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
11452 (int32_t)u32Disp + cbInstr);
11453
11454#else
11455# error "Port me!"
11456#endif
11457 iemNativeRegFreeTmp(pReNative, idxRegPc);
11458 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11459 return off;
11460 }
11461
11462 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
11463 int64_t i64EffAddr = 0;
11464 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
11465 {
11466 case 0: break;
11467 case 1: i64EffAddr = (int8_t)u32Disp; break;
11468 case 2: i64EffAddr = (int32_t)u32Disp; break;
11469 default: AssertFailed();
11470 }
11471
11472 /* Get the register (or SIB) value. */
11473 uint8_t idxGstRegBase = UINT8_MAX;
11474 uint8_t idxGstRegIndex = UINT8_MAX;
11475 uint8_t cShiftIndex = 0;
11476 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
11477 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
11478 else /* SIB: */
11479 {
11480 /* index w/ scaling. */
11481 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
11482 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
11483 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
11484 if (idxGstRegIndex == 4)
11485 {
11486 /* no index */
11487 cShiftIndex = 0;
11488 idxGstRegIndex = UINT8_MAX;
11489 }
11490
11491 /* base */
11492 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
11493 if (idxGstRegBase == 4)
11494 {
11495 /* pop [rsp] hack */
11496 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
11497 }
11498 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
11499 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
11500 {
11501 /* mod=0 and base=5 -> disp32, no base reg. */
11502 Assert(i64EffAddr == 0);
11503 i64EffAddr = (int32_t)u32Disp;
11504 idxGstRegBase = UINT8_MAX;
11505 }
11506 }
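/* E.g. (for illustration): ModRM 0x03 ([rbx]) with REX.B set arrives as bRmEx=0x0b, selecting r11
 * as the base; with a SIB byte, REX.X sits in bit 4 of bRmEx, so ((bRmEx & 0x10) >> 1) above adds
 * 8 to the index register number. */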
11507
11508 /*
11509 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
11510 * the start of the function.
11511 */
11512 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
11513 {
11514 if (f64Bit)
11515 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
11516 else
11517 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
11518 return off;
11519 }
11520
11521 /*
11522 * Now emit code that calculates:
11523 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11524 * or if !f64Bit:
11525 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11526 */
11527 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11528 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
11529 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11530 kIemNativeGstRegUse_ReadOnly);
11531 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
11532 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11533 kIemNativeGstRegUse_ReadOnly);
11534
11535 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
11536 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
11537 {
11538 idxRegBase = idxRegIndex;
11539 idxRegIndex = UINT8_MAX;
11540 }
11541
11542#ifdef RT_ARCH_AMD64
11543 uint8_t bFinalAdj;
11544 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
11545 bFinalAdj = 0; /* likely */
11546 else
11547 {
11548 /* pop [rsp] with a problematic disp32 value. Split out the
11549 RSP offset and add it separately afterwards (bFinalAdj). */
11550 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
11551 Assert(idxGstRegBase == X86_GREG_xSP);
11552 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
11553 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
11554 Assert(bFinalAdj != 0);
11555 i64EffAddr -= bFinalAdj;
11556 Assert((int32_t)i64EffAddr == i64EffAddr);
11557 }
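/* E.g. (for illustration) pop qword [rsp+0x7ffffffc]: disp32 plus the 8 byte RSP adjustment gives
 * 0x80000004, which no longer fits in a disp32, so the +8 is split out into bFinalAdj and added
 * separately after the lea. */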
11558 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
11559//pReNative->pInstrBuf[off++] = 0xcc;
11560
11561 if (idxRegIndex == UINT8_MAX)
11562 {
11563 if (u32EffAddr == 0)
11564 {
11565 /* mov ret, base */
11566 if (f64Bit)
11567 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
11568 else
11569 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11570 }
11571 else
11572 {
11573 /* lea ret, [base + disp32] */
11574 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11575 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11576 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
11577 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11578 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11579 | (f64Bit ? X86_OP_REX_W : 0);
11580 pbCodeBuf[off++] = 0x8d;
11581 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11582 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11583 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
11584 else
11585 {
11586 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11587 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11588 }
11589 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11590 if (bMod == X86_MOD_MEM4)
11591 {
11592 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11593 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11594 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11595 }
11596 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11597 }
11598 }
11599 else
11600 {
11601 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11602 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11603 if (idxRegBase == UINT8_MAX)
11604 {
11605 /* lea ret, [(index64 << cShiftIndex) + disp32] */
11606 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
11607 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11608 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11609 | (f64Bit ? X86_OP_REX_W : 0);
11610 pbCodeBuf[off++] = 0x8d;
11611 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
11612 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
11613 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11614 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11615 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11616 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11617 }
11618 else
11619 {
11620 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
11621 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11622 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11623 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11624 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11625 | (f64Bit ? X86_OP_REX_W : 0);
11626 pbCodeBuf[off++] = 0x8d;
11627 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
11628 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11629 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11630 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
11631 if (bMod != X86_MOD_MEM0)
11632 {
11633 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11634 if (bMod == X86_MOD_MEM4)
11635 {
11636 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11637 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11638 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11639 }
11640 }
11641 }
11642 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11643 }
11644
11645 if (!bFinalAdj)
11646 { /* likely */ }
11647 else
11648 {
11649 Assert(f64Bit);
11650 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
11651 }
11652
11653#elif defined(RT_ARCH_ARM64)
11654 if (i64EffAddr == 0)
11655 {
11656 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11657 if (idxRegIndex == UINT8_MAX)
11658 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
11659 else if (idxRegBase != UINT8_MAX)
11660 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
11661 f64Bit, false /*fSetFlags*/, cShiftIndex);
11662 else
11663 {
11664 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
11665 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
11666 }
11667 }
11668 else
11669 {
11670 if (f64Bit)
11671 { /* likely */ }
11672 else
11673 i64EffAddr = (int32_t)i64EffAddr;
11674
11675 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
11676 {
11677 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11678 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
11679 }
11680 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
11681 {
11682 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11683 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
11684 }
11685 else
11686 {
11687 if (f64Bit)
11688 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
11689 else
11690 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
11691 if (idxRegBase != UINT8_MAX)
11692 {
11693 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11694 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
11695 }
11696 }
11697 if (idxRegIndex != UINT8_MAX)
11698 {
11699 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11700 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
11701 f64Bit, false /*fSetFlags*/, cShiftIndex);
11702 }
11703 }
11704
11705#else
11706# error "port me"
11707#endif
11708
11709 if (idxRegIndex != UINT8_MAX)
11710 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11711 if (idxRegBase != UINT8_MAX)
11712 iemNativeRegFreeTmp(pReNative, idxRegBase);
11713 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11714 return off;
11715}
11716
11717
11718/*********************************************************************************************************************************
11719* TLB Lookup. *
11720*********************************************************************************************************************************/
11721
11722/**
11723 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
11724 */
11725DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
11726{
11727 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
11728 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
11729 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
11730 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
11731
11732 /* Do the lookup manually. */
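/* Assumption on IEMTLB_CALC_TAG: the tag combines the page number of GCPtrFlat with the TLB's
 * current revision, so entries left over from before the last TLB flush can never match. */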
11733 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
11734 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
11735 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
11736 if (RT_LIKELY(pTlbe->uTag == uTag))
11737 {
11738 /*
11739 * Check TLB page table level access flags.
11740 */
11741 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
11742 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
11743 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
11744 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
11745 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
11746 | IEMTLBE_F_PG_UNASSIGNED
11747 | IEMTLBE_F_PT_NO_ACCESSED
11748 | fNoWriteNoDirty | fNoUser);
11749 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
11750 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
11751 {
11752 /*
11753 * Return the address.
11754 */
11755 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
11756 if ((uintptr_t)pbAddr == uResult)
11757 return;
11758 RT_NOREF(cbMem);
11759 AssertFailed();
11760 }
11761 else
11762 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
11763 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
11764 }
11765 else
11766 AssertFailed();
11767 RT_BREAKPOINT();
11768}
11769
11770/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
11771
11772
11773/*********************************************************************************************************************************
11774* Memory fetches and stores common *
11775*********************************************************************************************************************************/
11776
11777typedef enum IEMNATIVEMITMEMOP
11778{
11779 kIemNativeEmitMemOp_Store = 0,
11780 kIemNativeEmitMemOp_Fetch,
11781 kIemNativeEmitMemOp_Fetch_Zx_U16,
11782 kIemNativeEmitMemOp_Fetch_Zx_U32,
11783 kIemNativeEmitMemOp_Fetch_Zx_U64,
11784 kIemNativeEmitMemOp_Fetch_Sx_U16,
11785 kIemNativeEmitMemOp_Fetch_Sx_U32,
11786 kIemNativeEmitMemOp_Fetch_Sx_U64
11787} IEMNATIVEMITMEMOP;
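/* For illustration: kIemNativeEmitMemOp_Fetch_Zx_U32 with cbMem=1 loads a byte and zero-extends it,
 * while kIemNativeEmitMemOp_Fetch_Sx_U64 with cbMem=4 loads 32 bits and sign-extends them to 64 bits
 * before the value ends up in the destination variable's host register. */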
11788
11789/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
11790 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
11791 * (with iSegReg = UINT8_MAX). */
11792DECL_INLINE_THROW(uint32_t)
11793iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
11794 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
11795 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
11796{
11797 /*
11798 * Assert sanity.
11799 */
11800 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11801 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
11802 Assert( enmOp != kIemNativeEmitMemOp_Store
11803 || pVarValue->enmKind == kIemNativeVarKind_Immediate
11804 || pVarValue->enmKind == kIemNativeVarKind_Stack);
11805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
11806 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
11807 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
11808 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
11809 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11810 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
11811 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
11812 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
11813#ifdef VBOX_STRICT
11814 if (iSegReg == UINT8_MAX)
11815 {
11816 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11817 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11818 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11819 switch (cbMem)
11820 {
11821 case 1:
11822 Assert( pfnFunction
11823 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
11824 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11825 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11826 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11827 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11828 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
11829 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
11830 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
11831 : UINT64_C(0xc000b000a0009000) ));
11832 break;
11833 case 2:
11834 Assert( pfnFunction
11835 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
11836 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11837 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11838 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11839 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
11840 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
11841 : UINT64_C(0xc000b000a0009000) ));
11842 break;
11843 case 4:
11844 Assert( pfnFunction
11845 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
11846 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11847 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11848 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
11849 : UINT64_C(0xc000b000a0009000) ));
11850 break;
11851 case 8:
11852 Assert( pfnFunction
11853 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
11854 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
11855 : UINT64_C(0xc000b000a0009000) ));
11856 break;
11857 }
11858 }
11859 else
11860 {
11861 Assert(iSegReg < 6);
11862 switch (cbMem)
11863 {
11864 case 1:
11865 Assert( pfnFunction
11866 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
11867 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
11868 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11869 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11870 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11871 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
11872 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
11873 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
11874 : UINT64_C(0xc000b000a0009000) ));
11875 break;
11876 case 2:
11877 Assert( pfnFunction
11878 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
11879 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
11880 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11881 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11882 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
11883 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
11884 : UINT64_C(0xc000b000a0009000) ));
11885 break;
11886 case 4:
11887 Assert( pfnFunction
11888 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
11889 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
11890 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
11891 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
11892 : UINT64_C(0xc000b000a0009000) ));
11893 break;
11894 case 8:
11895 Assert( pfnFunction
11896 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
11897 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
11898 : UINT64_C(0xc000b000a0009000) ));
11899 break;
11900 }
11901 }
11902#endif
11903
11904#ifdef VBOX_STRICT
11905 /*
11906 * Check that the fExec flags we've got make sense.
11907 */
11908 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11909#endif
11910
11911 /*
11912 * To keep things simple we have to commit any pending writes first as we
11913 * may end up making calls.
11914 */
11915 /** @todo we could postpone this till we make the call and reload the
11916 * registers after returning from the call. Not sure if that's sensible or
11917 * not, though. */
11918#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11919 off = iemNativeRegFlushPendingWrites(pReNative, off);
11920#else
11921 /* The program counter is treated differently for now. */
11922 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
11923#endif
11924
11925#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11926 /*
11927 * Move/spill/flush stuff out of call-volatile registers.
11928 * This is the easy way out. We could contain this to the tlb-miss branch
11929 * by saving and restoring active stuff here.
11930 */
11931 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11932#endif
11933
11934 /*
11935 * Define labels and allocate the result register (trying for the return
11936 * register if we can).
11937 */
11938 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11939 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
11940 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
11941 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
11942 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
11943 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
11944 uint8_t const idxRegValueStore = !TlbState.fSkip
11945 && enmOp == kIemNativeEmitMemOp_Store
11946 && pVarValue->enmKind != kIemNativeVarKind_Immediate
11947 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
11948 : UINT8_MAX;
11949 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11950 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11951 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11952 : UINT32_MAX;
11953
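/* Rough layout of the native code generated below (sketch, assuming the TLB lookup isn't skipped):
 *      jmp   TlbLookup
 *  TlbMiss:
 *      <save volatiles, set up arguments, call pfnFunction, restore volatiles>
 *      jmp   TlbDone
 *  TlbLookup:
 *      <inline TLB probe, branches to TlbMiss on a miss>
 *      <inline load/store via idxRegMemResult>
 *  TlbDone:
 */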
11954 /*
11955 * Jump to the TLB lookup code.
11956 */
11957 if (!TlbState.fSkip)
11958 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11959
11960 /*
11961 * TlbMiss:
11962 *
11963 * Call the helper to do the fetching or storing.
11964 * We flush all guest register shadow copies here.
11965 */
11966 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
11967
11968#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11969 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11970#else
11971 RT_NOREF(idxInstr);
11972#endif
11973
11974#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
11975 if (pReNative->Core.offPc)
11976 {
11977 /*
11978 * Update the program counter but restore it at the end of the TlbMiss branch.
11979 * This should allow delaying more program counter updates for the TlbLookup and hit paths,
11980 * which are hopefully much more frequent, reducing the number of memory accesses.
11981 */
11982 /* Allocate a temporary PC register. */
11983 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
11984
11985 /* Perform the addition and store the result. */
11986 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
11987 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
11988
11989 /* Free and flush the PC register. */
11990 iemNativeRegFreeTmp(pReNative, idxPcReg);
11991 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
11992 }
11993#endif
11994
11995#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11996 /* Save variables in volatile registers. */
11997 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11998 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
11999 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
12000 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12001#endif
12002
12003 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
12004 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
12005 if (enmOp == kIemNativeEmitMemOp_Store)
12006 {
12007 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
12008 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
12009#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12010 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12011#else
12012 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
12013 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
12014#endif
12015 }
12016
12017 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
12018 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
12019#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12020 fVolGregMask);
12021#else
12022 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
12023#endif
12024
12025 if (iSegReg != UINT8_MAX)
12026 {
12027 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
12028 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
12029 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
12030 }
12031
12032 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12033 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12034
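/* The resulting call is thus (inferred from the argument setup above):
 *   flat:      pfnFunction(pVCpu, GCPtrMem [, uValue])
 *   segmented: pfnFunction(pVCpu, GCPtrMem, iSegReg [, uValue])
 * with uValue only present for stores. */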
12035 /* Done setting up parameters, make the call. */
12036 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12037
12038 /*
12039 * Put the result in the right register if this is a fetch.
12040 */
12041 if (enmOp != kIemNativeEmitMemOp_Store)
12042 {
12043 Assert(idxRegValueFetch == pVarValue->idxReg);
12044 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
12045 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
12046 }
12047
12048#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12049 /* Restore variables and guest shadow registers to volatile registers. */
12050 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12051 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12052#endif
12053
12054#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
12055 if (pReNative->Core.offPc)
12056 {
12057 /*
12058 * Time to restore the program counter to its original value.
12059 */
12060 /* Allocate a temporary PC register. */
12061 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
12062
12063 /* Restore the original value. */
12064 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
12065 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
12066
12067 /* Free and flush the PC register. */
12068 iemNativeRegFreeTmp(pReNative, idxPcReg);
12069 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
12070 }
12071#endif
12072
12073#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12074 if (!TlbState.fSkip)
12075 {
12076 /* end of TlbMiss - Jump to the done label. */
12077 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12078 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12079
12080 /*
12081 * TlbLookup:
12082 */
12083 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
12084 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
12085 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
12086
12087 /*
12088 * Emit code to do the actual storing / fetching.
12089 */
12090 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
12091# ifdef VBOX_WITH_STATISTICS
12092 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12093 enmOp == kIemNativeEmitMemOp_Store
12094 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
12095 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
12096# endif
12097 switch (enmOp)
12098 {
12099 case kIemNativeEmitMemOp_Store:
12100 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
12101 {
12102 switch (cbMem)
12103 {
12104 case 1:
12105 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12106 break;
12107 case 2:
12108 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12109 break;
12110 case 4:
12111 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12112 break;
12113 case 8:
12114 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12115 break;
12116 default:
12117 AssertFailed();
12118 }
12119 }
12120 else
12121 {
12122 switch (cbMem)
12123 {
12124 case 1:
12125 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
12126 idxRegMemResult, TlbState.idxReg1);
12127 break;
12128 case 2:
12129 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
12130 idxRegMemResult, TlbState.idxReg1);
12131 break;
12132 case 4:
12133 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
12134 idxRegMemResult, TlbState.idxReg1);
12135 break;
12136 case 8:
12137 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
12138 idxRegMemResult, TlbState.idxReg1);
12139 break;
12140 default:
12141 AssertFailed();
12142 }
12143 }
12144 break;
12145
12146 case kIemNativeEmitMemOp_Fetch:
12147 case kIemNativeEmitMemOp_Fetch_Zx_U16:
12148 case kIemNativeEmitMemOp_Fetch_Zx_U32:
12149 case kIemNativeEmitMemOp_Fetch_Zx_U64:
12150 switch (cbMem)
12151 {
12152 case 1:
12153 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12154 break;
12155 case 2:
12156 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12157 break;
12158 case 4:
12159 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12160 break;
12161 case 8:
12162 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12163 break;
12164 default:
12165 AssertFailed();
12166 }
12167 break;
12168
12169 case kIemNativeEmitMemOp_Fetch_Sx_U16:
12170 Assert(cbMem == 1);
12171 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12172 break;
12173
12174 case kIemNativeEmitMemOp_Fetch_Sx_U32:
12175 Assert(cbMem == 1 || cbMem == 2);
12176 if (cbMem == 1)
12177 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12178 else
12179 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12180 break;
12181
12182 case kIemNativeEmitMemOp_Fetch_Sx_U64:
12183 switch (cbMem)
12184 {
12185 case 1:
12186 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12187 break;
12188 case 2:
12189 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12190 break;
12191 case 4:
12192 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12193 break;
12194 default:
12195 AssertFailed();
12196 }
12197 break;
12198
12199 default:
12200 AssertFailed();
12201 }
12202
12203 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12204
12205 /*
12206 * TlbDone:
12207 */
12208 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12209
12210 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
12211
12212# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12213 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
12214 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12215# endif
12216 }
12217#else
12218 RT_NOREF(fAlignMask, idxLabelTlbMiss);
12219#endif
12220
12221 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
12222 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12223 return off;
12224}
12225
12226
12227
12228/*********************************************************************************************************************************
12229* Memory fetches (IEM_MEM_FETCH_XXX). *
12230*********************************************************************************************************************************/
12231
12232/* 8-bit segmented: */
12233#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
12234 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
12235 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
12236 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12237
12238#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12239 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12240 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
12241 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12242
12243#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12244 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12245 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12246 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12247
12248#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12249 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12250 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12251 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12252
12253#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12254 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12255 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
12256 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
12257
12258#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12259 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12260 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12261 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
12262
12263#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12264 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12265 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12266 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
12267
12268/* 16-bit segmented: */
12269#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12270 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12271 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12272 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12273
12274#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
12275 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12276 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12277 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
12278
12279#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12280 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12281 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12282 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12283
12284#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12285 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12286 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12287 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12288
12289#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12290 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12291 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12292 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
12293
12294#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12295 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12296 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12297 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
12298
12299
12300/* 32-bit segmented: */
12301#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12302 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12303 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12304 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
12305
12306#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
12307 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12308 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12309 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
12310
12311#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12312 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12313 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12314 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
12315
12316#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12317 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12318 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12319 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
12320
12321
12322/* 64-bit segmented: */
12323#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12324 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12325 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
12326 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
12327
12328
12329
12330/* 8-bit flat: */
12331#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
12332 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
12333 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
12334 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12335
12336#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
12337 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12338 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
12339 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12340
12341#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
12342 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12343 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12344 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12345
12346#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
12347 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12348 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12349 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12350
12351#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
12352 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12353 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
12354 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
12355
12356#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
12357 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12358 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12359 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
12360
12361#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
12362 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12363 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12364 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
12365
12366
12367/* 16-bit flat: */
12368#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
12369 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12370 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12371 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12372
12373#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
12374 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12375 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12376 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
12377
12378#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
12379 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12380 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12381 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12382
12383#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
12384 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12385 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12386 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12387
12388#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
12389 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12390 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12391 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
12392
12393#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
12394 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12395 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12396 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
12397
12398/* 32-bit flat: */
12399#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
12400 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12401 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12402 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
12403
12404#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
12405 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12406 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12407 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
12408
12409#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
12410 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12411 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12412 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
12413
12414#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
12415 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12416 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12417 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
12418
12419/* 64-bit flat: */
12420#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
12421 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12422 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
12423 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
12424
12425
12426
12427/*********************************************************************************************************************************
12428* Memory stores (IEM_MEM_STORE_XXX). *
12429*********************************************************************************************************************************/
12430
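/*
 * Note: These mirror the fetch wrappers above, except that the value variable
 *       takes the place of the destination and kIemNativeEmitMemOp_Store
 *       selects the store path in the common worker.
 */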
12431#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
12432 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
12433 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
12434 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
12435
12436#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
12437 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
12438 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
12439 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
12440
12441#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
12442 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
12443 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
12444 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
12445
12446#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
12447 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
12448 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
12449 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
12450
12451
12452#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
12453 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
12454 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
12455 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
12456
12457#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
12458 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
12459 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
12460 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
12461
12462#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
12463 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
12464 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
12465 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
12466
12467#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
12468 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
12469 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
12470 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
12471
12472
12473#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
12474 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12475 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
12476
12477#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
12478 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12479 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
12480
12481#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
12482 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12483 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
12484
12485#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
12486 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12487 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
12488
12489
12490#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
12491 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12492 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
12493
12494#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
12495 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12496 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
12497
12498#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
12499 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12500 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
12501
12502#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
12503 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12504 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
12505
12506/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
12507 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
12508DECL_INLINE_THROW(uint32_t)
12509iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
12510 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
12511{
12512 /*
12513 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
12514 * to do the grunt work.
12515 */
12516 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
12517 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
12518 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
12519 pfnFunction, idxInstr);
12520 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
12521 return off;
12522}
12523
12524
12525
12526/*********************************************************************************************************************************
12527* Stack Accesses. *
12528*********************************************************************************************************************************/
12529/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
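/* E.g. RT_MAKE_U32_FROM_U8(32, 32, 1, 0) describes a 32-bit segment register
   push on a flat 32-bit stack, while cBitsFlat == 0 selects the generic
   SS-based code path that also copes with 16-bit stack pointers. */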
12530#define IEM_MC_PUSH_U16(a_u16Value) \
12531 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12532 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
12533#define IEM_MC_PUSH_U32(a_u32Value) \
12534 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12535 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
12536#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
12537 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
12538 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
12539#define IEM_MC_PUSH_U64(a_u64Value) \
12540 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12541 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
12542
12543#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
12544 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12545 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
12546#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
12547 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12548 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
12549#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
12550 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
12551 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
12552
12553#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
12554 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12555 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
12556#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
12557 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12558 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
12559
12560
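/**
 * Emits the 16-bit stack pointer variant of the push SP update: the low 16
 * bits of idxRegRsp are decremented by cbMem (the upper bits are left alone)
 * and the new SP is placed zero-extended in idxRegEffSp.
 */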
12561DECL_FORCE_INLINE_THROW(uint32_t)
12562iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12563{
12564 /* Use16BitSp: */
12565#ifdef RT_ARCH_AMD64
12566 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12567 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12568#else
12569 /* sub regeff, regrsp, #cbMem */
12570 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
12571 /* and regeff, regeff, #0xffff */
12572 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12573 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
12574    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
12575 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
12576#endif
12577 return off;
12578}
12579
12580
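/**
 * Emits the 32-bit stack pointer variant of the push SP update: idxRegRsp is
 * decremented by cbMem as a 32-bit value and the result is copied to
 * idxRegEffSp.
 */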
12581DECL_FORCE_INLINE(uint32_t)
12582iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12583{
12584 /* Use32BitSp: */
12585 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12586 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12587 return off;
12588}
12589
12590
12591/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
12592DECL_INLINE_THROW(uint32_t)
12593iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
12594 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12595{
12596 /*
12597 * Assert sanity.
12598 */
12599 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12600 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12601#ifdef VBOX_STRICT
12602 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12603 {
12604 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12605 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12606 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12607 Assert( pfnFunction
12608 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12609 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
12610 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
12611 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12612 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
12613 : UINT64_C(0xc000b000a0009000) ));
12614 }
12615 else
12616 Assert( pfnFunction
12617 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
12618 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
12619 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
12620 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
12621 : UINT64_C(0xc000b000a0009000) ));
12622#endif
12623
12624#ifdef VBOX_STRICT
12625 /*
12626 * Check that the fExec flags we've got make sense.
12627 */
12628 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12629#endif
12630
12631 /*
12632 * To keep things simple we have to commit any pending writes first as we
12633 * may end up making calls.
12634 */
12635 /** @todo we could postpone this till we make the call and reload the
12636 * registers after returning from the call. Not sure if that's sensible or
12637 * not, though. */
12638 off = iemNativeRegFlushPendingWrites(pReNative, off);
12639
12640 /*
12641 * First we calculate the new RSP and the effective stack pointer value.
12642 * For 64-bit mode and flat 32-bit these two are the same.
12643 * (Code structure is very similar to that of PUSH)
12644 */
12645 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12646 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
12647 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
12648 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
12649 ? cbMem : sizeof(uint16_t);
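    /* Note: fIsIntelSeg reduces the access width to 16 bits outside 16-bit mode,
             whereas the 16-bit mode segment push writes a full dword and gets
             special treatment in the TLB hit code below (the idxRegEfl merge). */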
12650 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12651 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12652 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12653 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12654 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12655 if (cBitsFlat != 0)
12656 {
12657 Assert(idxRegEffSp == idxRegRsp);
12658 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12659 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12660 if (cBitsFlat == 64)
12661 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
12662 else
12663 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
12664 }
12665 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12666 {
12667 Assert(idxRegEffSp != idxRegRsp);
12668 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12669 kIemNativeGstRegUse_ReadOnly);
12670#ifdef RT_ARCH_AMD64
12671 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12672#else
12673 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12674#endif
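        /* Test SS.ATTR.D to pick between the 16-bit and 32-bit stack pointer
           update; the variant matching the current CPU mode is emitted inline,
           the other one behind the fixed jump (see the Use16BitSp block below). */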
12675 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12676 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12677 offFixupJumpToUseOtherBitSp = off;
12678 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12679 {
12680 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12681 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12682 }
12683 else
12684 {
12685 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12686 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12687 }
12688 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12689 }
12690 /* SpUpdateEnd: */
12691 uint32_t const offLabelSpUpdateEnd = off;
12692
12693 /*
12694 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
12695 * we're skipping lookup).
12696 */
12697 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12698 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
12699 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12700 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12701 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12702 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12703 : UINT32_MAX;
12704 uint8_t const idxRegValue = !TlbState.fSkip
12705 && pVarValue->enmKind != kIemNativeVarKind_Immediate
12706 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
12707 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
12708 : UINT8_MAX;
12709 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
12710
12711
12712 if (!TlbState.fSkip)
12713 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12714 else
12715 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12716
12717 /*
12718 * Use16BitSp:
12719 */
12720 if (cBitsFlat == 0)
12721 {
12722#ifdef RT_ARCH_AMD64
12723 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12724#else
12725 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12726#endif
12727 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12728 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12729 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12730 else
12731 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12732 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12733 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12734 }
12735
12736 /*
12737 * TlbMiss:
12738 *
12739 * Call helper to do the pushing.
12740 */
12741 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12742
12743#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12744 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12745#else
12746 RT_NOREF(idxInstr);
12747#endif
12748
12749 /* Save variables in volatile registers. */
12750 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12751 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12752 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
12753 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
12754 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12755
12756 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
12757 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
12758 {
12759 /* Swap them using ARG0 as temp register: */
12760 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
12761 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
12762 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
12763 }
12764 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
12765 {
12766 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
12767 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
12768 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12769
12770 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
12771 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12772 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12773 }
12774 else
12775 {
12776 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
12777 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12778
12779 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
12780 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
12781 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
12782 }
12783
12784 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12785 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12786
12787 /* Done setting up parameters, make the call. */
12788 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12789
12790 /* Restore variables and guest shadow registers to volatile registers. */
12791 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12792 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12793
12794#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12795 if (!TlbState.fSkip)
12796 {
12797 /* end of TlbMiss - Jump to the done label. */
12798 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12799 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12800
12801 /*
12802 * TlbLookup:
12803 */
12804 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
12805 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12806
12807 /*
12808         * Emit code to do the actual storing.
12809 */
12810 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
12811# ifdef VBOX_WITH_STATISTICS
12812 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12813 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12814# endif
12815 if (idxRegValue != UINT8_MAX)
12816 {
12817 switch (cbMemAccess)
12818 {
12819 case 2:
12820 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12821 break;
12822 case 4:
12823 if (!fIsIntelSeg)
12824 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12825 else
12826 {
12827                        /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
12828                           PUSH FS in real mode, so we have to try to emulate that here.
12829                           We borrow the now unused idxReg1 from the TLB lookup code here. */
12830 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
12831 kIemNativeGstReg_EFlags);
12832 if (idxRegEfl != UINT8_MAX)
12833 {
12834#ifdef RT_ARCH_AMD64
12835 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
12836 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12837 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12838#else
12839 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
12840 off, TlbState.idxReg1, idxRegEfl,
12841 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12842#endif
12843 iemNativeRegFreeTmp(pReNative, idxRegEfl);
12844 }
12845 else
12846 {
12847 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
12848 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
12849 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12850 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12851 }
12852 /* ASSUMES the upper half of idxRegValue is ZERO. */
12853 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
12854 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
12855 }
12856 break;
12857 case 8:
12858 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12859 break;
12860 default:
12861 AssertFailed();
12862 }
12863 }
12864 else
12865 {
12866 switch (cbMemAccess)
12867 {
12868 case 2:
12869 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
12870 idxRegMemResult, TlbState.idxReg1);
12871 break;
12872 case 4:
12873 Assert(!fIsSegReg);
12874 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
12875 idxRegMemResult, TlbState.idxReg1);
12876 break;
12877 case 8:
12878 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
12879 break;
12880 default:
12881 AssertFailed();
12882 }
12883 }
12884
12885 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12886 TlbState.freeRegsAndReleaseVars(pReNative);
12887
12888 /*
12889 * TlbDone:
12890 *
12891 * Commit the new RSP value.
12892 */
12893 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12894 }
12895#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12896
12897 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
12898 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12899 if (idxRegEffSp != idxRegRsp)
12900 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12901
12902    /* The value variable is implicitly flushed. */
12903 if (idxRegValue != UINT8_MAX)
12904 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12905 iemNativeVarFreeLocal(pReNative, idxVarValue);
12906
12907 return off;
12908}
12909
12910
12911
12912/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
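/* Same encoding as for the pushes above, except that the fSReg byte is always zero here. */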
12913#define IEM_MC_POP_GREG_U16(a_iGReg) \
12914 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12915 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
12916#define IEM_MC_POP_GREG_U32(a_iGReg) \
12917 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12918 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
12919#define IEM_MC_POP_GREG_U64(a_iGReg) \
12920 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12921 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
12922
12923#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
12924 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12925 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12926#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
12927 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12928 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
12929
12930#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
12931 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12932 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12933#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
12934 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12935 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
12936
12937
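/**
 * Emits the 16-bit stack pointer variant of the pop SP update: idxRegEffSp
 * receives the current SP zero-extended (the address to read from) while the
 * low 16 bits of idxRegRsp are incremented by cbMem; idxRegTmp is only needed
 * as scratch on arm64.
 */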
12938DECL_FORCE_INLINE_THROW(uint32_t)
12939iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
12940 uint8_t idxRegTmp)
12941{
12942 /* Use16BitSp: */
12943#ifdef RT_ARCH_AMD64
12944 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12945 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12946 RT_NOREF(idxRegTmp);
12947#else
12948 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
12949 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
12950 /* add tmp, regrsp, #cbMem */
12951 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
12952 /* and tmp, tmp, #0xffff */
12953 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12954 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
12955    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from idxRegTmp to RSP bits 15:0, keeping the other RSP bits as is. */
12956 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
12957#endif
12958 return off;
12959}
12960
12961
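/**
 * Emits the 32-bit stack pointer variant of the pop SP update: idxRegEffSp is
 * loaded from idxRegRsp (the address to read from) and idxRegRsp is then
 * incremented by cbMem as a 32-bit value.
 */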
12962DECL_FORCE_INLINE(uint32_t)
12963iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12964{
12965 /* Use32BitSp: */
12966 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12967 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12968 return off;
12969}
12970
12971
12972/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
12973DECL_INLINE_THROW(uint32_t)
12974iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
12975 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12976{
12977 /*
12978 * Assert sanity.
12979 */
12980 Assert(idxGReg < 16);
12981#ifdef VBOX_STRICT
12982 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12983 {
12984 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12985 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12986 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12987 Assert( pfnFunction
12988 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12989 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
12990 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12991 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
12992 : UINT64_C(0xc000b000a0009000) ));
12993 }
12994 else
12995 Assert( pfnFunction
12996 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
12997 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
12998 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
12999 : UINT64_C(0xc000b000a0009000) ));
13000#endif
13001
13002#ifdef VBOX_STRICT
13003 /*
13004 * Check that the fExec flags we've got make sense.
13005 */
13006 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13007#endif
13008
13009 /*
13010 * To keep things simple we have to commit any pending writes first as we
13011 * may end up making calls.
13012 */
13013 off = iemNativeRegFlushPendingWrites(pReNative, off);
13014
13015 /*
13016     * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
13017 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
13018 * directly as the effective stack pointer.
13019 * (Code structure is very similar to that of PUSH)
13020 */
13021 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
13022 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
13023 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
13024 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
13025 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
13026 /** @todo can do a better job picking the register here. For cbMem >= 4 this
13027 * will be the resulting register value. */
13028 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
13029
13030 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
13031 if (cBitsFlat != 0)
13032 {
13033 Assert(idxRegEffSp == idxRegRsp);
13034 Assert(cBitsFlat == 32 || cBitsFlat == 64);
13035 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
13036 }
13037 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
13038 {
13039 Assert(idxRegEffSp != idxRegRsp);
13040 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
13041 kIemNativeGstRegUse_ReadOnly);
13042#ifdef RT_ARCH_AMD64
13043 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13044#else
13045 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13046#endif
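        /* As for push: SS.ATTR.D picks between the 16-bit and 32-bit stack
           pointer update; the variant matching the current CPU mode is emitted
           inline, the other one behind the fixed jump (Use16BitSp block below). */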
13047 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
13048 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
13049 offFixupJumpToUseOtherBitSp = off;
13050 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13051 {
13052/** @todo can skip idxRegRsp updating when popping ESP. */
13053 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
13054 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13055 }
13056 else
13057 {
13058 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
13059 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
13060 }
13061 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13062 }
13063 /* SpUpdateEnd: */
13064 uint32_t const offLabelSpUpdateEnd = off;
13065
13066 /*
13067 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
13068 * we're skipping lookup).
13069 */
13070 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
13071 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
13072 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13073 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
13074 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13075 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13076 : UINT32_MAX;
13077
13078 if (!TlbState.fSkip)
13079 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13080 else
13081 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
13082
13083 /*
13084 * Use16BitSp:
13085 */
13086 if (cBitsFlat == 0)
13087 {
13088#ifdef RT_ARCH_AMD64
13089 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13090#else
13091 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13092#endif
13093 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
13094 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13095 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
13096 else
13097 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13098 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
13099 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13100 }
13101
13102 /*
13103 * TlbMiss:
13104 *
13105     * Call helper to do the popping.
13106 */
13107 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
13108
13109#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13110 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13111#else
13112 RT_NOREF(idxInstr);
13113#endif
13114
13115 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13116 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
13117 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
13118 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13119
13120
13121 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
13122 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
13123 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13124
13125 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13126 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13127
13128 /* Done setting up parameters, make the call. */
13129 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13130
13131 /* Move the return register content to idxRegMemResult. */
13132 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
13133 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
13134
13135 /* Restore variables and guest shadow registers to volatile registers. */
13136 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13137 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13138
13139#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13140 if (!TlbState.fSkip)
13141 {
13142 /* end of TlbMiss - Jump to the done label. */
13143 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13144 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13145
13146 /*
13147 * TlbLookup:
13148 */
13149 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
13150 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13151
13152 /*
13153     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
13154 */
13155 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13156# ifdef VBOX_WITH_STATISTICS
13157 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13158 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
13159# endif
13160 switch (cbMem)
13161 {
13162 case 2:
13163 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13164 break;
13165 case 4:
13166 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13167 break;
13168 case 8:
13169 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13170 break;
13171 default:
13172 AssertFailed();
13173 }
13174
13175 TlbState.freeRegsAndReleaseVars(pReNative);
13176
13177 /*
13178 * TlbDone:
13179 *
13180     * Set the new RSP value (FLAT accesses need to calculate it first) and
13181 * commit the popped register value.
13182 */
13183 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13184 }
13185#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
13186
13187 if (idxGReg != X86_GREG_xSP)
13188 {
13189 /* Set the register. */
13190 if (cbMem >= sizeof(uint32_t))
13191 {
13192#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
13193 AssertMsg( pReNative->idxCurCall == 0
13194 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
13195 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
13196#endif
13197 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
13198 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
13199 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
13200 }
13201 else
13202 {
13203 Assert(cbMem == sizeof(uint16_t));
13204 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
13205 kIemNativeGstRegUse_ForUpdate);
13206 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
13207 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
13208 iemNativeRegFreeTmp(pReNative, idxRegDst);
13209 }
13210
13211 /* Complete RSP calculation for FLAT mode. */
13212 if (idxRegEffSp == idxRegRsp)
13213 {
13214 if (cBitsFlat == 64)
13215 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
13216 else
13217 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
13218 }
13219 }
13220 else
13221 {
13222        /* We're popping RSP, ESP or SP. Only the last one needs a bit of extra work, of course. */
13223 if (cbMem == sizeof(uint64_t))
13224 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
13225 else if (cbMem == sizeof(uint32_t))
13226 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
13227 else
13228 {
13229 if (idxRegEffSp == idxRegRsp)
13230 {
13231 if (cBitsFlat == 64)
13232 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
13233 else
13234 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
13235 }
13236 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
13237 }
13238 }
13239 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
13240
13241 iemNativeRegFreeTmp(pReNative, idxRegRsp);
13242 if (idxRegEffSp != idxRegRsp)
13243 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
13244 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13245
13246 return off;
13247}
13248
13249
13250
13251/*********************************************************************************************************************************
13252* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
13253*********************************************************************************************************************************/
13254
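/*
 * Note: The mapping wrappers hand iemNativeEmitMemMapCommon the result pointer
 *       variable, the unmap-info variable, the segment register (UINT8_MAX for
 *       the flat variants), the guest address, the access size, an
 *       IEM_ACCESS_DATA_XXX access type, an alignment mask and the matching
 *       TLB-miss helper; the ATOMIC/RW/WO/RO suffixes only differ in the
 *       access type and helper used.
 */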
13255#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13256 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13257 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
13258 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
13259
13260#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13261 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13262 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
13263 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
13264
13265#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13266 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13267 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
13268 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
13269
13270#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13271 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13272 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
13273 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
13274
13275
13276#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13277 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13278 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13279 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
13280
13281#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13282 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13283 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13284 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
13285
13286#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13287 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13288 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13289 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
13290
13291#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13292 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13293 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13294 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
13295
13296#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13297 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
13298 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13299 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
13300
13301
13302#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13303 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13304 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13305 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
13306
13307#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13308 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13309 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13310 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
13311
13312#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13313 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13314 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13315 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
13316
13317#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13318 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13319 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13320 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
13321
13322#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13323 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
13324 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13325 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
13326
13327
13328#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13329 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13330 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13331 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
13332
13333#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13334 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13335 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13336 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
13337#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13338 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13339 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13340 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
13341
13342#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13343 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13344 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13345 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
13346
13347#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13348 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
13349 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13350 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
13351
13352
13353#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13354 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
13355 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13356 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
13357
13358#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13359 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
13360 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
13361 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
13362
13363
13364#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13365 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13366 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13367 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
13368
13369#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13370 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13371 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13372 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
13373
13374#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13375 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13376 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13377 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
13378
13379#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13380 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13381 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13382 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
13383
13384
13385
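/*
 * Note: a minimal descriptive note. The IEM_MC_MEM_FLAT_MAP_XXX variants below
 * are for the flat address space modes (64-bit and flat 32-bit): no segment
 * register is involved, so UINT8_MAX is passed for iSegReg and the
 * iemNativeHlpMemFlatMapDataXXX helpers are used instead.
 */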
13386#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13387 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13388 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
13389 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
13390
13391#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13392 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13393 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
13394 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
13395
13396#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13397 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13398 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
13399 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
13400
13401#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13402 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13403 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
13404 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
13405
13406
13407#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13408 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13409 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13410 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
13411
13412#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13413 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13414 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13415 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
13416
13417#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13418 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13419 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13420 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
13421
13422#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13423 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13424 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13425 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
13426
13427#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
13428 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
13429 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13430 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
13431
13432
13433#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13434 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13435 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13436 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
13437
13438#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13439 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13440 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13441 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
13442
13443#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13444 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13445 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13446 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
13447
13448#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13449 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13450 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13451 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
13452
13453#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
13454 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
13455 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13456 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
13457
13458
13459#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13460 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13461 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13462 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
13463
13464#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13465 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13466 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13467 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
13468
13469#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13470 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13471 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13472 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
13473
13474#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13475 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13476 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13477 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
13478
13479#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
13480 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
13481 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13482 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
13483
13484
13485#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
13486 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
13487 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13488 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
13489
13490#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
13491 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
13492 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
13493 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
13494
13495
13496#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13497 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13498 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13499 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
13500
13501#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13502 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13503 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13504 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
13505
13506#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13507 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13508 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13509 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
13510
13511#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13512 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13513 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13514 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
13515
13516
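/**
 * Common emitter worker for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 * statements above.
 *
 * Emits an inline TLB lookup for the mapping with a fallback (TlbMiss) path
 * that calls the given pfnFunction helper.  For the flat variants iSegReg is
 * UINT8_MAX.  The resulting host pointer ends up in idxVarMem and the unmap
 * info byte in idxVarUnmapInfo.
 */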
13517DECL_INLINE_THROW(uint32_t)
13518iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
13519 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
13520 uintptr_t pfnFunction, uint8_t idxInstr)
13521{
13522 /*
13523 * Assert sanity.
13524 */
13525 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
13526 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
13527 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
13528 && pVarMem->cbVar == sizeof(void *),
13529 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13530
13531 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13532 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
13533 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
13534 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
13535 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13536
13537 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
13538 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
13539 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
13540 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
13541 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13542
13543 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
13544
13545 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
13546
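/* Strict builds: check that the pfnFunction we were handed matches cbMem and fAccess. */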
13547#ifdef VBOX_STRICT
13548# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
13549 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
13550 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
13551 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
13552 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
13553# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
13554 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
13555 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
13556 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
13557
13558 if (iSegReg == UINT8_MAX)
13559 {
13560 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13561 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13562 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13563 switch (cbMem)
13564 {
13565 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
13566 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
13567 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
13568 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
13569 case 10:
13570 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
13571 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
13572 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13573 break;
13574 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
13575# if 0
13576 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
13577 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
13578# endif
13579 default: AssertFailed(); break;
13580 }
13581 }
13582 else
13583 {
13584 Assert(iSegReg < 6);
13585 switch (cbMem)
13586 {
13587 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
13588 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
13589 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
13590 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
13591 case 10:
13592 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
13593 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
13594 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13595 break;
13596 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
13597# if 0
13598 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
13599 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
13600# endif
13601 default: AssertFailed(); break;
13602 }
13603 }
13604# undef IEM_MAP_HLP_FN
13605# undef IEM_MAP_HLP_FN_NO_AT
13606#endif
13607
13608#ifdef VBOX_STRICT
13609 /*
13610 * Check that the fExec flags we've got make sense.
13611 */
13612 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13613#endif
13614
13615 /*
13616 * To keep things simple we have to commit any pending writes first as we
13617 * may end up making calls.
13618 */
13619 off = iemNativeRegFlushPendingWrites(pReNative, off);
13620
13621#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13622 /*
13623 * Move/spill/flush stuff out of call-volatile registers.
13624 * This is the easy way out. We could contain this to the tlb-miss branch
13625 * by saving and restoring active stuff here.
13626 */
13627 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
13628 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13629#endif
13630
13631 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
13632 while the tlb-miss codepath will temporarily put it on the stack.
13633 Set the type to stack here so we don't need to do it twice below. */
13634 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
13635 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
13636 /** @todo use a tmp register from TlbState, since they'll be free after tlb
13637 * lookup is done. */
13638
13639 /*
13640 * Define labels and allocate the result register (trying for the return
13641 * register if we can).
13642 */
13643 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13644 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13645 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
13646 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
13647 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
13648 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13649 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13650 : UINT32_MAX;
13651//off=iemNativeEmitBrk(pReNative, off, 0);
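 /*
 * Unless the lookup is skipped, the code below is laid out as: a jump to the
 * TlbLookup code (which is emitted after the miss path), the TlbMiss helper
 * call, a jump to TlbDone, the inline TlbLookup code, and the TlbDone label.
 */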
13652 /*
13653 * Jump to the TLB lookup code.
13654 */
13655 if (!TlbState.fSkip)
13656 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13657
13658 /*
13659 * TlbMiss:
13660 *
13661 * Call helper to do the fetching.
13662 * We flush all guest register shadow copies here.
13663 */
13664 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13665
13666#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13667 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13668#else
13669 RT_NOREF(idxInstr);
13670#endif
13671
13672#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13673 /* Save variables in volatile registers. */
13674 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
13675 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13676#endif
13677
13678 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
13679 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
13680#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13681 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13682#else
13683 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13684#endif
13685
13686 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
13687 if (iSegReg != UINT8_MAX)
13688 {
13689 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13690 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
13691 }
13692
13693 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
13694 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
13695 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
13696
13697 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13698 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13699
13700 /* Done setting up parameters, make the call. */
13701 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13702
13703 /*
13704 * Put the output in the right registers.
13705 */
13706 Assert(idxRegMemResult == pVarMem->idxReg);
13707 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
13708 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
13709
13710#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13711 /* Restore variables and guest shadow registers to volatile registers. */
13712 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13713 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13714#endif
13715
13716 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
13717 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
13718
13719#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13720 if (!TlbState.fSkip)
13721 {
13722 /* End of TlbMiss - jump to the done label. */
13723 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13724 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13725
13726 /*
13727 * TlbLookup:
13728 */
13729 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
13730 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13731# ifdef VBOX_WITH_STATISTICS
13732 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
13733 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
13734# endif
13735
13736 /* [idxVarUnmapInfo] = 0; */
13737 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
13738
13739 /*
13740 * TlbDone:
13741 */
13742 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13743
13744 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13745
13746# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13747 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13748 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13749# endif
13750 }
13751#else
13752 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
13753#endif
13754
13755 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13756 iemNativeVarRegisterRelease(pReNative, idxVarMem);
13757
13758 return off;
13759}
13760
13761
13762#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
13763 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
13764 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
13765
13766#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
13767 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
13768 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
13769
13770#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
13771 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
13772 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
13773
13774#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
13775 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
13776 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
13777
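/** Emits code for IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC/RW/WO/RO. */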
13778DECL_INLINE_THROW(uint32_t)
13779iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
13780 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
13781{
13782 /*
13783 * Assert sanity.
13784 */
13785 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13786#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
13787 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
13788#endif
13789 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
13790 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
13791 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
13792#ifdef VBOX_STRICT
13793 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
13794 {
13795 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
13796 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
13797 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
13798 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
13799 case IEM_ACCESS_TYPE_WRITE:
13800 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
13801 case IEM_ACCESS_TYPE_READ:
13802 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
13803 default: AssertFailed();
13804 }
13805#else
13806 RT_NOREF(fAccess);
13807#endif
13808
13809 /*
13810 * To keep things simple we have to commit any pending writes first as we
13811 * may end up making calls (there shouldn't be any at this point, so this
13812 * is just for consistency).
13813 */
13814 /** @todo we could postpone this till we make the call and reload the
13815 * registers after returning from the call. Not sure if that's sensible or
13816 * not, though. */
13817 off = iemNativeRegFlushPendingWrites(pReNative, off);
13818
13819 /*
13820 * Move/spill/flush stuff out of call-volatile registers.
13821 *
13822 * We exclude any register holding the bUnmapInfo variable, as we'll be
13823 * checking it after returning from the call and will free it afterwards.
13824 */
13825 /** @todo save+restore active registers and maybe guest shadows in miss
13826 * scenario. */
13827 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
13828
13829 /*
13830 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
13831 * to call the unmap helper function.
13832 *
13833 * The likelihood of it being zero is higher than the TLB hit rate when doing
13834 * the mapping, as a TLB miss for a well aligned and unproblematic memory
13835 * access should also end up with a mapping that won't need special unmapping.
13836 */
13837 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
13838 * should speed up things for the pure interpreter as well when TLBs
13839 * are enabled. */
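 /* If bUnmapInfo only lives on the stack (AMD64), test the stack byte directly;
 otherwise acquire a register for it and test that. If it is zero, the unmap
 helper call below is skipped. */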
13840#ifdef RT_ARCH_AMD64
13841 if (pVarUnmapInfo->idxReg == UINT8_MAX)
13842 {
13843 /* test byte [rbp - xxx], 0ffh */
13844 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
13845 pbCodeBuf[off++] = 0xf6;
13846 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
13847 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
13848 pbCodeBuf[off++] = 0xff;
13849 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13850 }
13851 else
13852#endif
13853 {
13854 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
13855 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
13856 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
13857 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13858 }
13859 uint32_t const offJmpFixup = off;
13860 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
13861
13862 /*
13863 * Call the unmap helper function.
13864 */
13865#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
13866 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13867#else
13868 RT_NOREF(idxInstr);
13869#endif
13870
13871 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
13872 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
13873 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13874
13875 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13876 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13877
13878 /* Done setting up parameters, make the call. */
13879 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13880
13881 /* The bUnmapInfo variable is implicitly freed by these MCs. */
13882 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
13883
13884 /*
13885 * Done, just fixup the jump for the non-call case.
13886 */
13887 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
13888
13889 return off;
13890}
13891
13892
13893
13894/*********************************************************************************************************************************
13895* State and Exceptions *
13896*********************************************************************************************************************************/
13897
13898#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13899#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13900
13901#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13902#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13903#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13904
13905#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13906#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13907#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13908
13909
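/** Common emitter for the IEM_MC_ACTUALIZE_FPU/SSE/AVX_STATE_FOR_CHANGE/READ
 * and IEM_MC_PREPARE_SSE/AVX_USAGE statements above; currently emits no code
 * (see the todo in the body). */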
13910DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
13911{
13912 /** @todo this needs a lot more work later. */
13913 RT_NOREF(pReNative, fForChange);
13914 return off;
13915}
13916
13917
13918
13919/*********************************************************************************************************************************
13920* Emitters for FPU related operations. *
13921*********************************************************************************************************************************/
13922
13923#define IEM_MC_FETCH_FCW(a_u16Fcw) \
13924 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
13925
13926/** Emits code for IEM_MC_FETCH_FCW. */
13927DECL_INLINE_THROW(uint32_t)
13928iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
13929{
13930 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
13931 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
13932
13933 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
13934
13935 /* Allocate a temporary FCW register. */
13936 /** @todo eliminate extra register */
13937 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
13938 kIemNativeGstRegUse_ReadOnly);
13939
13940 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
13941
13942 /* Free but don't flush the FCW register. */
13943 iemNativeRegFreeTmp(pReNative, idxFcwReg);
13944 iemNativeVarRegisterRelease(pReNative, idxDstVar);
13945
13946 return off;
13947}
13948
13949
13950#define IEM_MC_FETCH_FSW(a_u16Fsw) \
13951 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
13952
13953/** Emits code for IEM_MC_FETCH_FSW. */
13954DECL_INLINE_THROW(uint32_t)
13955iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
13956{
13957 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
13958 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
13959
13960 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
13961 /* Allocate a temporary FSW register. */
13962 /** @todo eliminate extra register */
13963 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
13964 kIemNativeGstRegUse_ReadOnly);
13965
13966 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
13967
13968 /* Free but don't flush the FSW register. */
13969 iemNativeRegFreeTmp(pReNative, idxFswReg);
13970 iemNativeVarRegisterRelease(pReNative, idxDstVar);
13971
13972 return off;
13973}
13974
13975
13976
13977/*********************************************************************************************************************************
13978* The native code generator functions for each MC block. *
13979*********************************************************************************************************************************/
13980
13981/*
13982 * Include instruction emitters.
13983 */
13984#include "target-x86/IEMAllN8veEmit-x86.h"
13985
13986/*
13987 * Include g_apfnIemNativeRecompileFunctions and associated functions.
13988 *
13989 * This should probably live in its own file later, but let's see what the
13990 * compile times turn out to be first.
13991 */
13992#include "IEMNativeFunctions.cpp.h"
13993
13994
13995
13996/*********************************************************************************************************************************
13997* Recompiler Core. *
13998*********************************************************************************************************************************/
13999
14000
14001/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
14002static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
14003{
14004 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
14005 pDis->cbCachedInstr += cbMaxRead;
14006 RT_NOREF(cbMinRead);
14007 return VERR_NO_DATA;
14008}
14009
14010
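/**
 * Translates a VMCPU(CC) structure offset into the name of the member starting
 * at that exact offset, for annotating memory accesses in the disassembly.
 * Returns NULL if the offset isn't known (the threaded function statistics
 * array is handled as a special range).
 */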
14011DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
14012{
14013 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
14014 {
14015#define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
14016 ENTRY(fLocalForcedActions),
14017 ENTRY(iem.s.rcPassUp),
14018 ENTRY(iem.s.fExec),
14019 ENTRY(iem.s.pbInstrBuf),
14020 ENTRY(iem.s.uInstrBufPc),
14021 ENTRY(iem.s.GCPhysInstrBuf),
14022 ENTRY(iem.s.cbInstrBufTotal),
14023 ENTRY(iem.s.idxTbCurInstr),
14024#ifdef VBOX_WITH_STATISTICS
14025 ENTRY(iem.s.StatNativeTlbHitsForFetch),
14026 ENTRY(iem.s.StatNativeTlbHitsForStore),
14027 ENTRY(iem.s.StatNativeTlbHitsForStack),
14028 ENTRY(iem.s.StatNativeTlbHitsForMapped),
14029 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
14030 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
14031 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
14032 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
14033#endif
14034 ENTRY(iem.s.DataTlb.aEntries),
14035 ENTRY(iem.s.DataTlb.uTlbRevision),
14036 ENTRY(iem.s.DataTlb.uTlbPhysRev),
14037 ENTRY(iem.s.DataTlb.cTlbHits),
14038 ENTRY(iem.s.CodeTlb.aEntries),
14039 ENTRY(iem.s.CodeTlb.uTlbRevision),
14040 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
14041 ENTRY(iem.s.CodeTlb.cTlbHits),
14042 ENTRY(pVMR3),
14043 ENTRY(cpum.GstCtx.rax),
14044 ENTRY(cpum.GstCtx.ah),
14045 ENTRY(cpum.GstCtx.rcx),
14046 ENTRY(cpum.GstCtx.ch),
14047 ENTRY(cpum.GstCtx.rdx),
14048 ENTRY(cpum.GstCtx.dh),
14049 ENTRY(cpum.GstCtx.rbx),
14050 ENTRY(cpum.GstCtx.bh),
14051 ENTRY(cpum.GstCtx.rsp),
14052 ENTRY(cpum.GstCtx.rbp),
14053 ENTRY(cpum.GstCtx.rsi),
14054 ENTRY(cpum.GstCtx.rdi),
14055 ENTRY(cpum.GstCtx.r8),
14056 ENTRY(cpum.GstCtx.r9),
14057 ENTRY(cpum.GstCtx.r10),
14058 ENTRY(cpum.GstCtx.r11),
14059 ENTRY(cpum.GstCtx.r12),
14060 ENTRY(cpum.GstCtx.r13),
14061 ENTRY(cpum.GstCtx.r14),
14062 ENTRY(cpum.GstCtx.r15),
14063 ENTRY(cpum.GstCtx.es.Sel),
14064 ENTRY(cpum.GstCtx.es.u64Base),
14065 ENTRY(cpum.GstCtx.es.u32Limit),
14066 ENTRY(cpum.GstCtx.es.Attr),
14067 ENTRY(cpum.GstCtx.cs.Sel),
14068 ENTRY(cpum.GstCtx.cs.u64Base),
14069 ENTRY(cpum.GstCtx.cs.u32Limit),
14070 ENTRY(cpum.GstCtx.cs.Attr),
14071 ENTRY(cpum.GstCtx.ss.Sel),
14072 ENTRY(cpum.GstCtx.ss.u64Base),
14073 ENTRY(cpum.GstCtx.ss.u32Limit),
14074 ENTRY(cpum.GstCtx.ss.Attr),
14075 ENTRY(cpum.GstCtx.ds.Sel),
14076 ENTRY(cpum.GstCtx.ds.u64Base),
14077 ENTRY(cpum.GstCtx.ds.u32Limit),
14078 ENTRY(cpum.GstCtx.ds.Attr),
14079 ENTRY(cpum.GstCtx.fs.Sel),
14080 ENTRY(cpum.GstCtx.fs.u64Base),
14081 ENTRY(cpum.GstCtx.fs.u32Limit),
14082 ENTRY(cpum.GstCtx.fs.Attr),
14083 ENTRY(cpum.GstCtx.gs.Sel),
14084 ENTRY(cpum.GstCtx.gs.u64Base),
14085 ENTRY(cpum.GstCtx.gs.u32Limit),
14086 ENTRY(cpum.GstCtx.gs.Attr),
14087 ENTRY(cpum.GstCtx.rip),
14088 ENTRY(cpum.GstCtx.eflags),
14089 ENTRY(cpum.GstCtx.uRipInhibitInt),
14090#undef ENTRY
14091 };
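 /* The table above must be sorted by offset for the binary lookup below;
 strict builds verify the ordering once. */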
14092#ifdef VBOX_STRICT
14093 static bool s_fOrderChecked = false;
14094 if (!s_fOrderChecked)
14095 {
14096 s_fOrderChecked = true;
14097 uint32_t offPrev = s_aMembers[0].off;
14098 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
14099 {
14100 Assert(s_aMembers[i].off > offPrev);
14101 offPrev = s_aMembers[i].off;
14102 }
14103 }
14104#endif
14105
14106 /*
14107 * Binary lookup.
14108 */
14109 unsigned iStart = 0;
14110 unsigned iEnd = RT_ELEMENTS(s_aMembers);
14111 for (;;)
14112 {
14113 unsigned const iCur = iStart + (iEnd - iStart) / 2;
14114 uint32_t const offCur = s_aMembers[iCur].off;
14115 if (off < offCur)
14116 {
14117 if (iCur != iStart)
14118 iEnd = iCur;
14119 else
14120 break;
14121 }
14122 else if (off > offCur)
14123 {
14124 if (iCur + 1 < iEnd)
14125 iStart = iCur + 1;
14126 else
14127 break;
14128 }
14129 else
14130 return s_aMembers[iCur].pszName;
14131 }
14132#ifdef VBOX_WITH_STATISTICS
14133 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
14134 return "iem.s.acThreadedFuncStats[iFn]";
14135#endif
14136 return NULL;
14137}
14138
14139
14140/**
14141 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
14142 * @returns pszBuf.
14143 * @param fFlags The flags.
14144 * @param pszBuf The output buffer.
14145 * @param cbBuf The output buffer size. At least 32 bytes.
14146 */
14147DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
14148{
14149 Assert(cbBuf >= 32);
14150 static RTSTRTUPLE const s_aModes[] =
14151 {
14152 /* [00] = */ { RT_STR_TUPLE("16BIT") },
14153 /* [01] = */ { RT_STR_TUPLE("32BIT") },
14154 /* [02] = */ { RT_STR_TUPLE("!2!") },
14155 /* [03] = */ { RT_STR_TUPLE("!3!") },
14156 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
14157 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
14158 /* [06] = */ { RT_STR_TUPLE("!6!") },
14159 /* [07] = */ { RT_STR_TUPLE("!7!") },
14160 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
14161 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
14162 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
14163 /* [0b] = */ { RT_STR_TUPLE("!b!") },
14164 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
14165 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
14166 /* [0e] = */ { RT_STR_TUPLE("!e!") },
14167 /* [0f] = */ { RT_STR_TUPLE("!f!") },
14168 /* [10] = */ { RT_STR_TUPLE("!10!") },
14169 /* [11] = */ { RT_STR_TUPLE("!11!") },
14170 /* [12] = */ { RT_STR_TUPLE("!12!") },
14171 /* [13] = */ { RT_STR_TUPLE("!13!") },
14172 /* [14] = */ { RT_STR_TUPLE("!14!") },
14173 /* [15] = */ { RT_STR_TUPLE("!15!") },
14174 /* [16] = */ { RT_STR_TUPLE("!16!") },
14175 /* [17] = */ { RT_STR_TUPLE("!17!") },
14176 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
14177 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
14178 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
14179 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
14180 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
14181 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
14182 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
14183 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
14184 };
14185 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
14186 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
14187 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
14188
14189 pszBuf[off++] = ' ';
14190 pszBuf[off++] = 'C';
14191 pszBuf[off++] = 'P';
14192 pszBuf[off++] = 'L';
14193 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
14194 Assert(off < 32);
14195
14196 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
14197
14198 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
14199 {
14200 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
14201 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
14202 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
14203 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
14204 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
14205 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
14206 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
14207 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
14208 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
14209 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
14210 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
14211 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
14212 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
14213 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
14214 };
14215 if (fFlags)
14216 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
14217 if (s_aFlags[i].fFlag & fFlags)
14218 {
14219 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
14220 pszBuf[off++] = ' ';
14221 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
14222 off += s_aFlags[i].cchName;
14223 fFlags &= ~s_aFlags[i].fFlag;
14224 if (!fFlags)
14225 break;
14226 }
14227 pszBuf[off] = '\0';
14228
14229 return pszBuf;
14230}
14231
14232
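/**
 * Disassembles a native translation block via the given debug info helper.
 *
 * When TB debug info is present, guest instructions, threaded calls, labels
 * and guest register shadowing notes are interleaved with the native code;
 * otherwise the guest opcode ranges and the native code are dumped separately.
 */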
14233DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
14234{
14235 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
14236#if defined(RT_ARCH_AMD64)
14237 static const char * const a_apszMarkers[] =
14238 {
14239 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
14240 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
14241 };
14242#endif
14243
14244 char szDisBuf[512];
14245 DISSTATE Dis;
14246 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
14247 uint32_t const cNative = pTb->Native.cInstructions;
14248 uint32_t offNative = 0;
14249#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14250 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
14251#endif
14252 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
14253 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
14254 : DISCPUMODE_64BIT;
14255#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14256 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
14257#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14258 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
14259#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14260# error "Port me"
14261#else
14262 csh hDisasm = ~(size_t)0;
14263# if defined(RT_ARCH_AMD64)
14264 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
14265# elif defined(RT_ARCH_ARM64)
14266 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
14267# else
14268# error "Port me"
14269# endif
14270 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
14271
14272 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
14273 //Assert(rcCs == CS_ERR_OK);
14274#endif
14275
14276 /*
14277 * Print TB info.
14278 */
14279 pHlp->pfnPrintf(pHlp,
14280 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
14281 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
14282 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
14283 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
14284#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14285 if (pDbgInfo && pDbgInfo->cEntries > 1)
14286 {
14287 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
14288
14289 /*
14290 * This disassembly is driven by the debug info which follows the native
14291 * code and indicates where the next guest instruction starts, where
14292 * the labels are, and similar things.
14293 */
14294 uint32_t idxThreadedCall = 0;
14295 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
14296 uint8_t idxRange = UINT8_MAX;
14297 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
14298 uint32_t offRange = 0;
14299 uint32_t offOpcodes = 0;
14300 uint32_t const cbOpcodes = pTb->cbOpcodes;
14301 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
14302 uint32_t const cDbgEntries = pDbgInfo->cEntries;
14303 uint32_t iDbgEntry = 1;
14304 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
14305
14306 while (offNative < cNative)
14307 {
14308 /* If we're at or have passed the point where the next chunk of debug
14309 info starts, process it. */
14310 if (offDbgNativeNext <= offNative)
14311 {
14312 offDbgNativeNext = UINT32_MAX;
14313 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
14314 {
14315 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
14316 {
14317 case kIemTbDbgEntryType_GuestInstruction:
14318 {
14319 /* Did the exec flag change? */
14320 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
14321 {
14322 pHlp->pfnPrintf(pHlp,
14323 " fExec change %#08x -> %#08x %s\n",
14324 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
14325 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
14326 szDisBuf, sizeof(szDisBuf)));
14327 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
14328 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
14329 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
14330 : DISCPUMODE_64BIT;
14331 }
14332
14333 /* New opcode range? We need to fend off a spurious debug info entry here for cases
14334 where the compilation was aborted before the opcode was recorded and the actual
14335 instruction was translated to a threaded call. This may happen when we run out
14336 of ranges, or when some complicated interrupts/FFs are found to be pending or
14337 similar. So, we just deal with it here rather than in the compiler code as it
14338 is a lot simpler to do here. */
14339 if ( idxRange == UINT8_MAX
14340 || idxRange >= cRanges
14341 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
14342 {
14343 idxRange += 1;
14344 if (idxRange < cRanges)
14345 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
14346 else
14347 continue;
14348 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
14349 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
14350 + (pTb->aRanges[idxRange].idxPhysPage == 0
14351 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
14352 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
14353 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
14354 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
14355 pTb->aRanges[idxRange].idxPhysPage);
14356 GCPhysPc += offRange;
14357 }
14358
14359 /* Disassemble the instruction. */
14360 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
14361 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
14362 uint32_t cbInstr = 1;
14363 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
14364 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
14365 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
14366 if (RT_SUCCESS(rc))
14367 {
14368 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14369 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14370 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14371 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14372
14373 static unsigned const s_offMarker = 55;
14374 static char const s_szMarker[] = " ; <--- guest";
14375 if (cch < s_offMarker)
14376 {
14377 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
14378 cch = s_offMarker;
14379 }
14380 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
14381 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
14382
14383 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
14384 }
14385 else
14386 {
14387 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
14388 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
14389 cbInstr = 1;
14390 }
14391 GCPhysPc += cbInstr;
14392 offOpcodes += cbInstr;
14393 offRange += cbInstr;
14394 continue;
14395 }
14396
14397 case kIemTbDbgEntryType_ThreadedCall:
14398 pHlp->pfnPrintf(pHlp,
14399 " Call #%u to %s (%u args) - %s\n",
14400 idxThreadedCall,
14401 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
14402 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
14403 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
14404 idxThreadedCall++;
14405 continue;
14406
14407 case kIemTbDbgEntryType_GuestRegShadowing:
14408 {
14409 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
14410 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
14411 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
14412 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
14413 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
14414 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
14415 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
14416 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
14417 else
14418 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
14419 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
14420 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
14421 continue;
14422 }
14423
14424 case kIemTbDbgEntryType_Label:
14425 {
14426 const char *pszName = "what_the_fudge";
14427 const char *pszComment = "";
14428 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
14429 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
14430 {
14431 case kIemNativeLabelType_Return:
14432 pszName = "Return";
14433 break;
14434 case kIemNativeLabelType_ReturnBreak:
14435 pszName = "ReturnBreak";
14436 break;
14437 case kIemNativeLabelType_ReturnWithFlags:
14438 pszName = "ReturnWithFlags";
14439 break;
14440 case kIemNativeLabelType_NonZeroRetOrPassUp:
14441 pszName = "NonZeroRetOrPassUp";
14442 break;
14443 case kIemNativeLabelType_RaiseGp0:
14444 pszName = "RaiseGp0";
14445 break;
14446 case kIemNativeLabelType_RaiseNm:
14447 pszName = "RaiseNm";
14448 break;
14449 case kIemNativeLabelType_RaiseUd:
14450 pszName = "RaiseUd";
14451 break;
14452 case kIemNativeLabelType_RaiseMf:
14453 pszName = "RaiseMf";
14454 break;
14455 case kIemNativeLabelType_ObsoleteTb:
14456 pszName = "ObsoleteTb";
14457 break;
14458 case kIemNativeLabelType_NeedCsLimChecking:
14459 pszName = "NeedCsLimChecking";
14460 break;
14461 case kIemNativeLabelType_CheckBranchMiss:
14462 pszName = "CheckBranchMiss";
14463 break;
14464 case kIemNativeLabelType_If:
14465 pszName = "If";
14466 fNumbered = true;
14467 break;
14468 case kIemNativeLabelType_Else:
14469 pszName = "Else";
14470 fNumbered = true;
14471 pszComment = " ; regs state restored pre-if-block";
14472 break;
14473 case kIemNativeLabelType_Endif:
14474 pszName = "Endif";
14475 fNumbered = true;
14476 break;
14477 case kIemNativeLabelType_CheckIrq:
14478 pszName = "CheckIrq_CheckVM";
14479 fNumbered = true;
14480 break;
14481 case kIemNativeLabelType_TlbLookup:
14482 pszName = "TlbLookup";
14483 fNumbered = true;
14484 break;
14485 case kIemNativeLabelType_TlbMiss:
14486 pszName = "TlbMiss";
14487 fNumbered = true;
14488 break;
14489 case kIemNativeLabelType_TlbDone:
14490 pszName = "TlbDone";
14491 fNumbered = true;
14492 break;
14493 case kIemNativeLabelType_Invalid:
14494 case kIemNativeLabelType_End:
14495 break;
14496 }
14497 if (fNumbered)
14498 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
14499 else
14500 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
14501 continue;
14502 }
14503
14504 case kIemTbDbgEntryType_NativeOffset:
14505 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
14506 Assert(offDbgNativeNext > offNative);
14507 break;
14508
14509#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
14510 case kIemTbDbgEntryType_DelayedPcUpdate:
14511 pHlp->pfnPrintf(pHlp,
14512 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
14513 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
14514 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
14515 continue;
14516#endif
14517
14518 default:
14519 AssertFailed();
14520 }
14521 iDbgEntry++;
14522 break;
14523 }
14524 }
14525
14526 /*
14527 * Disassemble the next native instruction.
14528 */
14529 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
14530# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14531 uint32_t cbInstr = sizeof(paNative[0]);
14532 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
14533 if (RT_SUCCESS(rc))
14534 {
14535# if defined(RT_ARCH_AMD64)
14536 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
14537 {
14538 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
14539 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
14540 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
14541 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
14542 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
14543 uInfo & 0x8000 ? "recompiled" : "todo");
14544 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
14545 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
14546 else
14547 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
14548 }
14549 else
14550# endif
14551 {
14552 const char *pszAnnotation = NULL;
14553# ifdef RT_ARCH_AMD64
14554 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14555 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14556 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14557 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14558 PCDISOPPARAM pMemOp;
14559 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
14560 pMemOp = &Dis.Param1;
14561 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
14562 pMemOp = &Dis.Param2;
14563 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
14564 pMemOp = &Dis.Param3;
14565 else
14566 pMemOp = NULL;
14567 if ( pMemOp
14568 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
14569 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
14570 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
14571 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
14572
14573# elif defined(RT_ARCH_ARM64)
14574 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
14575 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14576 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14577# else
14578# error "Port me"
14579# endif
14580 if (pszAnnotation)
14581 {
14582 static unsigned const s_offAnnotation = 55;
14583 size_t const cchAnnotation = strlen(pszAnnotation);
14584 size_t cchDis = strlen(szDisBuf);
14585 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
14586 {
14587 if (cchDis < s_offAnnotation)
14588 {
14589 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
14590 cchDis = s_offAnnotation;
14591 }
14592 szDisBuf[cchDis++] = ' ';
14593 szDisBuf[cchDis++] = ';';
14594 szDisBuf[cchDis++] = ' ';
14595 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
14596 }
14597 }
14598 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14599 }
14600 }
14601 else
14602 {
14603# if defined(RT_ARCH_AMD64)
14604 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14605 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14606# elif defined(RT_ARCH_ARM64)
14607 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14608# else
14609# error "Port me"
14610# endif
14611 cbInstr = sizeof(paNative[0]);
14612 }
14613 offNative += cbInstr / sizeof(paNative[0]);
14614
14615# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14616 cs_insn *pInstr;
14617 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14618 (uintptr_t)pNativeCur, 1, &pInstr);
14619 if (cInstrs > 0)
14620 {
14621 Assert(cInstrs == 1);
14622 const char *pszAnnotation = NULL;
14623# if defined(RT_ARCH_ARM64)
14624 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
14625 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
14626 {
14627                        /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
14628 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
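                            /* A typical op_str at this point looks something like "w9, [x28, #0x5a0]"
                               (example only); we just pick out the x27/x28 base register and the
                               immediate displacement below, anything fancier is left unannotated. */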
14629 char *psz = strchr(pInstr->op_str, '[');
14630 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
14631 {
14632                            uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
14633 int32_t off = -1;
14634 psz += 4;
14635 if (*psz == ']')
14636 off = 0;
14637 else if (*psz == ',')
14638 {
14639 psz = RTStrStripL(psz + 1);
14640 if (*psz == '#')
14641 off = RTStrToInt32(&psz[1]);
14642 /** @todo deal with index registers and LSL as well... */
14643 }
14644 if (off >= 0)
14645 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
14646 }
14647 }
14648# endif
14649
14650 size_t const cchOp = strlen(pInstr->op_str);
14651# if defined(RT_ARCH_AMD64)
14652 if (pszAnnotation)
14653 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
14654 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
14655 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
14656 else
14657 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14658 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14659
14660# else
14661 if (pszAnnotation)
14662 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
14663 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
14664 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
14665 else
14666 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14667 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14668# endif
14669 offNative += pInstr->size / sizeof(*pNativeCur);
14670 cs_free(pInstr, cInstrs);
14671 }
14672 else
14673 {
14674# if defined(RT_ARCH_AMD64)
14675 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14676                                    pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14677# else
14678 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14679# endif
14680 offNative++;
14681 }
14682# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14683 }
14684 }
14685 else
14686#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
14687 {
14688 /*
14689 * No debug info, just disassemble the x86 code and then the native code.
14690 *
14691 * First the guest code:
14692 */
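    /* Physical page lookup sketch: a range with idxPhysPage == 0 lives on the page of GCPhysPc
       itself, while higher idxPhysPage values refer to pTb->aGCPhysPages[idxPhysPage - 1], as the
       expression just below shows. */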
14693 for (unsigned i = 0; i < pTb->cRanges; i++)
14694 {
14695 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
14696 + (pTb->aRanges[i].idxPhysPage == 0
14697 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
14698 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
14699 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
14700 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
14701 unsigned off = pTb->aRanges[i].offOpcodes;
14702 /** @todo this ain't working when crossing pages! */
14703 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
14704 while (off < cbOpcodes)
14705 {
14706 uint32_t cbInstr = 1;
14707 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
14708 &pTb->pabOpcodes[off], cbOpcodes - off,
14709 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
14710 if (RT_SUCCESS(rc))
14711 {
14712 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14713 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14714 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14715 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14716 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
14717 GCPhysPc += cbInstr;
14718 off += cbInstr;
14719 }
14720 else
14721 {
14722 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
14723 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
14724 break;
14725 }
14726 }
14727 }
14728
14729 /*
14730 * Then the native code:
14731 */
14732 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
14733 while (offNative < cNative)
14734 {
14735 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
14736# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14737 uint32_t cbInstr = sizeof(paNative[0]);
14738 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
14739 if (RT_SUCCESS(rc))
14740 {
14741# if defined(RT_ARCH_AMD64)
14742 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
14743 {
14744 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
14745 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
14746 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
14747 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
14748 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
14749 uInfo & 0x8000 ? "recompiled" : "todo");
14750 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
14751 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
14752 else
14753 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
14754 }
14755 else
14756# endif
14757 {
14758# ifdef RT_ARCH_AMD64
14759 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14760 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14761 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14762 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14763# elif defined(RT_ARCH_ARM64)
14764 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
14765 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14766 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14767# else
14768# error "Port me"
14769# endif
14770 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14771 }
14772 }
14773 else
14774 {
14775# if defined(RT_ARCH_AMD64)
14776 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14777 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14778# else
14779 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14780# endif
14781 cbInstr = sizeof(paNative[0]);
14782 }
14783 offNative += cbInstr / sizeof(paNative[0]);
14784
14785# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14786 cs_insn *pInstr;
14787 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14788 (uintptr_t)pNativeCur, 1, &pInstr);
14789 if (cInstrs > 0)
14790 {
14791 Assert(cInstrs == 1);
14792# if defined(RT_ARCH_AMD64)
14793 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14794 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14795# else
14796 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14797 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14798# endif
14799 offNative += pInstr->size / sizeof(*pNativeCur);
14800 cs_free(pInstr, cInstrs);
14801 }
14802 else
14803 {
14804# if defined(RT_ARCH_AMD64)
14805 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14806                                    pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14807# else
14808 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14809# endif
14810 offNative++;
14811 }
14812# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14813 }
14814 }
14815
14816#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14817 /* Cleanup. */
14818 cs_close(&hDisasm);
14819#endif
14820}
14821
14822
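/*
 * Call-site sketch (hypothetical, for illustration only; the threaded executor decides when a
 * TB is hot enough, and the field and threshold names used here are assumptions):
 *
 *      if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED && pTb->cUsed >= cHotThreshold)
 *          pTb = iemNativeRecompile(pVCpu, pTb);   // returns pTb unchanged on failure
 */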
14823/**
14824 * Recompiles the given threaded TB into a native one.
14825 *
14826 * In case of failure the translation block will be returned as-is.
14827 *
14828 * @returns pTb.
14829 * @param pVCpu The cross context virtual CPU structure of the calling
14830 * thread.
14831 * @param   pTb         The threaded translation block to recompile to native.
14832 */
14833DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
14834{
14835 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
14836
14837 /*
14838     * The first time thru we allocate the recompiler state; on subsequent calls
14839     * we just reset it before using it again.
14840 */
14841 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
14842 if (RT_LIKELY(pReNative))
14843 iemNativeReInit(pReNative, pTb);
14844 else
14845 {
14846 pReNative = iemNativeInit(pVCpu, pTb);
14847 AssertReturn(pReNative, pTb);
14848 }
14849
14850#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14851 /*
14852 * First do liveness analysis. This is done backwards.
14853 */
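    /* Informal sketch of what the loop below computes (not an extra pass): starting from an
       all-unused entry for the final call, each step derives
            paLivenessEntries[idxCall - 1]  from  (call #idxCall, paLivenessEntries[idxCall])
       so every entry ends up describing how the calls after it use the guest state. */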
14854 {
14855 uint32_t idxCall = pTb->Thrd.cCalls;
14856 if (idxCall <= pReNative->cLivenessEntriesAlloc)
14857 { /* likely */ }
14858 else
14859 {
14860 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
14861 while (idxCall > cAlloc)
14862 cAlloc *= 2;
14863 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
14864 AssertReturn(pvNew, pTb);
14865 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
14866 pReNative->cLivenessEntriesAlloc = cAlloc;
14867 }
14868 AssertReturn(idxCall > 0, pTb);
14869 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
14870
14871 /* The initial (final) entry. */
14872 idxCall--;
14873 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
14874
14875 /* Loop backwards thru the calls and fill in the other entries. */
14876 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
14877 while (idxCall > 0)
14878 {
14879 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
14880 if (pfnLiveness)
14881 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
14882 else
14883 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
14884 pCallEntry--;
14885 idxCall--;
14886 }
14887
14888# ifdef VBOX_WITH_STATISTICS
14889    /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
14890       to 'clobbered' rather than 'input'. */
14891 /** @todo */
14892# endif
14893 }
14894#endif
14895
14896 /*
14897 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
14898 * for aborting if an error happens.
14899 */
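    /* Error-path sketch: when an emitter runs out of buffer space or hits an internal problem it
       aborts with a VBox status code via the longjmp/throw mechanism behind these macros; control
       then lands in the IEMNATIVE_CATCH_LONGJMP block further down, which logs rc and returns the
       untouched threaded pTb. */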
14900 uint32_t cCallsLeft = pTb->Thrd.cCalls;
14901#ifdef LOG_ENABLED
14902 uint32_t const cCallsOrg = cCallsLeft;
14903#endif
14904 uint32_t off = 0;
14905 int rc = VINF_SUCCESS;
14906 IEMNATIVE_TRY_SETJMP(pReNative, rc)
14907 {
14908 /*
14909 * Emit prolog code (fixed).
14910 */
14911 off = iemNativeEmitProlog(pReNative, off);
14912
14913 /*
14914 * Convert the calls to native code.
14915 */
14916#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14917 int32_t iGstInstr = -1;
14918#endif
14919#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
14920 uint32_t cThreadedCalls = 0;
14921 uint32_t cRecompiledCalls = 0;
14922#endif
14923#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
14924 uint32_t idxCurCall = 0;
14925#endif
14926 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
14927 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
14928 while (cCallsLeft-- > 0)
14929 {
14930 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
14931#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14932 pReNative->idxCurCall = idxCurCall;
14933#endif
14934
14935 /*
14936 * Debug info, assembly markup and statistics.
14937 */
14938#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
14939 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
14940 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
14941#endif
14942#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14943 iemNativeDbgInfoAddNativeOffset(pReNative, off);
14944 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
14945 {
14946 if (iGstInstr < (int32_t)pTb->cInstructions)
14947 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
14948 else
14949 Assert(iGstInstr == pTb->cInstructions);
14950 iGstInstr = pCallEntry->idxInstr;
14951 }
14952 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
14953#endif
14954#if defined(VBOX_STRICT)
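            /* Marker payload sketch: low word = call index with bit 15 set when the call was natively
               recompiled, high word = the threaded function number; this is what the disassembler
               helper above unpacks from the 7-byte NOP. */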
14955 off = iemNativeEmitMarker(pReNative, off,
14956 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
14957#endif
14958#if defined(VBOX_STRICT)
14959 iemNativeRegAssertSanity(pReNative);
14960#endif
14961#ifdef VBOX_WITH_STATISTICS
14962 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
14963#endif
14964
14965 /*
14966 * Actual work.
14967 */
14968 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
14969 pfnRecom ? "(recompiled)" : "(todo)"));
14970 if (pfnRecom) /** @todo stats on this. */
14971 {
14972 off = pfnRecom(pReNative, off, pCallEntry);
14973 STAM_REL_STATS({cRecompiledCalls++;});
14974 }
14975 else
14976 {
14977 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
14978 STAM_REL_STATS({cThreadedCalls++;});
14979 }
14980 Assert(off <= pReNative->cInstrBufAlloc);
14981 Assert(pReNative->cCondDepth == 0);
14982
14983#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
14984 if (LogIs2Enabled())
14985 {
14986 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
14987# ifndef IEMLIVENESS_EXTENDED_LAYOUT
14988 static const char s_achState[] = "CUXI";
14989# else
14990 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
14991# endif
14992
14993 char szGpr[17];
14994 for (unsigned i = 0; i < 16; i++)
14995 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
14996 szGpr[16] = '\0';
14997
14998 char szSegBase[X86_SREG_COUNT + 1];
14999 char szSegLimit[X86_SREG_COUNT + 1];
15000 char szSegAttrib[X86_SREG_COUNT + 1];
15001 char szSegSel[X86_SREG_COUNT + 1];
15002 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
15003 {
15004 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
15005 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
15006 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
15007 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
15008 }
15009 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
15010 = szSegSel[X86_SREG_COUNT] = '\0';
15011
15012 char szEFlags[8];
15013 for (unsigned i = 0; i < 7; i++)
15014 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
15015 szEFlags[7] = '\0';
15016
15017                Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
15018 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
15019 }
15020#endif
15021
15022 /*
15023 * Advance.
15024 */
15025 pCallEntry++;
15026#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
15027 idxCurCall++;
15028#endif
15029 }
15030
15031 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
15032 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
15033 if (!cThreadedCalls)
15034 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
15035
15036 /*
15037 * Emit the epilog code.
15038 */
15039 uint32_t idxReturnLabel;
15040 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
15041
15042 /*
15043 * Generate special jump labels.
15044 */
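        /* Each label type requested during recompilation (see bmLabelTypes) gets a single shared
           tail snippet emitted here; jumps to e.g. the RaiseGp0 label from the TB body are fixed up
           to land on that one snippet, and the common return label is passed in for whatever tail
           handling each snippet needs. */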
15045 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
15046 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
15047 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
15048 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
15049 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
15050 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
15051 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
15052 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
15053 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
15054 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
15055 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
15056 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
15057 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
15058 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
15059 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
15060 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
15061 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
15062 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
15063 }
15064 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
15065 {
15066 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
15067 return pTb;
15068 }
15069 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
15070 Assert(off <= pReNative->cInstrBufAlloc);
15071
15072 /*
15073     * Make sure all labels have been defined.
15074 */
15075 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
15076#ifdef VBOX_STRICT
15077 uint32_t const cLabels = pReNative->cLabels;
15078 for (uint32_t i = 0; i < cLabels; i++)
15079 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
15080#endif
15081
15082 /*
15083 * Allocate executable memory, copy over the code we've generated.
15084 */
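    /* The allocation is a tight fit of exactly 'off' host instruction units from the per-VCPU
       executable allocator; any delayed TB frees are processed first so the request is less likely
       to fail.  Note that iemExecMemAllocatorReadyForUse() further down is what finally marks the
       copied code as ready to run (e.g. instruction cache maintenance where the host requires it). */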
15085 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
15086 if (pTbAllocator->pDelayedFreeHead)
15087 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
15088
15089 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
15090 AssertReturn(paFinalInstrBuf, pTb);
15091 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
15092
15093 /*
15094 * Apply fixups.
15095 */
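    /* Worked example (illustration only) of a single ARM64 RelImm19At5 patch: with
       paLabels[idxLabel].off = 40, paFixups[i].off = 100 and offAddend = 0 we get offDisp = -60
       (in 32-bit instruction units); the low 19 bits of that value are merged into bits [23:5] of
       the conditional branch at paFinalInstrBuf[100], leaving the condition and other fields in
       bits [31:24] and [4:0] intact. */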
15096 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
15097 uint32_t const cFixups = pReNative->cFixups;
15098 for (uint32_t i = 0; i < cFixups; i++)
15099 {
15100 Assert(paFixups[i].off < off);
15101 Assert(paFixups[i].idxLabel < cLabels);
15102 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
15103 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
15104 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
15105 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
15106 switch (paFixups[i].enmType)
15107 {
15108#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
15109 case kIemNativeFixupType_Rel32:
15110 Assert(paFixups[i].off + 4 <= off);
15111 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15112 continue;
15113
15114#elif defined(RT_ARCH_ARM64)
15115 case kIemNativeFixupType_RelImm26At0:
15116 {
15117 Assert(paFixups[i].off < off);
15118 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15119 Assert(offDisp >= -262144 && offDisp < 262144);
15120 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
15121 continue;
15122 }
15123
15124 case kIemNativeFixupType_RelImm19At5:
15125 {
15126 Assert(paFixups[i].off < off);
15127 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15128 Assert(offDisp >= -262144 && offDisp < 262144);
15129 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
15130 continue;
15131 }
15132
15133 case kIemNativeFixupType_RelImm14At5:
15134 {
15135 Assert(paFixups[i].off < off);
15136 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15137 Assert(offDisp >= -8192 && offDisp < 8192);
15138 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
15139 continue;
15140 }
15141
15142#endif
15143 case kIemNativeFixupType_Invalid:
15144 case kIemNativeFixupType_End:
15145 break;
15146 }
15147 AssertFailed();
15148 }
15149
15150 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
15151 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
15152
15153 /*
15154 * Convert the translation block.
15155 */
15156 RTMemFree(pTb->Thrd.paCalls);
15157 pTb->Native.paInstructions = paFinalInstrBuf;
15158 pTb->Native.cInstructions = off;
15159 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
15160#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15161    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
15162 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
15163#endif
15164
15165 Assert(pTbAllocator->cThreadedTbs > 0);
15166 pTbAllocator->cThreadedTbs -= 1;
15167 pTbAllocator->cNativeTbs += 1;
15168 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
15169
15170#ifdef LOG_ENABLED
15171 /*
15172 * Disassemble to the log if enabled.
15173 */
15174 if (LogIs3Enabled())
15175 {
15176 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
15177 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
15178# ifdef DEBUG_bird
15179 RTLogFlush(NULL);
15180# endif
15181 }
15182#endif
15183 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
15184
15185 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
15186 return pTb;
15187}
15188