
source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103660

Last change on this file since 103660 was 103660, checked in by vboxsync, 12 months ago

VMM/IEM: Native translation of IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() body, (not tested as AVX is not exposed to the guest right now due to missing instruction emulations, there is a breakpoint emitted so this can be tested when it gets used actually), bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 663.7 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103660 2024-03-04 11:25:11Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
136#endif
137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
138static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
139static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
140#endif
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
142DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
143 IEMNATIVEGSTREG enmGstReg, uint32_t off);
144DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store internal data
152 * in the chunk.
153 *
154 * Using the RTHeapSimple is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation lets us restrict page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
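/* Example: with the 128 byte unit size above, a request is rounded up to whole
 * units the same way iemExecMemAllocatorAllocInChunk does it below:
 *
 *      cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
 *
 * e.g. cbReq = 300 yields (300 + 127) >> 7 = 3 units, i.e. 384 bytes, tracked as
 * three bits in the per-chunk allocation bitmap. */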
167
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Critical section protecting the GDB JIT descriptor list. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
234
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
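/* Minimal sketch of the registration protocol used further down in
 * iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk: a new GDBJITCODEENTRY is
 * linked onto the descriptor list, the action is set, and GDB is poked via the
 * breakpointed function above:
 *
 *      pEntry->pNext = NULL;
 *      pEntry->pPrev = __jit_debug_descriptor.pTail;
 *      ... link onto pHead/pTail ...
 *      __jit_debug_descriptor.pRelevant = pEntry;
 *      __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
 *      __jit_debug_register_code();
 *      __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
 */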
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDA into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
268/** Pointer to per-chunk unwind info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
281 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap. So,
344 * for the sake of simplicity/laziness, they are allocated as one continuous
345 * chunk. */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to a chunk). */
351 uint32_t cBitmapElementsPerChunk;
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * requested memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 /** The size of the initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside in 32-bit RVA distance to the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
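/* Rough layout of the single RTMemAllocZ block that iemExecMemAllocatorInit sets
 * up below (each part cache line aligned):
 *      - the IEMEXECMEMALLOCATOR header plus the aChunks[cMaxChunks] flexible array,
 *      - pbmAlloc: the per-chunk allocation bitmaps (alternative sub-allocator only),
 *      - paEhFrames: one IEMEXECMEMCHUNKEHFRAME per chunk (ring-3, non-Windows only).
 */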
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for cReqUnits of consecutive clear bits
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
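/* Worked example of the first-fit scan above: with 128 byte units, a bitmap qword
 * of 0x0000000000000013 (bits 0, 1 and 4 set) and cReqUnits = 3, ASMBitFirstClear
 * finds bit 2, the inner loop stops at the set bit 4 after only two clear bits,
 * and the scan resumes with ASMBitNextClear at bit 5, which satisfies the request:
 * bits 5 thru 7 are marked allocated and the caller gets pvChunk + 5 * 128. */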
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516
517 for (unsigned iIteration = 0;; iIteration++)
518 {
519 /*
520 * Adjust the request size so it'll fit the allocator alignment/whatnot.
521 *
522 * For the RTHeapSimple allocator this means to follow the logic described
523 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
524 * existing chunks if we think we've got sufficient free memory around.
525 *
526 * While for the alternative one we just align it up to a whole unit size.
527 */
528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
529 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
530#else
531 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
532#endif
533 if (cbReq <= pExecMemAllocator->cbFree)
534 {
535 uint32_t const cChunks = pExecMemAllocator->cChunks;
536 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
537 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
544 {
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 }
549 }
550
551 /*
552 * Can we grow it with another chunk?
553 */
554 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
555 {
556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
557 AssertLogRelRCReturn(rc, NULL);
558
559 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
560 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
561 if (pvRet)
562 return pvRet;
563 AssertFailed();
564 }
565
566 /*
567 * Try prune native TBs once.
568 */
569 if (iIteration == 0)
570 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
571 else
572 {
573 /** @todo stats... */
574 return NULL;
575 }
576 }
577
578}
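/* Minimal usage sketch (hypothetical caller; only the three allocator functions
 * are real, the rest is made up for illustration): allocate, emit the code, flip
 * the pages back to read+exec, and free the block again when the TB is retired:
 *
 *      IEMNATIVEINSTR *paCode = (IEMNATIVEINSTR *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *      if (paCode)
 *      {
 *          emitSomeNativeCode(paCode, cbCode);                      <- hypothetical
 *          iemExecMemAllocatorReadyForUse(pVCpu, paCode, cbCode);   <- RW -> RX, icache flush on darwin
 *          ...
 *          iemExecMemAllocatorFree(pVCpu, paCode, cbCode);
 *      }
 */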
579
580
581/** This is a hook that we may need later for changing memory protection back
582 * to readonly+exec */
583static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
584{
585#ifdef RT_OS_DARWIN
586 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
587 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
588 AssertRC(rc); RT_NOREF(pVCpu);
589
590 /*
591 * Flush the instruction cache:
592 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
593 */
594 /* sys_dcache_flush(pv, cb); - not necessary */
595 sys_icache_invalidate(pv, cb);
596#else
597 RT_NOREF(pVCpu, pv, cb);
598#endif
599}
600
601
602/**
603 * Frees executable memory.
604 */
605void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
609 Assert(pv);
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612#else
613 Assert(!((uintptr_t)pv & 63));
614#endif
615
616 /* Align the size as we did when allocating the block. */
617#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
618 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#else
620 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
621#endif
622
623 /* Free it / assert sanity. */
624#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
627 bool fFound = false;
628 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
629 {
630 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
631 fFound = offChunk < cbChunk;
632 if (fFound)
633 {
634#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
635 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
636 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
637
638 /* Check that it's valid and free it. */
639 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
641 for (uint32_t i = 1; i < cReqUnits; i++)
642 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
643 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
644
645 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
646 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
647
648 /* Update the stats. */
649 pExecMemAllocator->cbAllocated -= cb;
650 pExecMemAllocator->cbFree += cb;
651 pExecMemAllocator->cAllocations -= 1;
652 return;
653#else
654 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
655 break;
656#endif
657 }
658 }
659# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
660 AssertFailed();
661# else
662 Assert(fFound);
663# endif
664#endif
665
666#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
667 /* Update stats while cb is freshly calculated.*/
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Free it. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674#endif
675}
676
677
678
679#ifdef IN_RING3
680# ifdef RT_OS_WINDOWS
681
682/**
683 * Initializes the unwind info structures for windows hosts.
684 */
685static int
686iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
687 void *pvChunk, uint32_t idxChunk)
688{
689 RT_NOREF(pVCpu);
690
691 /*
692 * The AMD64 unwind opcodes.
693 *
694 * This is a program that starts with RSP after a RET instruction that
695 * ends up in recompiled code, and the operations we describe here will
696 * restore all non-volatile registers and bring RSP back to where our
697 * RET address is. This means it's reverse order from what happens in
698 * the prologue.
699 *
700 * Note! We use a frame register approach here both because we have one
701 * available and, mainly, because the UWOP_ALLOC_LARGE argument values
702 * would be a pain to write initializers for. On the positive
703 * side, we're impervious to changes in the stack variable
704 * area and can deal with dynamic stack allocations if necessary.
705 */
706 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
707 {
708 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
709 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
710 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
711 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
712 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
713 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
714 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
715 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
716 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
717 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
718 };
719 union
720 {
721 IMAGE_UNWIND_INFO Info;
722 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
723 } s_UnwindInfo =
724 {
725 {
726 /* .Version = */ 1,
727 /* .Flags = */ 0,
728 /* .SizeOfProlog = */ 16, /* whatever */
729 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
730 /* .FrameRegister = */ X86_GREG_xBP,
731 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
732 }
733 };
734 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
735 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
736
737 /*
738 * Calc how much space we need and allocate it off the exec heap.
739 */
740 unsigned const cFunctionEntries = 1;
741 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
742 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
743# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
744 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
745 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
746 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
747# else
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
749 - pExecMemAllocator->cbHeapBlockHdr;
750 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
751 32 /*cbAlignment*/);
752# endif
753 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
754 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
755
756 /*
757 * Initialize the structures.
758 */
759 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
760
761 paFunctions[0].BeginAddress = 0;
762 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
763 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
764
765 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
766 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
767
768 /*
769 * Register it.
770 */
771 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
772 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
773
774 return VINF_SUCCESS;
775}
776
777
778# else /* !RT_OS_WINDOWS */
779
780/**
781 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
784{
785 if (iValue >= 64)
786 {
787 Assert(iValue < 0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
790 }
791 else if (iValue >= 0)
792 *Ptr.pb++ = (uint8_t)iValue;
793 else if (iValue > -64)
794 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
795 else
796 {
797 Assert(iValue > -0x2000);
798 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
799 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
800 }
801 return Ptr;
802}
803
804
805/**
806 * Emits an ULEB128 encoded value (up to 64-bit wide).
807 */
808DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
809{
810 while (uValue >= 0x80)
811 {
812 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
813 uValue >>= 7;
814 }
815 *Ptr.pb++ = (uint8_t)uValue;
816 return Ptr;
817}
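/* Encoding examples for the two helpers above: iemDwarfPutLeb128(Ptr, -8) emits the
 * single byte 0x78 (sign bit 0x40 set, no continuation), iemDwarfPutLeb128(Ptr, 200)
 * emits 0xc8 0x01, and iemDwarfPutUleb128(Ptr, 300) emits 0xac 0x02 (seven value
 * bits per byte, 0x80 marking continuation). */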
818
819
820/**
821 * Emits a CFA rule as register @a uReg + offset @a off.
822 */
823DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
824{
825 *Ptr.pb++ = DW_CFA_def_cfa;
826 Ptr = iemDwarfPutUleb128(Ptr, uReg);
827 Ptr = iemDwarfPutUleb128(Ptr, off);
828 return Ptr;
829}
830
831
832/**
833 * Emits a register (@a uReg) save location:
834 * CFA + @a off * data_alignment_factor
835 */
836DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
837{
838 if (uReg < 0x40)
839 *Ptr.pb++ = DW_CFA_offset | uReg;
840 else
841 {
842 *Ptr.pb++ = DW_CFA_offset_extended;
843 Ptr = iemDwarfPutUleb128(Ptr, uReg);
844 }
845 Ptr = iemDwarfPutUleb128(Ptr, off);
846 return Ptr;
847}
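/* Example of the bytes produced, assuming the usual encodings (DW_CFA_offset is the
 * 0x80 opcode group and RBP is DWARF register 6 on AMD64):
 * iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) emits 0x86 0x02, i.e.
 * DW_CFA_offset | 6 followed by ULEB128(2), which the unwinder reads as
 * "RBP was saved at CFA + 2 * data_alignment_factor" (CFA - 16 with the -8
 * factor set in the CIE below). */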
848
849
850# if 0 /* unused */
851/**
852 * Emits a register (@a uReg) save location, using signed offset:
853 * CFA + @a offSigned * data_alignment_factor
854 */
855DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
856{
857 *Ptr.pb++ = DW_CFA_offset_extended_sf;
858 Ptr = iemDwarfPutUleb128(Ptr, uReg);
859 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
860 return Ptr;
861}
862# endif
863
864
865/**
866 * Initializes the unwind info section for non-windows hosts.
867 */
868static int
869iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
870 void *pvChunk, uint32_t idxChunk)
871{
872 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
874
875 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
876
877 /*
878 * Generate the CIE first.
879 */
880# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
881 uint8_t const iDwarfVer = 3;
882# else
883 uint8_t const iDwarfVer = 4;
884# endif
885 RTPTRUNION const PtrCie = Ptr;
886 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
887 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
888 *Ptr.pb++ = iDwarfVer; /* DWARF version */
889 *Ptr.pb++ = 0; /* Augmentation. */
890 if (iDwarfVer >= 4)
891 {
892 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
893 *Ptr.pb++ = 0; /* Segment selector size. */
894 }
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
897# else
898 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
899# endif
900 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
903# elif defined(RT_ARCH_ARM64)
904 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
905# else
906# error "port me"
907# endif
908 /* Initial instructions: */
909# ifdef RT_ARCH_AMD64
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
918# elif defined(RT_ARCH_ARM64)
919# if 1
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
921# else
922 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
923# endif
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
936 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
937 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
938# else
939# error "port me"
940# endif
941 while ((Ptr.u - PtrCie.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the CIE size. */
944 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
945
946 /*
947 * Generate an FDE for the whole chunk area.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
951# endif
952 RTPTRUNION const PtrFde = Ptr;
953 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
954 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
955 Ptr.pu32++;
956 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
957 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
958# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
959 *Ptr.pb++ = DW_CFA_nop;
960# endif
961 while ((Ptr.u - PtrFde.u) & 3)
962 *Ptr.pb++ = DW_CFA_nop;
963 /* Finalize the FDE size. */
964 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
965
966 /* Terminator entry. */
967 *Ptr.pu32++ = 0;
968 *Ptr.pu32++ = 0; /* just to be sure... */
969 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
970
971 /*
972 * Register it.
973 */
974# ifdef IEMNATIVE_USE_LIBUNWIND
975 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
976# else
977 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
978 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
979# endif
980
981# ifdef IEMNATIVE_USE_GDB_JIT
982 /*
983 * Now for telling GDB about this (experimental).
984 *
985 * This seems to work best with ET_DYN.
986 */
987 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
988# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
989 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
990 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
991# else
992 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
993 - pExecMemAllocator->cbHeapBlockHdr;
994 GDBJITSYMFILE * const pSymFile = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
995# endif
996 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
997 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
998
999 RT_ZERO(*pSymFile);
1000
1001 /*
1002 * The ELF header:
1003 */
1004 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1005 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1006 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1007 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1008 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1009 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1010 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1011 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1012# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1013 pSymFile->EHdr.e_type = ET_DYN;
1014# else
1015 pSymFile->EHdr.e_type = ET_REL;
1016# endif
1017# ifdef RT_ARCH_AMD64
1018 pSymFile->EHdr.e_machine = EM_AMD64;
1019# elif defined(RT_ARCH_ARM64)
1020 pSymFile->EHdr.e_machine = EM_AARCH64;
1021# else
1022# error "port me"
1023# endif
1024 pSymFile->EHdr.e_version = 1; /*?*/
1025 pSymFile->EHdr.e_entry = 0;
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1028# else
1029 pSymFile->EHdr.e_phoff = 0;
1030# endif
1031 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1032 pSymFile->EHdr.e_flags = 0;
1033 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1035 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1036 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1037# else
1038 pSymFile->EHdr.e_phentsize = 0;
1039 pSymFile->EHdr.e_phnum = 0;
1040# endif
1041 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1042 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1043 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1044
1045 uint32_t offStrTab = 0;
1046#define APPEND_STR(a_szStr) do { \
1047 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1048 offStrTab += sizeof(a_szStr); \
1049 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1050 } while (0)
1051#define APPEND_STR_FMT(a_szStr, ...) do { \
1052 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1053 offStrTab++; \
1054 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1055 } while (0)
1056
1057 /*
1058 * Section headers.
1059 */
1060 /* Section header #0: NULL */
1061 unsigned i = 0;
1062 APPEND_STR("");
1063 RT_ZERO(pSymFile->aShdrs[i]);
1064 i++;
1065
1066 /* Section header: .eh_frame */
1067 pSymFile->aShdrs[i].sh_name = offStrTab;
1068 APPEND_STR(".eh_frame");
1069 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1070 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1071# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1072 pSymFile->aShdrs[i].sh_offset
1073 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1074# else
1075 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1076 pSymFile->aShdrs[i].sh_offset = 0;
1077# endif
1078
1079 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1080 pSymFile->aShdrs[i].sh_link = 0;
1081 pSymFile->aShdrs[i].sh_info = 0;
1082 pSymFile->aShdrs[i].sh_addralign = 1;
1083 pSymFile->aShdrs[i].sh_entsize = 0;
1084 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1085 i++;
1086
1087 /* Section header: .shstrtab */
1088 unsigned const iShStrTab = i;
1089 pSymFile->EHdr.e_shstrndx = iShStrTab;
1090 pSymFile->aShdrs[i].sh_name = offStrTab;
1091 APPEND_STR(".shstrtab");
1092 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1093 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1095 pSymFile->aShdrs[i].sh_offset
1096 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1097# else
1098 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1099 pSymFile->aShdrs[i].sh_offset = 0;
1100# endif
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 i++;
1107
1108 /* Section header: .symtab */
1109 pSymFile->aShdrs[i].sh_name = offStrTab;
1110 APPEND_STR(".symtab");
1111 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1112 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1113 pSymFile->aShdrs[i].sh_offset
1114 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1115 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_link = iShStrTab;
1117 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1118 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1119 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1120 i++;
1121
1122# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1123 /* Section header: .dynsym */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".dynsym");
1126 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1128 pSymFile->aShdrs[i].sh_offset
1129 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1130 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_link = iShStrTab;
1132 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1133 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1134 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1135 i++;
1136# endif
1137
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 /* Section header: .dynamic */
1140 pSymFile->aShdrs[i].sh_name = offStrTab;
1141 APPEND_STR(".dynamic");
1142 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1143 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1144 pSymFile->aShdrs[i].sh_offset
1145 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1146 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1147 pSymFile->aShdrs[i].sh_link = iShStrTab;
1148 pSymFile->aShdrs[i].sh_info = 0;
1149 pSymFile->aShdrs[i].sh_addralign = 1;
1150 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1151 i++;
1152# endif
1153
1154 /* Section header: .text */
1155 unsigned const iShText = i;
1156 pSymFile->aShdrs[i].sh_name = offStrTab;
1157 APPEND_STR(".text");
1158 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1159 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1161 pSymFile->aShdrs[i].sh_offset
1162 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1163# else
1164 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1165 pSymFile->aShdrs[i].sh_offset = 0;
1166# endif
1167 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1168 pSymFile->aShdrs[i].sh_link = 0;
1169 pSymFile->aShdrs[i].sh_info = 0;
1170 pSymFile->aShdrs[i].sh_addralign = 1;
1171 pSymFile->aShdrs[i].sh_entsize = 0;
1172 i++;
1173
1174 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1175
1176# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1177 /*
1178 * The program headers:
1179 */
1180 /* Everything in a single LOAD segment: */
1181 i = 0;
1182 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1183 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = 0;
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1189 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1190 i++;
1191 /* The .dynamic segment. */
1192 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1193 pSymFile->aPhdrs[i].p_flags = PF_R;
1194 pSymFile->aPhdrs[i].p_offset
1195 = pSymFile->aPhdrs[i].p_vaddr
1196 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1197 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1198 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1199 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1200 i++;
1201
1202 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1203
1204 /*
1205 * The dynamic section:
1206 */
1207 i = 0;
1208 pSymFile->aDyn[i].d_tag = DT_SONAME;
1209 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1210 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1219 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1220 i++;
1221 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1222 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1223 i++;
1224 pSymFile->aDyn[i].d_tag = DT_NULL;
1225 i++;
1226 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1227# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1228
1229 /*
1230 * Symbol tables:
1231 */
1232 /** @todo gdb doesn't seem to really like this ... */
1233 i = 0;
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1241 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1242# endif
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = 0;
1246 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1247 pSymFile->aSymbols[i].st_value = 0;
1248 pSymFile->aSymbols[i].st_size = 0;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251 i++;
1252
1253 pSymFile->aSymbols[i].st_name = offStrTab;
1254 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1255# if 0
1256 pSymFile->aSymbols[i].st_shndx = iShText;
1257 pSymFile->aSymbols[i].st_value = 0;
1258# else
1259 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1260 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1261# endif
1262 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1263 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1264 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1265# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1266 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1267 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1268# endif
1269 i++;
1270
1271 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1272 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1273
1274 /*
1275 * The GDB JIT entry and informing GDB.
1276 */
1277 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1278# if 1
1279 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1280# else
1281 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1282# endif
1283
1284 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1285 RTCritSectEnter(&g_IemNativeGdbJitLock);
1286 pEhFrame->GdbJitEntry.pNext = NULL;
1287 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1288 if (__jit_debug_descriptor.pTail)
1289 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1290 else
1291 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1292 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1293 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1294
1295 /* Notify GDB: */
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1297 __jit_debug_register_code();
1298 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1299 RTCritSectLeave(&g_IemNativeGdbJitLock);
1300
1301# else /* !IEMNATIVE_USE_GDB_JIT */
1302 RT_NOREF(pVCpu);
1303# endif /* !IEMNATIVE_USE_GDB_JIT */
1304
1305 return VINF_SUCCESS;
1306}
1307
1308# endif /* !RT_OS_WINDOWS */
1309#endif /* IN_RING3 */
1310
1311
1312/**
1313 * Adds another chunk to the executable memory allocator.
1314 *
1315 * This is used by the init code for the initial allocation and later by the
1316 * regular allocator function when it's out of memory.
1317 */
1318static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1319{
1320 /* Check that we've room for growth. */
1321 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1322 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1323
1324 /* Allocate a chunk. */
1325#ifdef RT_OS_DARWIN
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1327#else
1328 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1329#endif
1330 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 int rc = VINF_SUCCESS;
1334#else
1335 /* Initialize the heap for the chunk. */
1336 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1337 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1338 AssertRC(rc);
1339 if (RT_SUCCESS(rc))
1340 {
1341 /*
1342 * We want the memory to be aligned on a 64 byte boundary, so the first time thru
1343 * here we do some exploratory allocations to see how we can achieve this.
1344 * On subsequent runs we only make an initial adjustment allocation, if
1345 * necessary.
1346 *
1347 * Since we own the heap implementation, we know that the internal block
1348 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1349 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1350 * to the size, align up by 64 bytes, and subtract 32 bytes.
1351 *
1352 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1353 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1354 * allocation to force subsequent allocations to return 64 byte aligned
1355 * user areas.
1356 */
1357 if (!pExecMemAllocator->cbHeapBlockHdr)
1358 {
1359 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1360 pExecMemAllocator->cbHeapAlignTweak = 64;
1361 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1362 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1364
1365 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1372 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1373 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1374 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1375 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1376
1377 RTHeapSimpleFree(hHeap, pvTest2);
1378 RTHeapSimpleFree(hHeap, pvTest1);
1379 }
1380 else
1381 {
1382 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1383 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1384 }
1385 if (RT_SUCCESS(rc))
1386#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1387 {
1388 /*
1389 * Add the chunk.
1390 *
1391 * This must be done before the unwind init so windows can allocate
1392 * memory from the chunk when using the alternative sub-allocator.
1393 */
1394 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1395#ifdef IN_RING3
1396 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1397#endif
1398#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1400#else
1401 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1402 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1403 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1404 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1405#endif
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1411 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1412 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1413#else
1414 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1415 pExecMemAllocator->cbTotal += cbFree;
1416 pExecMemAllocator->cbFree += cbFree;
1417#endif
1418
1419#ifdef IN_RING3
1420 /*
1421 * Initialize the unwind information (this cannot really fail atm).
1422 * (This sets pvUnwindInfo.)
1423 */
1424 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1425 if (RT_SUCCESS(rc))
1426#endif
1427 {
1428 return VINF_SUCCESS;
1429 }
1430
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 /* Just in case the impossible happens, undo the above: */
1433 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1435 pExecMemAllocator->cChunks = idxChunk;
1436 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1437 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1438 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1439 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1440#endif
1441 }
1442#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 }
1444#endif
1445 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1446 RT_NOREF(pVCpu);
1447 return rc;
1448}
1449
1450
1451/**
1452 * Initializes the executable memory allocator for native recompilation on the
1453 * calling EMT.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu The cross context virtual CPU structure of the calling
1457 * thread.
1458 * @param cbMax The max size of the allocator.
1459 * @param cbInitial The initial allocator size.
1460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1461 * dependent).
1462 */
1463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1464{
1465 /*
1466 * Validate input.
1467 */
1468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1470 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1471 || cbChunk == 0
1472 || ( RT_IS_POWER_OF_TWO(cbChunk)
1473 && cbChunk >= _1M
1474 && cbChunk <= _256M
1475 && cbChunk <= cbMax),
1476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1477 VERR_OUT_OF_RANGE);
1478
1479 /*
1480 * Adjust/figure out the chunk size.
1481 */
1482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1483 {
1484 if (cbMax >= _256M)
1485 cbChunk = _64M;
1486 else
1487 {
1488 if (cbMax < _16M)
1489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1490 else
1491 cbChunk = (uint32_t)cbMax / 4;
1492 if (!RT_IS_POWER_OF_TWO(cbChunk))
1493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1494 }
1495 }
1496
1497 if (cbChunk > cbMax)
1498 cbMax = cbChunk;
1499 else
1500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
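    /* Worked example (illustrative values only): with cbChunk left at the
       default and cbMax = 64M, the code above picks cbChunk = 64M / 4 = 16M
       (already a power of two), keeps cbMax at 64M and thus ends up with
       cMaxChunks = 4.  A non-power-of-two intermediate result would have been
       adjusted to a power of two by the RT_BIT_32/ASMBitLastSetU32 step above. */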
1503
1504 /*
1505 * Allocate and initialize the allocator instance.
1506 */
1507 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1508#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1509 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1510 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1511 cbNeeded += cbBitmap * cMaxChunks;
1512 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1513 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1514#endif
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
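    /* Layout of the single RTMemAllocZ block allocated below (the trailing
       parts are cache-line aligned and only present in the respective builds):
           [ IEMEXECMEMALLOCATOR including aChunks[cMaxChunks]        ]
           [ offBitmaps:  allocation bitmaps, cbBitmap x cMaxChunks   ]  (alt sub-allocator)
           [ offEhFrames: IEMEXECMEMCHUNKEHFRAME x cMaxChunks         ]  (ring-3, non-Windows)
     */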
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1533 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1534 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1535 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1536#endif
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1539#endif
1540 for (uint32_t i = 0; i < cMaxChunks; i++)
1541 {
1542#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1543 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1544 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1545#else
1546 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1547#endif
1548 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1549#ifdef IN_RING0
1550 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1551#else
1552 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1553#endif
1554 }
1555 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1556
1557 /*
1558 * Do the initial allocations.
1559 */
1560 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1561 {
1562 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1563 AssertLogRelRCReturn(rc, rc);
1564 }
1565
1566 pExecMemAllocator->idxChunkHint = 0;
1567
1568 return VINF_SUCCESS;
1569}
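/* Illustrative call only (the sizes are picked by the caller / configuration,
 * not by this file):
 *     int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0);
 * With these values the sizing logic above selects 16M chunks and allows up to
 * four of them. */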
1570
1571
1572/*********************************************************************************************************************************
1573* Native Recompilation *
1574*********************************************************************************************************************************/
1575
1576
1577/**
1578 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1581{
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1584}
1585
1586
1587/**
1588 * Used by TB code when it wants to raise a \#GP(0).
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1591{
1592 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1593#ifndef _MSC_VER
1594 return VINF_IEM_RAISED_XCPT; /* not reached */
1595#endif
1596}
1597
1598
1599/**
1600 * Used by TB code when it wants to raise a \#NM.
1601 */
1602IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1603{
1604 iemRaiseDeviceNotAvailableJmp(pVCpu);
1605#ifndef _MSC_VER
1606 return VINF_IEM_RAISED_XCPT; /* not reached */
1607#endif
1608}
1609
1610
1611/**
1612 * Used by TB code when it wants to raise a \#UD.
1613 */
1614IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1615{
1616 iemRaiseUndefinedOpcodeJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when detecting opcode changes.
1625 * @see iemThreadeFuncWorkerObsoleteTb
1626 */
1627IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1628{
1629 /* We set fSafeToFree to false, as we're being called in the context
1630 of a TB callback function, which for native TBs means we cannot release
1631 the executable memory until we've returned all the way back to iemTbExec,
1632 since that return path goes via the native code generated for the TB. */
1633 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1634 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1635 return VINF_IEM_REEXEC_BREAK;
1636}
1637
1638
1639/**
1640 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1643{
1644 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1645 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1646 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1647 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1648 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1649 return VINF_IEM_REEXEC_BREAK;
1650}
1651
1652
1653/**
1654 * Used by TB code when we missed a PC check after a branch.
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1657{
1658 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1659 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1660 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1661 pVCpu->iem.s.pbInstrBuf));
1662 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1663 return VINF_IEM_REEXEC_BREAK;
1664}
1665
1666
1667
1668/*********************************************************************************************************************************
1669* Helpers: Segmented memory fetches and stores. *
1670*********************************************************************************************************************************/
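/*
 * Note on the pattern used by the helpers in the sections below: when the
 * corresponding IEMNATIVE_WITH_TLB_LOOKUP_xxx option is defined, the emitted
 * TB code is assumed to do the TLB lookup inline and only call the helper as
 * the fallback, hence the xxxSafeJmp worker; otherwise the helper goes through
 * the full xxxJmp worker.
 */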
1671
1672/**
1673 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1674 */
1675IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1676{
1677#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1678 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1679#else
1680 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1681#endif
1682}
1683
1684
1685/**
1686 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1687 * to 16 bits.
1688 */
1689IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1690{
1691#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1692 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1693#else
1694 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1695#endif
1696}
1697
1698
1699/**
1700 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1701 * to 32 bits.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1706 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1707#else
1708 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712/**
1713 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1714 * to 64 bits.
1715 */
1716IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1717{
1718#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1719 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1720#else
1721 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1722#endif
1723}
1724
1725
1726/**
1727 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1728 */
1729IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1730{
1731#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1732 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1733#else
1734 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1735#endif
1736}
1737
1738
1739/**
1740 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1741 * to 32 bits.
1742 */
1743IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1744{
1745#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1746 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1747#else
1748 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1749#endif
1750}
1751
1752
1753/**
1754 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1755 * to 64 bits.
1756 */
1757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1758{
1759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1760 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1761#else
1762 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1763#endif
1764}
1765
1766
1767/**
1768 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1769 */
1770IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1771{
1772#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1773 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1774#else
1775 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1776#endif
1777}
1778
1779
1780/**
1781 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1782 * to 64 bits.
1783 */
1784IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1785{
1786#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1787 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1788#else
1789 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1790#endif
1791}
1792
1793
1794/**
1795 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1796 */
1797IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1798{
1799#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1800 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1801#else
1802 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1803#endif
1804}
1805
1806
1807/**
1808 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1809 */
1810IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1811{
1812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1813 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1814#else
1815 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1816#endif
1817}
1818
1819
1820/**
1821 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1826 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1827#else
1828 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1839 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1840#else
1841 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1848 */
1849IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1850{
1851#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1852 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1853#else
1854 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1855#endif
1856}
1857
1858
1859
1860/**
1861 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1866 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1867#else
1868 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1869#endif
1870}
1871
1872
1873/**
1874 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1875 */
1876IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1877{
1878#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1879 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1880#else
1881 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1882#endif
1883}
1884
1885
1886/**
1887 * Used by TB code to store a 32-bit selector value onto a generic stack.
1888 *
1889 * Intel CPUs don't write a whole dword, hence this special function.
1890 */
1891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1892{
1893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1894 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1895#else
1896 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1897#endif
1898}
1899
1900
1901/**
1902 * Used by TB code to push unsigned 64-bit value onto a generic stack.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1907 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1908#else
1909 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1916 */
1917IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1918{
1919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1920 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1921#else
1922 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1923#endif
1924}
1925
1926
1927/**
1928 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1929 */
1930IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1931{
1932#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1933 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1934#else
1935 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1936#endif
1937}
1938
1939
1940/**
1941 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1942 */
1943IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1944{
1945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1946 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1947#else
1948 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1949#endif
1950}
1951
1952
1953
1954/*********************************************************************************************************************************
1955* Helpers: Flat memory fetches and stores. *
1956*********************************************************************************************************************************/
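/*
 * These flat variants reuse the segmented xxxSafeJmp workers by passing
 * UINT8_MAX as the segment register index; only the non-TLB-lookup path has
 * dedicated iemMemFlatXxx workers.
 */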
1957
1958/**
1959 * Used by TB code to load unsigned 8-bit data w/ flat address.
1960 * @note Zero extending the value to 64-bit to simplify assembly.
1961 */
1962IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1963{
1964#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1965 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1966#else
1967 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1968#endif
1969}
1970
1971
1972/**
1973 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1974 * to 16 bits.
1975 * @note Zero extending the value to 64-bit to simplify assembly.
1976 */
1977IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1978{
1979#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1980 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1981#else
1982 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1983#endif
1984}
1985
1986
1987/**
1988 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1989 * to 32 bits.
1990 * @note Zero extending the value to 64-bit to simplify assembly.
1991 */
1992IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1993{
1994#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1995 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1996#else
1997 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1998#endif
1999}
2000
2001
2002/**
2003 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2004 * to 64 bits.
2005 */
2006IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2007{
2008#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2009 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2010#else
2011 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2012#endif
2013}
2014
2015
2016/**
2017 * Used by TB code to load unsigned 16-bit data w/ flat address.
2018 * @note Zero extending the value to 64-bit to simplify assembly.
2019 */
2020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2021{
2022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2023 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2024#else
2025 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2026#endif
2027}
2028
2029
2030/**
2031 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2032 * to 32 bits.
2033 * @note Zero extending the value to 64-bit to simplify assembly.
2034 */
2035IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2036{
2037#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2038 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2039#else
2040 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2041#endif
2042}
2043
2044
2045/**
2046 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2047 * to 64 bits.
2048 * @note Zero extending the value to 64-bit to simplify assembly.
2049 */
2050IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2051{
2052#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2053 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2054#else
2055 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2056#endif
2057}
2058
2059
2060/**
2061 * Used by TB code to load unsigned 32-bit data w/ flat address.
2062 * @note Zero extending the value to 64-bit to simplify assembly.
2063 */
2064IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2065{
2066#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2067 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2068#else
2069 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2070#endif
2071}
2072
2073
2074/**
2075 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2076 * to 64 bits.
2077 * @note Zero extending the value to 64-bit to simplify assembly.
2078 */
2079IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2080{
2081#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2082 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2083#else
2084 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2085#endif
2086}
2087
2088
2089/**
2090 * Used by TB code to load unsigned 64-bit data w/ flat address.
2091 */
2092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2093{
2094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2095 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2096#else
2097 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2098#endif
2099}
2100
2101
2102/**
2103 * Used by TB code to store unsigned 8-bit data w/ flat address.
2104 */
2105IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2106{
2107#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2108 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2109#else
2110 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2111#endif
2112}
2113
2114
2115/**
2116 * Used by TB code to store unsigned 16-bit data w/ flat address.
2117 */
2118IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2119{
2120#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2121 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2122#else
2123 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2124#endif
2125}
2126
2127
2128/**
2129 * Used by TB code to store unsigned 32-bit data w/ flat address.
2130 */
2131IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2132{
2133#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2134 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2135#else
2136 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2137#endif
2138}
2139
2140
2141/**
2142 * Used by TB code to store unsigned 64-bit data w/ flat address.
2143 */
2144IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2145{
2146#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2147 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2148#else
2149 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2150#endif
2151}
2152
2153
2154
2155/**
2156 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2157 */
2158IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2159{
2160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2161 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2162#else
2163 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2164#endif
2165}
2166
2167
2168/**
2169 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2172{
2173#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2174 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2175#else
2176 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2177#endif
2178}
2179
2180
2181/**
2182 * Used by TB code to store a segment selector value onto a flat stack.
2183 *
2184 * Intel CPUs don't write a whole dword, hence this special function.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2189 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2190#else
2191 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2198 */
2199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2200{
2201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2202 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2203#else
2204 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2205#endif
2206}
2207
2208
2209/**
2210 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2211 */
2212IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2213{
2214#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2215 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2216#else
2217 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2218#endif
2219}
2220
2221
2222/**
2223 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2224 */
2225IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2226{
2227#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2228 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2229#else
2230 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2231#endif
2232}
2233
2234
2235/**
2236 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2237 */
2238IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2239{
2240#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2241 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2242#else
2243 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2244#endif
2245}
2246
2247
2248
2249/*********************************************************************************************************************************
2250* Helpers: Segmented memory mapping. *
2251*********************************************************************************************************************************/
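/*
 * Each mapping helper below returns the host pointer for the guest range and
 * fills in *pbUnmapInfo, which the TB code must later hand back to one of the
 * iemNativeHlpMemCommitAndUnmapXxx helpers further down.
 */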
2252
2253/**
2254 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2255 * segmentation.
2256 */
2257IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2258 RTGCPTR GCPtrMem, uint8_t iSegReg))
2259{
2260#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2261 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2262#else
2263 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2264#endif
2265}
2266
2267
2268/**
2269 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2270 */
2271IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2272 RTGCPTR GCPtrMem, uint8_t iSegReg))
2273{
2274#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2275 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2276#else
2277 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2278#endif
2279}
2280
2281
2282/**
2283 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2284 */
2285IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2286 RTGCPTR GCPtrMem, uint8_t iSegReg))
2287{
2288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2289 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#else
2291 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2292#endif
2293}
2294
2295
2296/**
2297 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2298 */
2299IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2300 RTGCPTR GCPtrMem, uint8_t iSegReg))
2301{
2302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2303 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#else
2305 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2306#endif
2307}
2308
2309
2310/**
2311 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2312 * segmentation.
2313 */
2314IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2315 RTGCPTR GCPtrMem, uint8_t iSegReg))
2316{
2317#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2318 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2319#else
2320 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2321#endif
2322}
2323
2324
2325/**
2326 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2327 */
2328IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2329 RTGCPTR GCPtrMem, uint8_t iSegReg))
2330{
2331#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2332 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2333#else
2334 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2335#endif
2336}
2337
2338
2339/**
2340 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2341 */
2342IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2343 RTGCPTR GCPtrMem, uint8_t iSegReg))
2344{
2345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2346 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#else
2348 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#endif
2350}
2351
2352
2353/**
2354 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2355 */
2356IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2357 RTGCPTR GCPtrMem, uint8_t iSegReg))
2358{
2359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2360 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#else
2362 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#endif
2364}
2365
2366
2367/**
2368 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2369 * segmentation.
2370 */
2371IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2372 RTGCPTR GCPtrMem, uint8_t iSegReg))
2373{
2374#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2375 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2376#else
2377 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2378#endif
2379}
2380
2381
2382/**
2383 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2384 */
2385IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2386 RTGCPTR GCPtrMem, uint8_t iSegReg))
2387{
2388#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2389 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2390#else
2391 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2392#endif
2393}
2394
2395
2396/**
2397 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2400 RTGCPTR GCPtrMem, uint8_t iSegReg))
2401{
2402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2403 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#else
2405 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2406#endif
2407}
2408
2409
2410/**
2411 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2412 */
2413IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2414 RTGCPTR GCPtrMem, uint8_t iSegReg))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2417 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#else
2419 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2426 * segmentation.
2427 */
2428IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2429 RTGCPTR GCPtrMem, uint8_t iSegReg))
2430{
2431#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2432 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2433#else
2434 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2435#endif
2436}
2437
2438
2439/**
2440 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2443 RTGCPTR GCPtrMem, uint8_t iSegReg))
2444{
2445#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2446 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2447#else
2448 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2449#endif
2450}
2451
2452
2453/**
2454 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2455 */
2456IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2457 RTGCPTR GCPtrMem, uint8_t iSegReg))
2458{
2459#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2460 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#else
2462 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#endif
2464}
2465
2466
2467/**
2468 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2469 */
2470IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2471 RTGCPTR GCPtrMem, uint8_t iSegReg))
2472{
2473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2474 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#else
2476 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2477#endif
2478}
2479
2480
2481/**
2482 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2483 */
2484IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2485 RTGCPTR GCPtrMem, uint8_t iSegReg))
2486{
2487#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2488 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#else
2490 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2491#endif
2492}
2493
2494
2495/**
2496 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2497 */
2498IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2499 RTGCPTR GCPtrMem, uint8_t iSegReg))
2500{
2501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2502 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#else
2504 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#endif
2506}
2507
2508
2509/**
2510 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2511 * segmentation.
2512 */
2513IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2514 RTGCPTR GCPtrMem, uint8_t iSegReg))
2515{
2516#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2517 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2518#else
2519 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2520#endif
2521}
2522
2523
2524/**
2525 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2526 */
2527IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2528 RTGCPTR GCPtrMem, uint8_t iSegReg))
2529{
2530#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2531 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2532#else
2533 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2534#endif
2535}
2536
2537
2538/**
2539 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2540 */
2541IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2542 RTGCPTR GCPtrMem, uint8_t iSegReg))
2543{
2544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2545 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#else
2547 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#endif
2549}
2550
2551
2552/**
2553 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2554 */
2555IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2556 RTGCPTR GCPtrMem, uint8_t iSegReg))
2557{
2558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2559 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#else
2561 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#endif
2563}
2564
2565
2566/*********************************************************************************************************************************
2567* Helpers: Flat memory mapping. *
2568*********************************************************************************************************************************/
2569
2570/**
2571 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2572 * address.
2573 */
2574IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2575{
2576#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2577 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2578#else
2579 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2580#endif
2581}
2582
2583
2584/**
2585 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2586 */
2587IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2588{
2589#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2590 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2591#else
2592 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2593#endif
2594}
2595
2596
2597/**
2598 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2599 */
2600IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2601{
2602#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2603 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2604#else
2605 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2606#endif
2607}
2608
2609
2610/**
2611 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2612 */
2613IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2614{
2615#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2616 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2617#else
2618 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2619#endif
2620}
2621
2622
2623/**
2624 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2625 * address.
2626 */
2627IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2628{
2629#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2630 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2631#else
2632 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2633#endif
2634}
2635
2636
2637/**
2638 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2639 */
2640IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2641{
2642#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2643 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2644#else
2645 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2646#endif
2647}
2648
2649
2650/**
2651 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2652 */
2653IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2654{
2655#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2656 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2657#else
2658 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2659#endif
2660}
2661
2662
2663/**
2664 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2665 */
2666IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2667{
2668#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2669 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2670#else
2671 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2672#endif
2673}
2674
2675
2676/**
2677 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2678 * address.
2679 */
2680IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2681{
2682#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2683 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2684#else
2685 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2686#endif
2687}
2688
2689
2690/**
2691 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2692 */
2693IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2694{
2695#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2696 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2697#else
2698 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2699#endif
2700}
2701
2702
2703/**
2704 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2705 */
2706IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2707{
2708#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2709 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2710#else
2711 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2712#endif
2713}
2714
2715
2716/**
2717 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2718 */
2719IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2720{
2721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2722 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2723#else
2724 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2725#endif
2726}
2727
2728
2729/**
2730 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2731 * address.
2732 */
2733IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2734{
2735#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2736 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2737#else
2738 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2739#endif
2740}
2741
2742
2743/**
2744 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2745 */
2746IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2747{
2748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2749 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2750#else
2751 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2752#endif
2753}
2754
2755
2756/**
2757 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2758 */
2759IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2760{
2761#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2762 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2763#else
2764 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2765#endif
2766}
2767
2768
2769/**
2770 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2771 */
2772IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2773{
2774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2775 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2776#else
2777 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2778#endif
2779}
2780
2781
2782/**
2783 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2784 */
2785IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2786{
2787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2788 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2789#else
2790 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2791#endif
2792}
2793
2794
2795/**
2796 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2797 */
2798IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2799{
2800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2801 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2802#else
2803 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2804#endif
2805}
2806
2807
2808/**
2809 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2810 * address.
2811 */
2812IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2813{
2814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2815 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2816#else
2817 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2818#endif
2819}
2820
2821
2822/**
2823 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2824 */
2825IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2826{
2827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2828 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2829#else
2830 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2831#endif
2832}
2833
2834
2835/**
2836 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2837 */
2838IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2839{
2840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2841 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2842#else
2843 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2844#endif
2845}
2846
2847
2848/**
2849 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2850 */
2851IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2852{
2853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2854 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2855#else
2856 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2857#endif
2858}
2859
2860
2861/*********************************************************************************************************************************
2862* Helpers: Commit, rollback & unmap *
2863*********************************************************************************************************************************/
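/* Illustrative pairing with the mapping helpers above (not actual TB output):
 *     uint8_t   bUnmapInfo;
 *     uint64_t *pu64 = iemNativeHlpMemFlatMapDataU64Rw(pVCpu, &bUnmapInfo, GCPtrMem);
 *     *pu64 += 1;
 *     iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);
 */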
2864
2865/**
2866 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2867 */
2868IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2869{
2870 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2871}
2872
2873
2874/**
2875 * Used by TB code to commit and unmap a read-write memory mapping.
2876 */
2877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2878{
2879 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2880}
2881
2882
2883/**
2884 * Used by TB code to commit and unmap a write-only memory mapping.
2885 */
2886IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2887{
2888 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2889}
2890
2891
2892/**
2893 * Used by TB code to commit and unmap a read-only memory mapping.
2894 */
2895IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2896{
2897 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2898}
2899
2900
2901/**
2902 * Reinitializes the native recompiler state.
2903 *
2904 * Called before starting a new recompile job.
2905 */
2906static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2907{
2908 pReNative->cLabels = 0;
2909 pReNative->bmLabelTypes = 0;
2910 pReNative->cFixups = 0;
2911#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2912 pReNative->pDbgInfo->cEntries = 0;
2913#endif
2914 pReNative->pTbOrg = pTb;
2915 pReNative->cCondDepth = 0;
2916 pReNative->uCondSeqNo = 0;
2917 pReNative->uCheckIrqSeqNo = 0;
2918 pReNative->uTlbSeqNo = 0;
2919
2920#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2921 pReNative->Core.offPc = 0;
2922 pReNative->Core.cInstrPcUpdateSkipped = 0;
2923#endif
2924 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2925#if IEMNATIVE_HST_GREG_COUNT < 32
2926 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2927#endif
2928 ;
2929 pReNative->Core.bmHstRegsWithGstShadow = 0;
2930 pReNative->Core.bmGstRegShadows = 0;
2931 pReNative->Core.bmVars = 0;
2932 pReNative->Core.bmStack = 0;
2933 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2934 pReNative->Core.u64ArgVars = UINT64_MAX;
2935
2936 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 11);
2937 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2938 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2939 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2940 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2941 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2942 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2943 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2944 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2945 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2946 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2947 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2948
2949 /* Full host register reinit: */
2950 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2951 {
2952 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2953 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2954 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2955 }
2956
2957 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2958 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2959#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2960 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2961#endif
2962#ifdef IEMNATIVE_REG_FIXED_TMP0
2963 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2964#endif
2965#ifdef IEMNATIVE_REG_FIXED_TMP1
2966 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2967#endif
2968#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2969 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2970#endif
2971 );
2972 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2973 {
2974 fRegs &= ~RT_BIT_32(idxReg);
2975 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2976 }
2977
2978 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2979#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2980 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2981#endif
2982#ifdef IEMNATIVE_REG_FIXED_TMP0
2983 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2984#endif
2985#ifdef IEMNATIVE_REG_FIXED_TMP1
2986 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2987#endif
2988#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2989 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2990#endif
2991 return pReNative;
2992}
2993
2994
2995/**
2996 * Allocates and initializes the native recompiler state.
2997 *
2998 * This is called the first time an EMT wants to recompile something.
2999 *
3000 * @returns Pointer to the new recompiler state.
3001 * @param pVCpu The cross context virtual CPU structure of the calling
3002 * thread.
3003 * @param pTb The TB that's about to be recompiled.
3004 * @thread EMT(pVCpu)
3005 */
3006static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3007{
3008 VMCPU_ASSERT_EMT(pVCpu);
3009
3010 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3011 AssertReturn(pReNative, NULL);
3012
3013 /*
3014 * Try to allocate all the buffers and stuff we need.
3015 */
3016 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3017 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3018 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3019#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3020 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3021#endif
3022 if (RT_LIKELY( pReNative->pInstrBuf
3023 && pReNative->paLabels
3024 && pReNative->paFixups)
3025#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3026 && pReNative->pDbgInfo
3027#endif
3028 )
3029 {
3030 /*
3031 * Set the buffer & array sizes on success.
3032 */
3033 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3034 pReNative->cLabelsAlloc = _8K;
3035 pReNative->cFixupsAlloc = _16K;
3036#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3037 pReNative->cDbgInfoAlloc = _16K;
3038#endif
3039
3040 /* Other constant stuff: */
3041 pReNative->pVCpu = pVCpu;
3042
3043 /*
3044 * Done, just need to save it and reinit it.
3045 */
3046 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3047 return iemNativeReInit(pReNative, pTb);
3048 }
3049
3050 /*
3051 * Failed. Cleanup and return.
3052 */
3053 AssertFailed();
3054 RTMemFree(pReNative->pInstrBuf);
3055 RTMemFree(pReNative->paLabels);
3056 RTMemFree(pReNative->paFixups);
3057#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3058 RTMemFree(pReNative->pDbgInfo);
3059#endif
3060 RTMemFree(pReNative);
3061 return NULL;
3062}
3063
3064
3065/**
3066 * Creates a label.
3067 *
3068 * If the label does not yet have a defined position,
3069 * call iemNativeLabelDefine() later to set it.
3070 *
3071 * @returns Label ID. Throws VBox status code on failure, so no need to check
3072 * the return value.
3073 * @param pReNative The native recompile state.
3074 * @param enmType The label type.
3075 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3076 * label is not yet defined (default).
3077 * @param uData Data associated with the label. Only applicable to
3078 * certain types of labels. Default is zero.
3079 */
3080DECL_HIDDEN_THROW(uint32_t)
3081iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3082 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3083{
3084 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3085
3086 /*
3087 * Locate existing label definition.
3088 *
3089 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3090 * and uData is zero.
3091 */
3092 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3093 uint32_t const cLabels = pReNative->cLabels;
3094 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3095#ifndef VBOX_STRICT
3096 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3097 && offWhere == UINT32_MAX
3098 && uData == 0
3099#endif
3100 )
3101 {
3102#ifndef VBOX_STRICT
3103 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3104 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3105 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3106 if (idxLabel < pReNative->cLabels)
3107 return idxLabel;
3108#else
3109 for (uint32_t i = 0; i < cLabels; i++)
3110 if ( paLabels[i].enmType == enmType
3111 && paLabels[i].uData == uData)
3112 {
3113 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3114 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3115 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3116 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3117 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3118 return i;
3119 }
3120 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3121 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3122#endif
3123 }
3124
3125 /*
3126 * Make sure we've got room for another label.
3127 */
3128 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3129 { /* likely */ }
3130 else
3131 {
3132 uint32_t cNew = pReNative->cLabelsAlloc;
3133 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3134 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3135 cNew *= 2;
3136 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* the IEMNATIVEFIXUP::idxLabel type restricts this */
3137 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3138 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3139 pReNative->paLabels = paLabels;
3140 pReNative->cLabelsAlloc = cNew;
3141 }
3142
3143 /*
3144 * Define a new label.
3145 */
3146 paLabels[cLabels].off = offWhere;
3147 paLabels[cLabels].enmType = enmType;
3148 paLabels[cLabels].uData = uData;
3149 pReNative->cLabels = cLabels + 1;
3150
3151 Assert((unsigned)enmType < 64);
3152 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3153
3154 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3155 {
3156 Assert(uData == 0);
3157 pReNative->aidxUniqueLabels[enmType] = cLabels;
3158 }
3159
3160 if (offWhere != UINT32_MAX)
3161 {
3162#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3163 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3164 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3165#endif
3166 }
3167 return cLabels;
3168}
3169
3170
3171/**
3172 * Defines the location of an existing label.
3173 *
3174 * @param pReNative The native recompile state.
3175 * @param idxLabel The label to define.
3176 * @param offWhere The position.
3177 */
3178DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3179{
3180 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3181 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3182 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3183 pLabel->off = offWhere;
3184#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3185 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3186 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3187#endif
3188}
3189
3190
3191/**
3192 * Looks up a label.
3193 *
3194 * @returns Label ID if found, UINT32_MAX if not.
3195 */
3196static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3197 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3198{
3199 Assert((unsigned)enmType < 64);
3200 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3201 {
3202 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3203 return pReNative->aidxUniqueLabels[enmType];
3204
3205 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3206 uint32_t const cLabels = pReNative->cLabels;
3207 for (uint32_t i = 0; i < cLabels; i++)
3208 if ( paLabels[i].enmType == enmType
3209 && paLabels[i].uData == uData
3210 && ( paLabels[i].off == offWhere
3211 || offWhere == UINT32_MAX
3212 || paLabels[i].off == UINT32_MAX))
3213 return i;
3214 }
3215 return UINT32_MAX;
3216}
3217
3218
3219/**
3220 * Adds a fixup.
3221 *
3222 * @throws VBox status code (int) on failure.
3223 * @param pReNative The native recompile state.
3224 * @param offWhere The instruction offset of the fixup location.
3225 * @param idxLabel The target label ID for the fixup.
3226 * @param enmType The fixup type.
3227 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3228 */
3229DECL_HIDDEN_THROW(void)
3230iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3231 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3232{
3233 Assert(idxLabel <= UINT16_MAX);
3234 Assert((unsigned)enmType <= UINT8_MAX);
3235
3236 /*
3237 * Make sure we've room.
3238 */
3239 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3240 uint32_t const cFixups = pReNative->cFixups;
3241 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3242 { /* likely */ }
3243 else
3244 {
3245 uint32_t cNew = pReNative->cFixupsAlloc;
3246 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3247 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3248 cNew *= 2;
3249 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3250 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3251 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3252 pReNative->paFixups = paFixups;
3253 pReNative->cFixupsAlloc = cNew;
3254 }
3255
3256 /*
3257 * Add the fixup.
3258 */
3259 paFixups[cFixups].off = offWhere;
3260 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3261 paFixups[cFixups].enmType = enmType;
3262 paFixups[cFixups].offAddend = offAddend;
3263 pReNative->cFixups = cFixups + 1;
3264}
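/*
 * Note: The label and fixup helpers above are typically used together when
 * emitting forward branches.  A minimal sketch of the flow (illustrative only;
 * the branch emitter, label type and fixup type names below are placeholders,
 * not the exact identifiers used elsewhere in this file):
 *
 *      // Forward declare a label without a position.
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);
 *
 *      // Emit the branch and record a fixup at the instruction offset that
 *      // needs patching, referencing the yet-undefined label.
 *      off = iemNativeEmitSomeCondBranch(pReNative, off);            // hypothetical emitter
 *      iemNativeAddFixup(pReNative, offOfBranchInstr, idxLabel, enmSomeFixupType);
 *
 *      // ... emit the code being branched over ...
 *
 *      // Bind the label to the current native instruction offset so the
 *      // recorded fixup can be resolved against it later.
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */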
3265
3266
3267/**
3268 * Slow code path for iemNativeInstrBufEnsure.
3269 */
3270DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3271{
3272 /* Double the buffer size till we meet the request. */
3273 uint32_t cNew = pReNative->cInstrBufAlloc;
3274 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3275 do
3276 cNew *= 2;
3277 while (cNew < off + cInstrReq);
3278
3279 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3280#ifdef RT_ARCH_ARM64
3281 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3282#else
3283 uint32_t const cbMaxInstrBuf = _2M;
3284#endif
3285 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3286
3287 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3288 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3289
3290#ifdef VBOX_STRICT
3291 pReNative->offInstrBufChecked = off + cInstrReq;
3292#endif
3293 pReNative->cInstrBufAlloc = cNew;
3294 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3295}
3296
3297#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3298
3299/**
3300 * Grows the static debug info array used during recompilation.
3301 *
3302 * @returns Pointer to the new debug info block; throws VBox status code on
3303 * failure, so no need to check the return value.
3304 */
3305DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3306{
3307 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3308 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3309 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3310 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3311 pReNative->pDbgInfo = pDbgInfo;
3312 pReNative->cDbgInfoAlloc = cNew;
3313 return pDbgInfo;
3314}
3315
3316
3317/**
3318 * Adds a new debug info uninitialized entry, returning the pointer to it.
3319 */
3320DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3321{
3322 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3323 { /* likely */ }
3324 else
3325 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3326 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3327}
3328
3329
3330/**
3331 * Debug Info: Adds a native offset record, if necessary.
3332 */
3333static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3334{
3335 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3336
3337 /*
3338 * Search backwards to see if we've got a similar record already.
3339 */
3340 uint32_t idx = pDbgInfo->cEntries;
3341 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3342 while (idx-- > idxStop)
3343 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3344 {
3345 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3346 return;
3347 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3348 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3349 break;
3350 }
3351
3352 /*
3353 * Add it.
3354 */
3355 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3356 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3357 pEntry->NativeOffset.offNative = off;
3358}
3359
3360
3361/**
3362 * Debug Info: Record info about a label.
3363 */
3364static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3365{
3366 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3367 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3368 pEntry->Label.uUnused = 0;
3369 pEntry->Label.enmLabel = (uint8_t)enmType;
3370 pEntry->Label.uData = uData;
3371}
3372
3373
3374/**
3375 * Debug Info: Record info about a threaded call.
3376 */
3377static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3378{
3379 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3380 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3381 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3382 pEntry->ThreadedCall.uUnused = 0;
3383 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3384}
3385
3386
3387/**
3388 * Debug Info: Record info about a new guest instruction.
3389 */
3390static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3391{
3392 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3393 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3394 pEntry->GuestInstruction.uUnused = 0;
3395 pEntry->GuestInstruction.fExec = fExec;
3396}
3397
3398
3399/**
3400 * Debug Info: Record info about guest register shadowing.
3401 */
3402static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3403 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3404{
3405 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3406 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3407 pEntry->GuestRegShadowing.uUnused = 0;
3408 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3409 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3410 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3411}
3412
3413
3414# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3415/**
3416 * Debug Info: Record info about delayed RIP updates.
3417 */
3418static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3419{
3420 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3421 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3422 pEntry->DelayedPcUpdate.offPc = offPc;
3423 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3424}
3425# endif
3426
3427#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3428
3429
3430/*********************************************************************************************************************************
3431* Register Allocator *
3432*********************************************************************************************************************************/
3433
3434/**
3435 * Register parameter indexes (indexed by argument number).
3436 */
3437DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3438{
3439 IEMNATIVE_CALL_ARG0_GREG,
3440 IEMNATIVE_CALL_ARG1_GREG,
3441 IEMNATIVE_CALL_ARG2_GREG,
3442 IEMNATIVE_CALL_ARG3_GREG,
3443#if defined(IEMNATIVE_CALL_ARG4_GREG)
3444 IEMNATIVE_CALL_ARG4_GREG,
3445# if defined(IEMNATIVE_CALL_ARG5_GREG)
3446 IEMNATIVE_CALL_ARG5_GREG,
3447# if defined(IEMNATIVE_CALL_ARG6_GREG)
3448 IEMNATIVE_CALL_ARG6_GREG,
3449# if defined(IEMNATIVE_CALL_ARG7_GREG)
3450 IEMNATIVE_CALL_ARG7_GREG,
3451# endif
3452# endif
3453# endif
3454#endif
3455};
3456
3457/**
3458 * Call register masks indexed by argument count.
3459 */
3460DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3461{
3462 0,
3463 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3464 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3465 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3466 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3467 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3468#if defined(IEMNATIVE_CALL_ARG4_GREG)
3469 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3470 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3471# if defined(IEMNATIVE_CALL_ARG5_GREG)
3472 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3473 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3474# if defined(IEMNATIVE_CALL_ARG6_GREG)
3475 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3476 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3477 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3478# if defined(IEMNATIVE_CALL_ARG7_GREG)
3479 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3480 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3481 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3482# endif
3483# endif
3484# endif
3485#endif
3486};
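/*
 * Note: g_aidxIemNativeCallRegs[] maps a single argument number to its host
 * register, while g_afIemNativeCallRegs[] is indexed by the total argument
 * count and yields the mask covering all registers used for those arguments,
 * e.g. (illustrative only):
 *
 *      uint8_t  const idxRegArg1 = g_aidxIemNativeCallRegs[1];  // register for argument #1
 *      uint32_t const fArgRegs   = g_afIemNativeCallRegs[3];    // mask of ARG0 | ARG1 | ARG2
 */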
3487
3488#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3489/**
3490 * BP offset of the stack argument slots.
3491 *
3492 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3493 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3494 */
3495DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3496{
3497 IEMNATIVE_FP_OFF_STACK_ARG0,
3498# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3499 IEMNATIVE_FP_OFF_STACK_ARG1,
3500# endif
3501# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3502 IEMNATIVE_FP_OFF_STACK_ARG2,
3503# endif
3504# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3505 IEMNATIVE_FP_OFF_STACK_ARG3,
3506# endif
3507};
3508AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3509#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3510
3511/**
3512 * Info about shadowed guest register values.
3513 * @see IEMNATIVEGSTREG
3514 */
3515static struct
3516{
3517 /** Offset in VMCPU. */
3518 uint32_t off;
3519 /** The field size. */
3520 uint8_t cb;
3521 /** Name (for logging). */
3522 const char *pszName;
3523} const g_aGstShadowInfo[] =
3524{
3525#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3526 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3527 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3528 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3529 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3530 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3531 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3532 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3533 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3534 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3535 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3536 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3537 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3538 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3539 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3540 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3541 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3542 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3543 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3544 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3545 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3546 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3547 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3548 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3549 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3550 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3551 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3552 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3553 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3554 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3555 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3556 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3557 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3558 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3559 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3560 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3561 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3562 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3563 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3564 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3565 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3566 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3567 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3568 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3569 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3570 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3571 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3572 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3573#undef CPUMCTX_OFF_AND_SIZE
3574};
3575AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
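/*
 * Note: g_aGstShadowInfo[] is indexed by IEMNATIVEGSTREG and supplies the VMCPU
 * byte offset and field size needed when emitting loads/stores of a shadowed
 * guest register, e.g. (illustrative only):
 *
 *      uint32_t const offPc = g_aGstShadowInfo[kIemNativeGstReg_Pc].off;  // == RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip)
 *      uint8_t  const cbPc  = g_aGstShadowInfo[kIemNativeGstReg_Pc].cb;   // == RT_SIZEOFMEMB(VMCPU, cpum.GstCtx.rip)
 */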
3576
3577
3578/** Host CPU general purpose register names. */
3579DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3580{
3581#ifdef RT_ARCH_AMD64
3582 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3583#elif defined(RT_ARCH_ARM64)
3584 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3585 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3586#else
3587# error "port me"
3588#endif
3589};
3590
3591
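/**
 * Marks host register @a idxReg as allocated in the allocator bookkeeping,
 * recording its purpose (@a enmWhat) and, for variables, the owning variable
 * index (@a idxVar), and zeroing the entry's guest shadowing mask.
 *
 * @returns @a idxReg as a uint8_t.
 */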
3592DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3593 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3594{
3595 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3596
3597 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3598 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3599 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3600 return (uint8_t)idxReg;
3601}
3602
3603
3604#if 0 /* unused */
3605/**
3606 * Tries to locate a suitable register in the given register mask.
3607 *
3608 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3609 * failed.
3610 *
3611 * @returns Host register number on success, returns UINT8_MAX on failure.
3612 */
3613static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3614{
3615 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3616 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3617 if (fRegs)
3618 {
3619 /** @todo pick better here: */
3620 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3621
3622 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3623 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3624 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3625 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3626
3627 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3628 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3629 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3630 return idxReg;
3631 }
3632 return UINT8_MAX;
3633}
3634#endif /* unused */
3635
3636
3637/**
3638 * Locate a register, possibly freeing one up.
3639 *
3640 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3641 * failed.
3642 *
3643 * @returns Host register number on success. Returns UINT8_MAX if no registers
3644 * found; the caller is supposed to deal with this and raise an
3645 * allocation-type specific status code (if desired).
3646 *
3647 * @throws VBox status code if we run into trouble spilling a variable or
3648 * recording debug info. Does NOT throw anything if we're out of
3649 * registers, though.
3650 */
3651static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3652 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3653{
3654 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3655 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3656 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3657
3658 /*
3659 * Try a freed register that's shadowing a guest register.
3660 */
3661 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3662 if (fRegs)
3663 {
3664 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3665
3666#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3667 /*
3668 * When we have liveness information, we use it to kick out all shadowed
3669 * guest registers that will not be needed any more in this TB. If we're
3670 * lucky, this may prevent us from ending up here again.
3671 *
3672 * Note! We must consider the previous entry here so we don't free
3673 * anything that the current threaded function requires (current
3674 * entry is produced by the next threaded function).
3675 */
3676 uint32_t const idxCurCall = pReNative->idxCurCall;
3677 if (idxCurCall > 0)
3678 {
3679 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3680
3681# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3682 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3683 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3684 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3685#else
3686 /* Construct a mask of the registers not in the read or write state.
3687 Note! We could skip writes, if they aren't from us, as this is just
3688 a hack to prevent trashing registers that have just been written
3689 or will be written when we retire the current instruction. */
3690 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3691 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3692 & IEMLIVENESSBIT_MASK;
3693#endif
3694 /* Merge EFLAGS. */
3695 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3696 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3697 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3698 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3699 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
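            /* Note: The three shift-and-AND steps above fold the seven adjacent
               EFLAGS liveness bits (Other, CF, PF, AF, ZF, SF, OF - assuming they
               are laid out consecutively starting at kIemNativeGstReg_EFlags, as
               the shift amounts imply) into the single kIemNativeGstReg_EFlags
               bit, so EFLAGS only becomes freeable when every sub-field is. */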
3700
3701 /* If it matches any shadowed registers. */
3702 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3703 {
3704 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3705 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3706 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3707
3708 /* See if we've got any unshadowed registers we can return now. */
3709 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3710 if (fUnshadowedRegs)
3711 {
3712 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3713 return (fPreferVolatile
3714 ? ASMBitFirstSetU32(fUnshadowedRegs)
3715 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3716 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3717 - 1;
3718 }
3719 }
3720 }
3721#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3722
3723 unsigned const idxReg = (fPreferVolatile
3724 ? ASMBitFirstSetU32(fRegs)
3725 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3726 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3727 - 1;
3728
3729 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3730 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3731 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3732 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3733
3734 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3735 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3736 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3737 return idxReg;
3738 }
3739
3740 /*
3741 * Try to free up a variable that's in a register.
3742 *
3743 * We do two rounds here, first evacuating variables we don't need to be
3744 * saved on the stack, then in the second round move things to the stack.
3745 */
3746 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3747 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3748 {
3749 uint32_t fVars = pReNative->Core.bmVars;
3750 while (fVars)
3751 {
3752 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3753 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3754 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3755 && (RT_BIT_32(idxReg) & fRegMask)
3756 && ( iLoop == 0
3757 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3758 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3759 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3760 {
3761 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3762 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3763 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3764 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3765 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3766 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3767
3768 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3769 {
3770 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3771 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3772 }
3773
3774 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3775 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3776
3777 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3778 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3779 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3780 return idxReg;
3781 }
3782 fVars &= ~RT_BIT_32(idxVar);
3783 }
3784 }
3785
3786 return UINT8_MAX;
3787}
3788
3789
3790/**
3791 * Reassigns a variable to a different register specified by the caller.
3792 *
3793 * @returns The new code buffer position.
3794 * @param pReNative The native recompile state.
3795 * @param off The current code buffer position.
3796 * @param idxVar The variable index.
3797 * @param idxRegOld The old host register number.
3798 * @param idxRegNew The new host register number.
3799 * @param pszCaller The caller for logging.
3800 */
3801static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3802 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3803{
3804 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3805 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3806 RT_NOREF(pszCaller);
3807
3808 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3809
3810 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3811 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3812 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3813 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3814
3815 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3816 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3817 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3818 if (fGstRegShadows)
3819 {
3820 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3821 | RT_BIT_32(idxRegNew);
3822 while (fGstRegShadows)
3823 {
3824 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3825 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3826
3827 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3828 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3829 }
3830 }
3831
3832 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3833 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3834 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3835 return off;
3836}
3837
3838
3839/**
3840 * Moves a variable to a different register or spills it onto the stack.
3841 *
3842 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3843 * kinds can easily be recreated if needed later.
3844 *
3845 * @returns The new code buffer position.
3846 * @param pReNative The native recompile state.
3847 * @param off The current code buffer position.
3848 * @param idxVar The variable index.
3849 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3850 * call-volatile registers.
3851 */
3852static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3853 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3854{
3855 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3856 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3857 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3858 Assert(!pVar->fRegAcquired);
3859
3860 uint8_t const idxRegOld = pVar->idxReg;
3861 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3862 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3863 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3864 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3865 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3866 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3867 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3868 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3869
3870
3871 /** @todo Add statistics on this.*/
3872 /** @todo Implement basic variable liveness analysis (python) so variables
3873 * can be freed immediately once no longer used. As it stands, we risk
3874 * trashing registers and stack for dead variables.
3875 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3876
3877 /*
3878 * First try to move it to a different register, as that's cheaper.
3879 */
3880 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3881 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3882 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3883 if (fRegs)
3884 {
3885 /* Avoid using shadow registers, if possible. */
3886 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3887 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3888 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3889 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3890 }
3891
3892 /*
3893 * Otherwise we must spill the register onto the stack.
3894 */
3895 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3896 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3897 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3898 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3899
3900 pVar->idxReg = UINT8_MAX;
3901 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3902 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3903 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3904 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3905 return off;
3906}
3907
3908
3909/**
3910 * Allocates a temporary host general purpose register.
3911 *
3912 * This may emit code to save register content onto the stack in order to free
3913 * up a register.
3914 *
3915 * @returns The host register number; throws VBox status code on failure,
3916 * so no need to check the return value.
3917 * @param pReNative The native recompile state.
3918 * @param poff Pointer to the variable with the code buffer position.
3919 * This will be updated if we need to move a variable from
3920 * register to stack in order to satisfy the request.
3921 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3922 * registers (@c true, default) or the other way around
3923 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3924 */
3925DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3926{
3927 /*
3928 * Try to find a completely unused register, preferably a call-volatile one.
3929 */
3930 uint8_t idxReg;
3931 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3932 & ~pReNative->Core.bmHstRegsWithGstShadow
3933 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3934 if (fRegs)
3935 {
3936 if (fPreferVolatile)
3937 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3938 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3939 else
3940 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3941 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3942 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3943 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3944 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3945 }
3946 else
3947 {
3948 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3949 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3950 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3951 }
3952 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3953}
3954
3955
3956/**
3957 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3958 * registers.
3959 *
3960 * @returns The host register number; throws VBox status code on failure,
3961 * so no need to check the return value.
3962 * @param pReNative The native recompile state.
3963 * @param poff Pointer to the variable with the code buffer position.
3964 * This will be updated if we need to move a variable from
3965 * register to stack in order to satisfy the request.
3966 * @param fRegMask Mask of acceptable registers.
3967 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3968 * registers (@c true, default) or the other way around
3969 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3970 */
3971DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3972 bool fPreferVolatile /*= true*/)
3973{
3974 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3975 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3976
3977 /*
3978 * Try to find a completely unused register, preferably a call-volatile one.
3979 */
3980 uint8_t idxReg;
3981 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3982 & ~pReNative->Core.bmHstRegsWithGstShadow
3983 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3984 & fRegMask;
3985 if (fRegs)
3986 {
3987 if (fPreferVolatile)
3988 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3989 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3990 else
3991 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3992 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3993 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3994 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3995 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3996 }
3997 else
3998 {
3999 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4000 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4001 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4002 }
4003 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4004}
4005
4006
4007/**
4008 * Allocates a temporary register for loading an immediate value into.
4009 *
4010 * This will emit code to load the immediate, unless there happens to be an
4011 * unused register with the value already loaded.
4012 *
4013 * The caller will not modify the returned register, it must be considered
4014 * read-only. Free using iemNativeRegFreeTmpImm.
4015 *
4016 * @returns The host register number; throws VBox status code on failure, so no
4017 * need to check the return value.
4018 * @param pReNative The native recompile state.
4019 * @param poff Pointer to the variable with the code buffer position.
4020 * @param uImm The immediate value that the register must hold upon
4021 * return.
4022 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4023 * registers (@c true, default) or the other way around
4024 * (@c false).
4025 *
4026 * @note Reusing immediate values has not been implemented yet.
4027 */
4028DECL_HIDDEN_THROW(uint8_t)
4029iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4030{
4031 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4032 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4033 return idxReg;
4034}
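/*
 * Note: A minimal usage sketch for the temporary register helpers above
 * (illustrative only; iemNativeRegFreeTmp is assumed to be the counterpart of
 * iemNativeRegAllocTmp, mirroring the iemNativeRegFreeTmpImm mentioned in the
 * documentation above):
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      // ... emit code using idxTmpReg as scratch and idxImmReg as a read-only constant ...
 *      iemNativeRegFreeTmpImm(pReNative, idxImmReg);
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */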
4035
4036#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4037
4038# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4039/**
4040 * Helper for iemNativeLivenessGetStateByGstReg.
4041 *
4042 * @returns IEMLIVENESS_STATE_XXX
4043 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4044 * ORed together.
4045 */
4046DECL_FORCE_INLINE(uint32_t)
4047iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4048{
4049 /* INPUT trumps anything else. */
4050 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4051 return IEMLIVENESS_STATE_INPUT;
4052
4053 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4054 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4055 {
4056 /* If not all sub-fields are clobbered they must be considered INPUT. */
4057 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4058 return IEMLIVENESS_STATE_INPUT;
4059 return IEMLIVENESS_STATE_CLOBBERED;
4060 }
4061
4062 /* XCPT_OR_CALL trumps UNUSED. */
4063 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4064 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4065
4066 return IEMLIVENESS_STATE_UNUSED;
4067}
4068# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4069
4070
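/**
 * Gets the (unmerged) liveness state of the guest register given by extended
 * index @a enmGstRegEx (which includes the individual EFLAGS sub-fields), as a
 * small bit field reassembled from the per-bit bitmaps of @a pLivenessEntry.
 */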
4071DECL_FORCE_INLINE(uint32_t)
4072iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4073{
4074# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4075 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4076 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4077# else
4078 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4079 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4080 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4081 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
4082# endif
4083}
4084
4085
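/**
 * Gets the liveness state of guest register @a enmGstReg, merging the
 * individual EFLAGS sub-field states into a single state when @a enmGstReg is
 * kIemNativeGstReg_EFlags.
 */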
4086DECL_FORCE_INLINE(uint32_t)
4087iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4088{
4089 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4090 if (enmGstReg == kIemNativeGstReg_EFlags)
4091 {
4092 /* Merge the eflags states to one. */
4093# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4094 uRet = RT_BIT_32(uRet);
4095 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4096 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4097 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4098 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4099 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4100 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4101 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4102# else
4103 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4104 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4105 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4106 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4107 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4108 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4109 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4110# endif
4111 }
4112 return uRet;
4113}
4114
4115
4116# ifdef VBOX_STRICT
4117/** For assertions only; the caller checks that idxCurCall isn't zero. */
4118DECL_FORCE_INLINE(uint32_t)
4119iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4120{
4121 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4122}
4123# endif /* VBOX_STRICT */
4124
4125#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4126
4127/**
4128 * Marks host register @a idxHstReg as containing a shadow copy of guest
4129 * register @a enmGstReg.
4130 *
4131 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4132 * host register before calling.
4133 */
4134DECL_FORCE_INLINE(void)
4135iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4136{
4137 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4138 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4139 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4140
4141 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4142 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4143 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4144 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4145#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4146 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4147 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4148#else
4149 RT_NOREF(off);
4150#endif
4151}
4152
4153
4154/**
4155 * Clear any guest register shadow claims from @a idxHstReg.
4156 *
4157 * The register does not need to be shadowing any guest registers.
4158 */
4159DECL_FORCE_INLINE(void)
4160iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4161{
4162 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4163 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4164 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4165 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4166 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4167
4168#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4169 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4170 if (fGstRegs)
4171 {
4172 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4173 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4174 while (fGstRegs)
4175 {
4176 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4177 fGstRegs &= ~RT_BIT_64(iGstReg);
4178 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4179 }
4180 }
4181#else
4182 RT_NOREF(off);
4183#endif
4184
4185 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4186 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4187 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4188}
4189
4190
4191/**
4192 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4193 * and global overview flags.
4194 */
4195DECL_FORCE_INLINE(void)
4196iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4197{
4198 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4199 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4200 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4201 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4202 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4203 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4204 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4205
4206#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4207 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4208 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4209#else
4210 RT_NOREF(off);
4211#endif
4212
4213 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4214 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4215 if (!fGstRegShadowsNew)
4216 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4217 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4218}
4219
4220
4221#if 0 /* unused */
4222/**
4223 * Clear any guest register shadow claim for @a enmGstReg.
4224 */
4225DECL_FORCE_INLINE(void)
4226iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4227{
4228 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4229 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4230 {
4231 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4232 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4233 }
4234}
4235#endif
4236
4237
4238/**
4239 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4240 * as the new shadow of it.
4241 *
4242 * Unlike the other guest reg shadow helpers, this does the logging for you.
4243 * However, the liveness state is not asserted here; the caller must do
4244 * that.
4245 */
4246DECL_FORCE_INLINE(void)
4247iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4248 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4249{
4250 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4251 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4252 {
4253 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4254 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4255 if (idxHstRegOld == idxHstRegNew)
4256 return;
4257 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4258 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4259 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4260 }
4261 else
4262 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4263 g_aGstShadowInfo[enmGstReg].pszName));
4264 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4265}
4266
4267
4268/**
4269 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4270 * to @a idxRegTo.
4271 */
4272DECL_FORCE_INLINE(void)
4273iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4274 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4275{
4276 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4277 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4278 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4279 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4280 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4281 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4282 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4283 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4284 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4285
4286 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4287 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4288 if (!fGstRegShadowsFrom)
4289 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4290 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4291 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4292 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4293#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4294 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4295 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4296#else
4297 RT_NOREF(off);
4298#endif
4299}
4300
4301
4302/**
4303 * Allocates a temporary host general purpose register for keeping a guest
4304 * register value.
4305 *
4306 * If a host register already holds the guest register value, it will be
4307 * reused; otherwise code will be emitted to do the loading. Code may also
4308 * be emitted if we have to free up a register to satisfy the request.
4309 *
4310 * @returns The host register number; throws VBox status code on failure, so no
4311 * need to check the return value.
4312 * @param pReNative The native recompile state.
4313 * @param poff Pointer to the variable with the code buffer
4314 * position. This will be updated if we need to move a
4315 * variable from register to stack in order to satisfy
4316 * the request.
4317 * @param enmGstReg The guest register that is to be updated.
4318 * @param enmIntendedUse How the caller will be using the host register.
4319 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4320 * register is okay (default). The ASSUMPTION here is
4321 * that the caller has already flushed all volatile
4322 * registers, so this is only applied if we allocate a
4323 * new register.
4324 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4325 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
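 *
 * @note    A minimal usage sketch, for illustration only (iemNativeRegFreeTmp
 *          is assumed to be the matching release helper):
 * @code
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                             kIemNativeGstReg_GprFirst + X86_GREG_xAX,
 *                                                             kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code that updates the value in idxReg ...
 *      iemNativeRegFreeTmp(pReNative, idxReg);
 * @endcode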
4326 */
4327DECL_HIDDEN_THROW(uint8_t)
4328iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4329 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4330 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4331{
4332 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4333#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4334 AssertMsg( fSkipLivenessAssert
4335 || pReNative->idxCurCall == 0
4336 || enmGstReg == kIemNativeGstReg_Pc
4337 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4338 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4339 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4340 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4341 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4342 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4343#endif
4344 RT_NOREF(fSkipLivenessAssert);
4345#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4346 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4347#endif
4348 uint32_t const fRegMask = !fNoVolatileRegs
4349 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4350 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4351
4352 /*
4353 * First check if the guest register value is already in a host register.
4354 */
4355 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4356 {
4357 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4358 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4359 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4360 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4361
4362 /* It's not supposed to be allocated... */
4363 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4364 {
4365 /*
4366 * If the register will trash the guest shadow copy, try find a
4367 * completely unused register we can use instead. If that fails,
4368 * we need to disassociate the host reg from the guest reg.
4369 */
4370 /** @todo would be nice to know if preserving the register is in any way helpful. */
4371 /* If the purpose is calculations, try to duplicate the register value as
4372 we'll be clobbering the shadow. */
4373 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4374 && ( ~pReNative->Core.bmHstRegs
4375 & ~pReNative->Core.bmHstRegsWithGstShadow
4376 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4377 {
4378 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4379
4380 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4381
4382 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4383 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4384 g_apszIemNativeHstRegNames[idxRegNew]));
4385 idxReg = idxRegNew;
4386 }
4387 /* If the current register matches the restrictions, go ahead and allocate
4388 it for the caller. */
4389 else if (fRegMask & RT_BIT_32(idxReg))
4390 {
4391 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4392 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4393 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4394 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4395 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4396 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4397 else
4398 {
4399 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4400 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4401 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4402 }
4403 }
4404 /* Otherwise, allocate a register that satisfies the caller and transfer
4405 the shadowing if compatible with the intended use. (This basically
4406 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4407 else
4408 {
4409 Assert(fNoVolatileRegs);
4410 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4411 !fNoVolatileRegs
4412 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4413 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4414 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4415 {
4416 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4417 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4418 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4419 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4420 }
4421 else
4422 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4423 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4424 g_apszIemNativeHstRegNames[idxRegNew]));
4425 idxReg = idxRegNew;
4426 }
4427 }
4428 else
4429 {
4430 /*
4431 * Oops. Shadowed guest register already allocated!
4432 *
4433 * Allocate a new register, copy the value and, if updating, the
4434 * guest shadow copy assignment to the new register.
4435 */
4436 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4437 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4438 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4439 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4440
4441 /** @todo share register for readonly access. */
4442 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4443 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4444
4445 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4446 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4447
4448 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4449 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4450 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4451 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4452 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4453 else
4454 {
4455 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4456 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4457 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4458 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4459 }
4460 idxReg = idxRegNew;
4461 }
4462 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4463
4464#ifdef VBOX_STRICT
4465 /* Strict builds: Check that the value is correct. */
4466 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4467#endif
4468
4469 return idxReg;
4470 }
4471
4472 /*
4473 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4474 */
4475 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4476
4477 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4478 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4479
4480 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4481 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4482 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4483 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4484
4485 return idxRegNew;
4486}
4487
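#if 0 /* Illustrative usage sketch only - not part of the original sources. */
/*
 * A minimal sketch of an assumed caller: fetch the guest RAX shadow read-only,
 * copy it into a plain temporary register and release both again.  All helper
 * names are the ones defined in this file; the wrapper function itself is
 * purely hypothetical.
 */
static uint32_t iemNativeExampleReadGuestRax(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxGstRaxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                                 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                                 kIemNativeGstRegUse_ReadOnly);
    uint8_t const idxTmpReg    = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxTmpReg, idxGstRaxReg);
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    iemNativeRegFreeTmp(pReNative, idxGstRaxReg); /* frees the temp, keeps the shadow association */
    return off;
}
#endif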
4488
4489/**
4490 * Allocates a temporary host general purpose register that already holds the
4491 * given guest register value.
4492 *
4493 * The use case for this function is places where the shadowing state cannot be
4494 * modified due to branching and such. This will fail if we don't have a
4495 * current shadow copy handy or if it's incompatible. The only code that will
4496 * be emitted here is value checking code in strict builds.
4497 *
4498 * The intended use can only be readonly!
4499 *
4500 * @returns The host register number, UINT8_MAX if not present.
4501 * @param pReNative The native recompile state.
4502 * @param poff Pointer to the instruction buffer offset.
4503 * Will be updated in strict builds if a register is
4504 * found.
4505 * @param enmGstReg The guest register that is to be accessed (read-only).
4506 * @note In strict builds, this may throw instruction buffer growth failures.
4507 * Non-strict builds will not throw anything.
4508 * @sa iemNativeRegAllocTmpForGuestReg
4509 */
4510DECL_HIDDEN_THROW(uint8_t)
4511iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4512{
4513 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4514#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4515 AssertMsg( pReNative->idxCurCall == 0
4516 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4517 || enmGstReg == kIemNativeGstReg_Pc,
4518 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4519#endif
4520
4521 /*
4522 * First check if the guest register value is already in a host register.
4523 */
4524 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4525 {
4526 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4527 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4528 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4529 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4530
4531 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4532 {
4533 /*
4534 * We only do readonly use here, so easy compared to the other
4535 * variant of this code.
4536 */
4537 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4538 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4539 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4540 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4541 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4542
4543#ifdef VBOX_STRICT
4544 /* Strict builds: Check that the value is correct. */
4545 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4546#else
4547 RT_NOREF(poff);
4548#endif
4549 return idxReg;
4550 }
4551 }
4552
4553 return UINT8_MAX;
4554}
4555
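#if 0 /* Illustrative usage sketch only - not part of the original sources. */
/*
 * A minimal sketch (assumed caller) showing how the "if already present"
 * variant must handle the UINT8_MAX answer, since it never emits loads nor
 * changes the shadowing state.  For brevity the fallback here is simply the
 * regular allocator; a real branch-constrained caller would pick a fallback
 * that likewise leaves the shadowing state untouched.
 */
static uint32_t iemNativeExampleUsePcIfShadowed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstReg)
{
    uint8_t idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg == UINT8_MAX)
        idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxDstReg, idxPcReg);
    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif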
4556
4557/**
4558 * Allocates argument registers for a function call.
4559 *
4560 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4561 * need to check the return value.
4562 * @param pReNative The native recompile state.
4563 * @param off The current code buffer offset.
4564 * @param cArgs The number of arguments the function call takes.
4565 */
4566DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4567{
4568 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4569 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4570 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4571 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4572
4573 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4574 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4575 else if (cArgs == 0)
4576 return off;
4577
4578 /*
4579 * Are we lucky and all registers are free and not shadowing anything?
4580 */
4581 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4582 for (uint32_t i = 0; i < cArgs; i++)
4583 {
4584 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4585 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4586 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4587 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4588 }
4589 /*
4590 * Okay, not lucky so we have to free up the registers.
4591 */
4592 else
4593 for (uint32_t i = 0; i < cArgs; i++)
4594 {
4595 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4596 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4597 {
4598 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4599 {
4600 case kIemNativeWhat_Var:
4601 {
4602 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4603 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4604 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4605 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4606 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4607
4608 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4609 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4610 else
4611 {
4612 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4613 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4614 }
4615 break;
4616 }
4617
4618 case kIemNativeWhat_Tmp:
4619 case kIemNativeWhat_Arg:
4620 case kIemNativeWhat_rc:
4621 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4622 default:
4623 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4624 }
4625
4626 }
4627 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4628 {
4629 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4630 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4631 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4632 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4633 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4634 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4635 }
4636 else
4637 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4638 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4639 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4640 }
4641 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4642 return off;
4643}
4644
4645
4646DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4647
4648
4649#if 0
4650/**
4651 * Frees a register assignment of any type.
4652 *
4653 * @param pReNative The native recompile state.
4654 * @param idxHstReg The register to free.
4655 *
4656 * @note Does not update variables.
4657 */
4658DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4659{
4660 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4661 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4662 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4663 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4664 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4665 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4666 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4667 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4668 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4669 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4670 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4671 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4672 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4673 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4674
4675 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4676 /* no flushing, right:
4677 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4678 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4679 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4680 */
4681}
4682#endif
4683
4684
4685/**
4686 * Frees a temporary register.
4687 *
4688 * Any shadow copies of guest registers assigned to the host register will not
4689 * be flushed by this operation.
4690 */
4691DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4692{
4693 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4694 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4695 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4696 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4697 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4698}
4699
4700
4701/**
4702 * Frees a temporary immediate register.
4703 *
4704 * It is assumed that the caller has not modified the register, so it still holds
4705 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4706 */
4707DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4708{
4709 iemNativeRegFreeTmp(pReNative, idxHstReg);
4710}
4711
4712
4713/**
4714 * Frees a register assigned to a variable.
4715 *
4716 * The register will be disassociated from the variable.
4717 */
4718DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4719{
4720 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4721 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4722 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4723 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4724 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4725
4726 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4727 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4728 if (!fFlushShadows)
4729 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4730 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4731 else
4732 {
4733 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4734 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4735 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4736 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4737 uint64_t fGstRegShadows = fGstRegShadowsOld;
4738 while (fGstRegShadows)
4739 {
4740 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4741 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4742
4743 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4744 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4745 }
4746 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4747 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4748 }
4749}
4750
4751
4752/**
4753 * Called right before emitting a call instruction to move anything important
4754 * out of call-volatile registers, free and flush the call-volatile registers,
4755 * optionally freeing argument variables.
4756 *
4757 * @returns New code buffer offset; throws VBox status code on failure.
4758 * @param pReNative The native recompile state.
4759 * @param off The code buffer offset.
4760 * @param cArgs The number of arguments the function call takes.
4761 * It is presumed that the host registers for these have
4762 * already been allocated as such and won't need moving,
4763 * just freeing.
4764 * @param fKeepVars Mask of variables that should keep their register
4765 * assignments. Caller must take care to handle these.
4766 */
4767DECL_HIDDEN_THROW(uint32_t)
4768iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4769{
4770 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4771
4772 /* fKeepVars will reduce this mask. */
4773 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4774
4775 /*
4776 * Move anything important out of volatile registers.
4777 */
4778 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4779 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4780 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4781#ifdef IEMNATIVE_REG_FIXED_TMP0
4782 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4783#endif
4784#ifdef IEMNATIVE_REG_FIXED_TMP1
4785 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4786#endif
4787#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4788 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4789#endif
4790 & ~g_afIemNativeCallRegs[cArgs];
4791
4792 fRegsToMove &= pReNative->Core.bmHstRegs;
4793 if (!fRegsToMove)
4794 { /* likely */ }
4795 else
4796 {
4797 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4798 while (fRegsToMove != 0)
4799 {
4800 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4801 fRegsToMove &= ~RT_BIT_32(idxReg);
4802
4803 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4804 {
4805 case kIemNativeWhat_Var:
4806 {
4807 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4808 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4809 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4810 Assert(pVar->idxReg == idxReg);
4811 if (!(RT_BIT_32(idxVar) & fKeepVars))
4812 {
4813 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4814 idxVar, pVar->enmKind, pVar->idxReg));
4815 if (pVar->enmKind != kIemNativeVarKind_Stack)
4816 pVar->idxReg = UINT8_MAX;
4817 else
4818 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4819 }
4820 else
4821 fRegsToFree &= ~RT_BIT_32(idxReg);
4822 continue;
4823 }
4824
4825 case kIemNativeWhat_Arg:
4826 AssertMsgFailed(("What?!?: %u\n", idxReg));
4827 continue;
4828
4829 case kIemNativeWhat_rc:
4830 case kIemNativeWhat_Tmp:
4831 AssertMsgFailed(("Missing free: %u\n", idxReg));
4832 continue;
4833
4834 case kIemNativeWhat_FixedTmp:
4835 case kIemNativeWhat_pVCpuFixed:
4836 case kIemNativeWhat_pCtxFixed:
4837 case kIemNativeWhat_PcShadow:
4838 case kIemNativeWhat_FixedReserved:
4839 case kIemNativeWhat_Invalid:
4840 case kIemNativeWhat_End:
4841 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4842 }
4843 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4844 }
4845 }
4846
4847 /*
4848 * Do the actual freeing.
4849 */
4850 if (pReNative->Core.bmHstRegs & fRegsToFree)
4851 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4852 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4853 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4854
4855 /* If there are guest register shadows in any call-volatile register, we
4856 have to clear the corresponding guest register masks for each register. */
4857 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4858 if (fHstRegsWithGstShadow)
4859 {
4860 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4861 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4862 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4863 do
4864 {
4865 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4866 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4867
4868 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4869 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4870 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4871 } while (fHstRegsWithGstShadow != 0);
4872 }
4873
4874 return off;
4875}
4876
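#if 0 /* Illustrative call-preparation sketch only - not part of the original sources. */
/*
 * Assumed outline of how a call site combines the helpers above; the real
 * users are iemNativeEmitCImplCall and iemNativeEmitThreadedCall further
 * down.  The helper pointer pfnSomeHelper and the wrapper are hypothetical.
 */
static uint32_t iemNativeExampleEmitHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                               uintptr_t pfnSomeHelper, uint8_t idxInstr)
{
    /* Write back pending guest state, drop all shadows, vacate the volatile registers. */
    off = iemNativeRegFlushPendingWrites(pReNative, off);
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 1 /*cArgs*/);

    /* Load the argument(s), make the call and propagate failure statuses. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitCallImm(pReNative, off, pfnSomeHelper);
    return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
}
#endif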
4877
4878/**
4879 * Flushes a set of guest register shadow copies.
4880 *
4881 * This is usually done after calling a threaded function or a C-implementation
4882 * of an instruction.
4883 *
4884 * @param pReNative The native recompile state.
4885 * @param fGstRegs Set of guest registers to flush.
4886 */
4887DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4888{
4889 /*
4890 * Reduce the mask by what's currently shadowed
4891 */
4892 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4893 fGstRegs &= bmGstRegShadowsOld;
4894 if (fGstRegs)
4895 {
4896 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4897 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4898 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4899 if (bmGstRegShadowsNew)
4900 {
4901 /*
4902 * Partial.
4903 */
4904 do
4905 {
4906 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4907 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4908 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4909 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4910 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4911
4912 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4913 fGstRegs &= ~fInThisHstReg;
4914 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4915 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4916 if (!fGstRegShadowsNew)
4917 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4918 } while (fGstRegs != 0);
4919 }
4920 else
4921 {
4922 /*
4923 * Clear all.
4924 */
4925 do
4926 {
4927 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4928 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4929 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4930 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4931 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4932
4933 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4934 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4935 } while (fGstRegs != 0);
4936 pReNative->Core.bmHstRegsWithGstShadow = 0;
4937 }
4938 }
4939}
4940
4941
4942/**
4943 * Flushes guest register shadow copies held by a set of host registers.
4944 *
4945 * This is used with the TLB lookup code for ensuring that we don't carry on
4946 * with any guest shadows in volatile registers, as these will get corrupted by
4947 * a TLB miss.
4948 *
4949 * @param pReNative The native recompile state.
4950 * @param fHstRegs Set of host registers to flush guest shadows for.
4951 */
4952DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4953{
4954 /*
4955 * Reduce the mask by what's currently shadowed.
4956 */
4957 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4958 fHstRegs &= bmHstRegsWithGstShadowOld;
4959 if (fHstRegs)
4960 {
4961 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4962 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4963 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4964 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4965 if (bmHstRegsWithGstShadowNew)
4966 {
4967 /*
4968 * Partial (likely).
4969 */
4970 uint64_t fGstShadows = 0;
4971 do
4972 {
4973 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4974 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4975 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4976 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4977
4978 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4979 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4980 fHstRegs &= ~RT_BIT_32(idxHstReg);
4981 } while (fHstRegs != 0);
4982 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4983 }
4984 else
4985 {
4986 /*
4987 * Clear all.
4988 */
4989 do
4990 {
4991 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4992 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4993 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4994 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4995
4996 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4997 fHstRegs &= ~RT_BIT_32(idxHstReg);
4998 } while (fHstRegs != 0);
4999 pReNative->Core.bmGstRegShadows = 0;
5000 }
5001 }
5002}
5003
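#if 0 /* Illustrative sketch only - not part of the original sources. */
    /*
     * Assumed TLB-lookup usage: before emitting an inlined lookup whose miss
     * path calls a helper, drop any guest shadows living in the call-volatile
     * registers so a miss cannot leave stale shadowing behind.
     */
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif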
5004
5005/**
5006 * Restores guest shadow copies in volatile registers.
5007 *
5008 * This is used after calling a helper function (think TLB miss) to restore the
5009 * register state of volatile registers.
5010 *
5011 * @param pReNative The native recompile state.
5012 * @param off The code buffer offset.
5013 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5014 * be active (allocated) w/o asserting. Hack.
5015 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5016 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5017 */
5018DECL_HIDDEN_THROW(uint32_t)
5019iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5020{
5021 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5022 if (fHstRegs)
5023 {
5024 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5025 do
5026 {
5027 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5028
5029 /* It's not fatal if a register is active holding a variable that
5030 shadows a guest register, ASSUMING all pending guest register
5031 writes were flushed prior to the helper call. However, we'll be
5032 emitting duplicate restores, so it wastes code space. */
5033 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5034 RT_NOREF(fHstRegsActiveShadows);
5035
5036 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5037 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5038 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5039 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5040
5041 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5042 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5043
5044 fHstRegs &= ~RT_BIT_32(idxHstReg);
5045 } while (fHstRegs != 0);
5046 }
5047 return off;
5048}
5049
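#if 0 /* Illustrative sketch only - not part of the original sources. */
    /*
     * Assumed post-helper sequence on a TLB-miss path: once the helper has
     * returned (and the call-volatile registers thus hold garbage), reload
     * the guest shadows that were live in them before rejoining the TLB-hit
     * code that relies on those shadows.
     */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif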
5050
5051#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5052# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
5053static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5054{
5055 /* Compare the shadow with the context value, they should match. */
5056 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
5057 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
5058 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
5059 return off;
5060}
5061# endif
5062
5063/**
5064 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5065 */
5066static uint32_t
5067iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5068{
5069 if (pReNative->Core.offPc)
5070 {
5071# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5072 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5073 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5074# endif
5075
5076# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5077 /* Allocate a temporary PC register. */
5078 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5079
5080 /* Perform the addition and store the result. */
5081 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5082 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5083
5084 /* Free but don't flush the PC register. */
5085 iemNativeRegFreeTmp(pReNative, idxPcReg);
5086# else
5087 /* Compare the shadow with the context value, they should match. */
5088 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5089 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5090# endif
5091
5092 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5093 pReNative->Core.offPc = 0;
5094 pReNative->Core.cInstrPcUpdateSkipped = 0;
5095 }
5096# if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
5097 else
5098 {
5099 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5100 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
5101 }
5102# endif
5103
5104 return off;
5105}
5106#endif
5107
5108
5109/**
5110 * Flushes delayed write of a specific guest register.
5111 *
5112 * This must be called prior to calling CImpl functions and any helpers that use
5113 * the guest state (like raising exceptions) and such.
5114 *
5115 * This optimization has not yet been implemented. The first target would be
5116 * RIP updates, since these are the most common ones.
5117 */
5118DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5119 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
5120{
5121#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5122 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
5123#endif
5124 RT_NOREF(pReNative, enmClass, idxReg);
5125 return off;
5126}
5127
5128
5129/**
5130 * Flushes any delayed guest register writes.
5131 *
5132 * This must be called prior to calling CImpl functions and any helpers that use
5133 * the guest state (like raising exceptions) and such.
5134 *
5135 * So far only delayed RIP updates are handled (when IEMNATIVE_WITH_DELAYED_PC_UPDATING
5136 * is defined), as these are by far the most common pending writes.
5137 */
5138DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/)
5139{
5140#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5141 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5142 off = iemNativeEmitPcWriteback(pReNative, off);
5143#else
5144 RT_NOREF(pReNative, fGstShwExcept);
5145#endif
5146
5147 return off;
5148}
5149
5150
5151#ifdef VBOX_STRICT
5152/**
5153 * Does internal register allocator sanity checks.
5154 */
5155static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5156{
5157 /*
5158 * Iterate host registers building a guest shadowing set.
5159 */
5160 uint64_t bmGstRegShadows = 0;
5161 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5162 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5163 while (bmHstRegsWithGstShadow)
5164 {
5165 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5166 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5167 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5168
5169 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5170 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5171 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5172 bmGstRegShadows |= fThisGstRegShadows;
5173 while (fThisGstRegShadows)
5174 {
5175 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5176 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5177 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5178 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5179 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5180 }
5181 }
5182 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5183 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5184 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5185
5186 /*
5187 * Now the other way around, checking the guest to host index array.
5188 */
5189 bmHstRegsWithGstShadow = 0;
5190 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5191 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5192 while (bmGstRegShadows)
5193 {
5194 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5195 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5196 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5197
5198 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5199 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5200 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5201 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5202 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5203 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5204 }
5205 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5206 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5207 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5208}
5209#endif
5210
5211
5212/*********************************************************************************************************************************
5213* Code Emitters (larger snippets) *
5214*********************************************************************************************************************************/
5215
5216/**
5217 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5218 * extending to 64-bit width.
5219 *
5220 * @returns New code buffer offset; throws VBox status code on failure.
5221 * @param pReNative The native recompile state.
5222 * @param off The current code buffer position.
5223 * @param idxHstReg The host register to load the guest register value into.
5224 * @param enmGstReg The guest register to load.
5225 *
5226 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5227 * that is something the caller needs to do if applicable.
5228 */
5229DECL_HIDDEN_THROW(uint32_t)
5230iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5231{
5232 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
5233 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5234
5235 switch (g_aGstShadowInfo[enmGstReg].cb)
5236 {
5237 case sizeof(uint64_t):
5238 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5239 case sizeof(uint32_t):
5240 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5241 case sizeof(uint16_t):
5242 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5243#if 0 /* not present in the table. */
5244 case sizeof(uint8_t):
5245 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5246#endif
5247 default:
5248 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5249 }
5250}
5251
5252
5253#ifdef VBOX_STRICT
5254/**
5255 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5256 *
5257 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5258 * Trashes EFLAGS on AMD64.
5259 */
5260static uint32_t
5261iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5262{
5263# ifdef RT_ARCH_AMD64
5264 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5265
5266 /* rol reg64, 32 */
5267 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5268 pbCodeBuf[off++] = 0xc1;
5269 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5270 pbCodeBuf[off++] = 32;
5271
5272 /* test reg32, ffffffffh */
5273 if (idxReg >= 8)
5274 pbCodeBuf[off++] = X86_OP_REX_B;
5275 pbCodeBuf[off++] = 0xf7;
5276 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5277 pbCodeBuf[off++] = 0xff;
5278 pbCodeBuf[off++] = 0xff;
5279 pbCodeBuf[off++] = 0xff;
5280 pbCodeBuf[off++] = 0xff;
5281
5282 /* je/jz +1 */
5283 pbCodeBuf[off++] = 0x74;
5284 pbCodeBuf[off++] = 0x01;
5285
5286 /* int3 */
5287 pbCodeBuf[off++] = 0xcc;
5288
5289 /* rol reg64, 32 */
5290 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5291 pbCodeBuf[off++] = 0xc1;
5292 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5293 pbCodeBuf[off++] = 32;
5294
5295# elif defined(RT_ARCH_ARM64)
5296 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5297 /* lsr tmp0, reg64, #32 */
5298 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5299 /* cbz tmp0, +1 */
5300 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5301 /* brk #0x1100 */
5302 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5303
5304# else
5305# error "Port me!"
5306# endif
5307 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5308 return off;
5309}
5310#endif /* VBOX_STRICT */
5311
5312
5313#ifdef VBOX_STRICT
5314/**
5315 * Emitting code that checks that the content of register @a idxReg is the same
5316 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5317 * instruction if that's not the case.
5318 *
5319 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5320 * Trashes EFLAGS on AMD64.
5321 */
5322static uint32_t
5323iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5324{
5325# ifdef RT_ARCH_AMD64
5326 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5327
5328 /* cmp reg, [mem] */
5329 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5330 {
5331 if (idxReg >= 8)
5332 pbCodeBuf[off++] = X86_OP_REX_R;
5333 pbCodeBuf[off++] = 0x38;
5334 }
5335 else
5336 {
5337 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5338 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5339 else
5340 {
5341 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5342 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5343 else
5344 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5345 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5346 if (idxReg >= 8)
5347 pbCodeBuf[off++] = X86_OP_REX_R;
5348 }
5349 pbCodeBuf[off++] = 0x39;
5350 }
5351 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5352
5353 /* je/jz +1 */
5354 pbCodeBuf[off++] = 0x74;
5355 pbCodeBuf[off++] = 0x01;
5356
5357 /* int3 */
5358 pbCodeBuf[off++] = 0xcc;
5359
5360 /* For values smaller than the register size, we must check that the rest
5361 of the register is all zeros. */
5362 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5363 {
5364 /* test reg64, imm32 */
5365 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5366 pbCodeBuf[off++] = 0xf7;
5367 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5368 pbCodeBuf[off++] = 0;
5369 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5370 pbCodeBuf[off++] = 0xff;
5371 pbCodeBuf[off++] = 0xff;
5372
5373 /* je/jz +1 */
5374 pbCodeBuf[off++] = 0x74;
5375 pbCodeBuf[off++] = 0x01;
5376
5377 /* int3 */
5378 pbCodeBuf[off++] = 0xcc;
5379 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5380 }
5381 else
5382 {
5383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5384 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5385 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5386 }
5387
5388# elif defined(RT_ARCH_ARM64)
5389 /* mov TMP0, [gstreg] */
5390 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5391
5392 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5393 /* sub tmp0, tmp0, idxReg */
5394 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5395 /* cbz tmp0, +1 */
5396 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5397 /* brk #0x1000+enmGstReg */
5398 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5400
5401# else
5402# error "Port me!"
5403# endif
5404 return off;
5405}
5406#endif /* VBOX_STRICT */
5407
5408
5409#ifdef VBOX_STRICT
5410/**
5411 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
5412 * important bits.
5413 *
5414 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5415 * Trashes EFLAGS on AMD64.
5416 */
5417static uint32_t
5418iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5419{
5420 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5421 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5422 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5423 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5424
5425# ifdef RT_ARCH_AMD64
5426 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5427
5428 /* je/jz +1 */
5429 pbCodeBuf[off++] = 0x74;
5430 pbCodeBuf[off++] = 0x01;
5431
5432 /* int3 */
5433 pbCodeBuf[off++] = 0xcc;
5434
5435# elif defined(RT_ARCH_ARM64)
5436 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5437
5438 /* b.eq +1 */
5439 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5440 /* brk #0x2000 */
5441 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5442
5443# else
5444# error "Port me!"
5445# endif
5446 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5447
5448 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5449 return off;
5450}
5451#endif /* VBOX_STRICT */
5452
5453
5454/**
5455 * Emits code for checking the return code of a call and rcPassUp, returning
5456 * from the code if either is non-zero.
5457 */
5458DECL_HIDDEN_THROW(uint32_t)
5459iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5460{
5461#ifdef RT_ARCH_AMD64
5462 /*
5463 * AMD64: eax = call status code.
5464 */
5465
5466 /* edx = rcPassUp */
5467 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5468# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5469 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5470# endif
5471
5472 /* edx = eax | rcPassUp */
5473 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5474 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5475 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5476 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5477
5478 /* Jump to non-zero status return path. */
5479 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5480
5481 /* done. */
5482
5483#elif defined(RT_ARCH_ARM64)
5484 /*
5485 * ARM64: w0 = call status code.
5486 */
5487# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5488 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5489# endif
5490 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5491
5492 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5493
5494 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5495
5496 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5497 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5498 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5499
5500#else
5501# error "port me"
5502#endif
5503 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5504 RT_NOREF_PV(idxInstr);
5505 return off;
5506}
5507
5508
5509/**
5510 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5511 * raising a \#GP(0) if it isn't.
5512 *
5513 * @returns New code buffer offset; throws VBox status code on failure.
5514 * @param pReNative The native recompile state.
5515 * @param off The code buffer offset.
5516 * @param idxAddrReg The host register with the address to check.
5517 * @param idxInstr The current instruction.
5518 */
5519DECL_HIDDEN_THROW(uint32_t)
5520iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5521{
5522 /*
5523 * Make sure we don't have any outstanding guest register writes as we may
5524 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5525 */
5526 off = iemNativeRegFlushPendingWrites(pReNative, off);
5527
5528#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5529 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5530#else
5531 RT_NOREF(idxInstr);
5532#endif
5533
5534#ifdef RT_ARCH_AMD64
5535 /*
5536 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5537 * return raisexcpt();
5538 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5539 */
5540 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5541
5542 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5543 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5544 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5545 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5546 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5547
5548 iemNativeRegFreeTmp(pReNative, iTmpReg);
5549
5550#elif defined(RT_ARCH_ARM64)
5551 /*
5552 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5553 * return raisexcpt();
5554 * ----
5555 * mov x1, 0x800000000000
5556 * add x1, x0, x1
5557 * cmp xzr, x1, lsr 48
5558 * b.ne .Lraisexcpt
5559 */
5560 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5561
5562 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5563 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5564 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5565 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5566
5567 iemNativeRegFreeTmp(pReNative, iTmpReg);
5568
5569#else
5570# error "Port me"
5571#endif
5572 return off;
5573}
5574
5575
5576/**
5577 * Emits code to check that the content of @a idxAddrReg is within the limit
5578 * of CS, raising a \#GP(0) if it isn't.
5579 *
5580 * @returns New code buffer offset; throws VBox status code on error.
5581 * @param pReNative The native recompile state.
5582 * @param off The code buffer offset.
5583 * @param idxAddrReg The host register (32-bit) with the address to
5584 * check.
5585 * @param idxInstr The current instruction.
5586 */
5587DECL_HIDDEN_THROW(uint32_t)
5588iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5589 uint8_t idxAddrReg, uint8_t idxInstr)
5590{
5591 /*
5592 * Make sure we don't have any outstanding guest register writes as we may
5593 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5594 */
5595 off = iemNativeRegFlushPendingWrites(pReNative, off);
5596
5597#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5598 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5599#else
5600 RT_NOREF(idxInstr);
5601#endif
5602
5603 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5604 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
5605 kIemNativeGstRegUse_ReadOnly);
5606
5607 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
5608 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5609
5610 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
5611 return off;
5612}
5613
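#if 0 /* Illustrative sketch only - not part of the original sources. */
    /*
     * Assumed branch-target validation pattern: 64-bit code uses the canonical
     * check, while 16/32-bit code checks the (32-bit) address against CS.LIM.
     * The f64Bit flag is hypothetical here.
     */
    if (f64Bit)
        off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxAddrReg, idxInstr);
    else
        off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxAddrReg, idxInstr);
#endif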
5614
5615/**
5616 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5617 *
5618 * @returns The flush mask.
5619 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5620 * @param fGstShwFlush The starting flush mask.
5621 */
5622DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5623{
5624 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5625 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5626 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5627 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5628 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5629 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5630 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5631 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5632 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5633 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5634 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5635 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5636 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5637 return fGstShwFlush;
5638}
5639
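#if 0 /* Illustrative sketch only - not part of the original sources. */
    /*
     * Assumed example: for an IEM_CIMPL_F_BRANCH_FAR style call the CS
     * selector/base/limit shadows get added on top of whatever the caller
     * already wanted flushed (here just the PC shadow).
     */
    uint64_t const fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR,
                                                                            RT_BIT_64(kIemNativeGstReg_Pc));
#endif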
5640
5641/**
5642 * Emits a call to a CImpl function or something similar.
5643 */
5644DECL_HIDDEN_THROW(uint32_t)
5645iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5646 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5647{
5648 /* Writeback everything. */
5649 off = iemNativeRegFlushPendingWrites(pReNative, off);
5650
5651 /*
5652 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5653 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5654 */
5655 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5656 fGstShwFlush
5657 | RT_BIT_64(kIemNativeGstReg_Pc)
5658 | RT_BIT_64(kIemNativeGstReg_EFlags));
5659 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5660
5661 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5662
5663 /*
5664 * Load the parameters.
5665 */
5666#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
5667 /* Special handling for the hidden VBOXSTRICTRC return pointer. */
5668 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5669 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5670 if (cAddParams > 0)
5671 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5672 if (cAddParams > 1)
5673 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5674 if (cAddParams > 2)
5675 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5676 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5677
5678#else
5679 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5680 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5681 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5682 if (cAddParams > 0)
5683 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5684 if (cAddParams > 1)
5685 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5686 if (cAddParams > 2)
5687# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5688 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5689# else
5690 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5691# endif
5692#endif
5693
5694 /*
5695 * Make the call.
5696 */
5697 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5698
5699#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5700 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5701#endif
5702
5703 /*
5704 * Check the status code.
5705 */
5706 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5707}
5708
5709
5710/**
5711 * Emits a call to a threaded worker function.
5712 */
5713DECL_HIDDEN_THROW(uint32_t)
5714iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5715{
5716 /* We don't know what the threaded function is doing so we must flush all pending writes. */
5717 off = iemNativeRegFlushPendingWrites(pReNative, off);
5718
5719 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5720 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5721
5722#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5723 /* The threaded function may throw / long jmp, so set current instruction
5724 number if we're counting. */
5725 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5726#endif
5727
5728 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
5729
5730#ifdef RT_ARCH_AMD64
5731 /* Load the parameters and emit the call. */
5732# ifdef RT_OS_WINDOWS
5733# ifndef VBOXSTRICTRC_STRICT_ENABLED
5734 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5735 if (cParams > 0)
5736 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5737 if (cParams > 1)
5738 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5739 if (cParams > 2)
5740 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5741# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5742 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5743 if (cParams > 0)
5744 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5745 if (cParams > 1)
5746 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5747 if (cParams > 2)
5748 {
5749 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5750 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5751 }
5752 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5753# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5754# else
5755 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5756 if (cParams > 0)
5757 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5758 if (cParams > 1)
5759 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5760 if (cParams > 2)
5761 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5762# endif
5763
5764 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5765
5766# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5767 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5768# endif
5769
5770#elif RT_ARCH_ARM64
5771 /*
5772 * ARM64:
5773 */
5774 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5775 if (cParams > 0)
5776 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5777 if (cParams > 1)
5778 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5779 if (cParams > 2)
5780 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5781
5782 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5783
5784#else
5785# error "port me"
5786#endif
5787
5788 /*
5789 * Check the status code.
5790 */
5791 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5792
5793 return off;
5794}
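
/*
 * Illustration only (added for clarity, not emitted verbatim): on the SysV
 * AMD64 path above a three parameter threaded call amounts to roughly
 *      mov     rdi, rbx                    ; IEMNATIVE_REG_FIXED_PVMCPU -> pVCpu
 *      mov     rsi, <auParams[0]>
 *      mov     rdx, <auParams[1]>
 *      mov     rcx, <auParams[2]>
 *      call    g_apfnIemThreadedFunctions[enmFunction]
 * followed by the status code check from iemNativeEmitCheckCallRetAndPassUp().
 */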
5795
5796#ifdef VBOX_WITH_STATISTICS
5797/**
5798 * Emits code to update the threaded call statistics.
5799 */
5800DECL_INLINE_THROW(uint32_t)
5801iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5802{
5803 /*
5804 * Update threaded function stats.
5805 */
5806 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
5807 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
5808# if defined(RT_ARCH_ARM64)
5809 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
5810 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
5811 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
5812 iemNativeRegFreeTmp(pReNative, idxTmp1);
5813 iemNativeRegFreeTmp(pReNative, idxTmp2);
5814# else
5815 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
5816# endif
5817 return off;
5818}
5819#endif /* VBOX_WITH_STATISTICS */
5820
5821
5822/**
5823 * Emits the code at the CheckBranchMiss label.
5824 */
5825static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5826{
5827 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5828 if (idxLabel != UINT32_MAX)
5829 {
5830 iemNativeLabelDefine(pReNative, idxLabel, off);
5831
5832 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5833 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5834 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5835
5836 /* jump back to the return sequence. */
5837 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5838 }
5839 return off;
5840}
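
/*
 * Note: this and the other tail label emitters below all follow the same
 * pattern, sketched here for illustration:
 *      if the label was ever referenced (iemNativeLabelFind() != UINT32_MAX):
 *          define the label at the current offset
 *          mov  arg0, pVCpu                ; IEMNATIVE_REG_FIXED_PVMCPU
 *          call the corresponding helper   ; e.g. iemNativeHlpCheckBranchMiss
 *          jmp  the common return sequence
 * Nothing is emitted for labels no code in the TB jumps to.
 */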
5841
5842
5843/**
5844 * Emits the code at the NeedCsLimChecking label.
5845 */
5846static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5847{
5848 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5849 if (idxLabel != UINT32_MAX)
5850 {
5851 iemNativeLabelDefine(pReNative, idxLabel, off);
5852
5853 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5854 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5855 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5856
5857 /* jump back to the return sequence. */
5858 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5859 }
5860 return off;
5861}
5862
5863
5864/**
5865 * Emits the code at the ObsoleteTb label.
5866 */
5867static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5868{
5869 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5870 if (idxLabel != UINT32_MAX)
5871 {
5872 iemNativeLabelDefine(pReNative, idxLabel, off);
5873
5874 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5875 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5876 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5877
5878 /* jump back to the return sequence. */
5879 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5880 }
5881 return off;
5882}
5883
5884
5885/**
5886 * Emits the code at the RaiseGP0 label.
5887 */
5888static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5889{
5890 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5891 if (idxLabel != UINT32_MAX)
5892 {
5893 iemNativeLabelDefine(pReNative, idxLabel, off);
5894
5895 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5896 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5897 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5898
5899 /* jump back to the return sequence. */
5900 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5901 }
5902 return off;
5903}
5904
5905
5906/**
5907 * Emits the code at the RaiseNm label.
5908 */
5909static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5910{
5911 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
5912 if (idxLabel != UINT32_MAX)
5913 {
5914 iemNativeLabelDefine(pReNative, idxLabel, off);
5915
5916 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
5917 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5918 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
5919
5920 /* jump back to the return sequence. */
5921 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5922 }
5923 return off;
5924}
5925
5926
5927/**
5928 * Emits the code at the RaiseUd label.
5929 */
5930static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5931{
5932 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
5933 if (idxLabel != UINT32_MAX)
5934 {
5935 iemNativeLabelDefine(pReNative, idxLabel, off);
5936
5937 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
5938 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5939 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
5940
5941 /* jump back to the return sequence. */
5942 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5943 }
5944 return off;
5945}
5946
5947
5948/**
5949 * Emits the code at the ReturnWithFlags label (returns
5950 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5951 */
5952static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5953{
5954 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5955 if (idxLabel != UINT32_MAX)
5956 {
5957 iemNativeLabelDefine(pReNative, idxLabel, off);
5958
5959 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5960
5961 /* jump back to the return sequence. */
5962 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5963 }
5964 return off;
5965}
5966
5967
5968/**
5969 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
5970 */
5971static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5972{
5973 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
5974 if (idxLabel != UINT32_MAX)
5975 {
5976 iemNativeLabelDefine(pReNative, idxLabel, off);
5977
5978 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5979
5980 /* jump back to the return sequence. */
5981 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5982 }
5983 return off;
5984}
5985
5986
5987/**
5988 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5989 */
5990static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5991{
5992 /*
5993 * Generate the rc + rcPassUp fiddling code if needed.
5994 */
5995 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5996 if (idxLabel != UINT32_MAX)
5997 {
5998 iemNativeLabelDefine(pReNative, idxLabel, off);
5999
6000 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6001#ifdef RT_ARCH_AMD64
6002# ifdef RT_OS_WINDOWS
6003# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6004 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6005# endif
6006 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6007 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6008# else
6009 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6010 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6011# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6012 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6013# endif
6014# endif
6015# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6016 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6017# endif
6018
6019#else
6020 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6021 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6022 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6023#endif
6024
6025 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6026 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6027 }
6028 return off;
6029}
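
/*
 * In all the variants above the net effect is, roughly, a call of the form
 *      iemNativeHlpExecStatusCodeFiddling(pVCpu, rc, idxInstr)     ; rc is in eax / w0
 * followed by a jump back to the common return sequence (illustration only).
 */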
6030
6031
6032/**
6033 * Emits a standard epilog.
6034 */
6035static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6036{
6037 *pidxReturnLabel = UINT32_MAX;
6038
6039 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6040 off = iemNativeRegFlushPendingWrites(pReNative, off);
6041
6042 /*
6043 * Successful return, so clear the return register (eax, w0).
6044 */
6045 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6046
6047 /*
6048 * Define label for common return point.
6049 */
6050 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6051 *pidxReturnLabel = idxReturn;
6052
6053 /*
6054 * Restore registers and return.
6055 */
6056#ifdef RT_ARCH_AMD64
6057 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6058
6059 /* Reposition rsp at the r15 restore point. */
6060 pbCodeBuf[off++] = X86_OP_REX_W;
6061 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6062 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6063 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6064
6065 /* Pop non-volatile registers and return */
6066 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6067 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6068 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6069 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6070 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6071 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6072 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6073 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6074# ifdef RT_OS_WINDOWS
6075 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6076 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6077# endif
6078 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6079 pbCodeBuf[off++] = 0xc9; /* leave */
6080 pbCodeBuf[off++] = 0xc3; /* ret */
6081 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6082
6083#elif RT_ARCH_ARM64
6084 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6085
6086 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
6087 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6088 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6089 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6090 IEMNATIVE_FRAME_VAR_SIZE / 8);
6091 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6092 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6093 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6094 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6095 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6096 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6097 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6098 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6099 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6100 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6101 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6102 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6103
6104 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6105 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6106 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6107 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6108
6109 /* retab / ret */
6110# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6111 if (1)
6112 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6113 else
6114# endif
6115 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6116
6117#else
6118# error "port me"
6119#endif
6120 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6121
6122 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6123}
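
/*
 * For illustration, the AMD64 epilog emitted above amounts to roughly:
 *      xor     eax, eax                        ; VINF_SUCCESS
 *    Return:
 *      lea     rsp, [rbp + IEMNATIVE_FP_OFF_LAST_PUSH]
 *      pop     r15 / r14 / r13 / r12           ; plus rdi and rsi on Windows
 *      pop     rbx
 *      leave
 *      ret
 *      int3                                    ; poison
 * with the NonZeroRetOrPassUp fiddling code appended by iemNativeEmitRcFiddling().
 */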
6124
6125
6126/**
6127 * Emits a standard prolog.
6128 */
6129static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6130{
6131#ifdef RT_ARCH_AMD64
6132 /*
6133 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6134 * reserving 64 bytes for stack variables plus 4 non-register argument
6135 * slots. Fixed register assignment: xBX = pVCpu;
6136 *
6137 * Since we always do the same register spilling, we can use the same
6138 * unwind description for all the code.
6139 */
6140 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6141 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6142 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6143 pbCodeBuf[off++] = 0x8b;
6144 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6145 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6146 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6147# ifdef RT_OS_WINDOWS
6148 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6149 pbCodeBuf[off++] = 0x8b;
6150 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6151 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6152 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6153# else
6154 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6155 pbCodeBuf[off++] = 0x8b;
6156 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6157# endif
6158 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6159 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6160 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6161 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6162 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6163 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6164 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6165 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6166
6167# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6168 /* Save the frame pointer. */
6169 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6170# endif
6171
6172 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6173 X86_GREG_xSP,
6174 IEMNATIVE_FRAME_ALIGN_SIZE
6175 + IEMNATIVE_FRAME_VAR_SIZE
6176 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6177 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6178 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6179 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6180 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6181
6182#elif RT_ARCH_ARM64
6183 /*
6184 * We set up a stack frame exactly like on AMD64, only we have to save the
6185 * return address (LR) ourselves here. We save all non-volatile registers.
6186 */
6187 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6188
6189# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have
6190 * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.
6191 * It's definitely the dwarf stepping code, but until that is found it's very tedious to figure out
6192 * whether it's in any way conditional, so just emit this instruction now and hope for the best... */
6193 /* pacibsp */
6194 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6195# endif
6196
6197 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
6198 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6199 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6200 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6201 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6202 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6203 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6204 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6205 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6206 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6207 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6208 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6209 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6210 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6211 /* Save the BP and LR (ret address) registers at the top of the frame. */
6212 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6213 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6214 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6215 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6216 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6217 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6218
6219 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6220 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6221
6222 /* mov x28, x0 */
6223 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6224 /* mov x27, x1 */
6225 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6226
6227# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6228 /* Save the frame pointer. */
6229 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6230 ARMV8_A64_REG_X2);
6231# endif
6232
6233#else
6234# error "port me"
6235#endif
6236 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6237 return off;
6238}
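
/*
 * Rough sketch of the resulting frame (the exact offsets come from the
 * IEMNATIVE_FRAME_* / IEMNATIVE_FP_OFF_* constants, so treat this as
 * qualitative only):
 *    AMD64:  rbp -> saved rbp, with rbx, (rsi+rdi on Windows) and r12-r15
 *            pushed below it, followed by the variable area, stack argument
 *            slots and (on Windows) shadow argument space; rbx = pVCpu.
 *    ARM64:  the register save area (x19-x28, bp, lr) sits above sp with the
 *            variable area below it; bp points at the saved bp/lr pair and
 *            x28 / x27 hold the fixed pVCpu / CPUMCTX pointers.
 */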
6239
6240
6241
6242
6243/*********************************************************************************************************************************
6244* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
6245*********************************************************************************************************************************/
6246
6247#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
6248 { \
6249 Assert(pReNative->Core.bmVars == 0); \
6250 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
6251 Assert(pReNative->Core.bmStack == 0); \
6252 pReNative->fMc = (a_fMcFlags); \
6253 pReNative->fCImpl = (a_fCImplFlags); \
6254 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
6255
6256/** We have to get to the end in recompilation mode, as otherwise we won't
6257 * generate code for all the IEM_MC_IF_XXX branches. */
6258#define IEM_MC_END() \
6259 iemNativeVarFreeAll(pReNative); \
6260 } return off
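
/*
 * For illustration, a recompiled MC block is bracketed by these two macros
 * roughly like this (sketch only, the argument names are placeholders):
 *      IEM_MC_BEGIN(<cArgs>, <cLocals>, <fMcFlags>, <fCImplFlags>);
 *          ... IEM_MC_XXX statements, each expanding to iemNativeEmitXxx
 *              calls that advance 'off' ...
 *      IEM_MC_END();
 * IEM_MC_BEGIN() opens the scope and records the flags and argument count,
 * while IEM_MC_END() frees all variables and returns the final code offset.
 */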
6261
6262
6263
6264/*********************************************************************************************************************************
6265* Native Emitter Support. *
6266*********************************************************************************************************************************/
6267
6268
6269#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
6270
6271#define IEM_MC_NATIVE_ELSE() } else {
6272
6273#define IEM_MC_NATIVE_ENDIF() } ((void)0)
6274
6275
6276#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
6277 off = a_fnEmitter(pReNative, off)
6278
6279#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
6280 off = a_fnEmitter(pReNative, off, (a0))
6281
6282#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
6283 off = a_fnEmitter(pReNative, off, (a0), (a1))
6284
6285#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
6286 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
6287
6288#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
6289 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
6290
6291#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
6292 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
6293
6294#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
6295 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
6296
6297#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
6298 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
6299
6300#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
6301 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
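
/*
 * Illustrative usage from an instruction body (sketch only; iemNativeEmit_xxx
 * stands in for a real emitter and the RT_ARCH_VAL_XXX masks are the usual
 * IPRT architecture values):
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmit_xxx, a_Dst, a_Src);
 *      IEM_MC_NATIVE_ELSE()
 *          ... portable IEM_MC_XXX fallback ...
 *      IEM_MC_NATIVE_ENDIF();
 */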
6302
6303
6304
6305/*********************************************************************************************************************************
6306* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                               *
6307*********************************************************************************************************************************/
6308
6309#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
6310 pReNative->fMc = 0; \
6311 pReNative->fCImpl = (a_fFlags); \
6312 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
6313
6314
6315#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6316 pReNative->fMc = 0; \
6317 pReNative->fCImpl = (a_fFlags); \
6318 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
6319
6320DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6321 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6322 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
6323{
6324 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
6325}
6326
6327
6328#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6329 pReNative->fMc = 0; \
6330 pReNative->fCImpl = (a_fFlags); \
6331 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6332 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
6333
6334DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6335 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6336 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
6337{
6338 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
6339}
6340
6341
6342#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6343 pReNative->fMc = 0; \
6344 pReNative->fCImpl = (a_fFlags); \
6345 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6346 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
6347
6348DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6349 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6350 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
6351 uint64_t uArg2)
6352{
6353 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
6354}
6355
6356
6357
6358/*********************************************************************************************************************************
6359* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
6360*********************************************************************************************************************************/
6361
6362/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
6363 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
6364DECL_INLINE_THROW(uint32_t)
6365iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6366{
6367 /*
6368 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
6369 * return with a special status code and make the execution loop deal with
6370 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
6371 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
6372 * could continue w/o interruption, it will probably drop into the
6373 * debugger, so it's not worth the effort of trying to service it here; we
6374 * just lump it in with the handling of the others.
6375 *
6376 * To simplify the code and the register state management even more (wrt
6377 * the immediate in the AND operation), we always update the flags and skip
6378 * the extra check and its associated conditional jump.
6379 */
6380 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
6381 <= UINT32_MAX);
6382#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6383 AssertMsg( pReNative->idxCurCall == 0
6384 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
6385 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
6386#endif
6387
6388 /*
6389 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
6390 * any pending register writes must be flushed.
6391 */
6392 off = iemNativeRegFlushPendingWrites(pReNative, off);
6393
6394 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6395 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
6396 true /*fSkipLivenessAssert*/);
6397 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
6398 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
6399 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
6400 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
6401 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6402
6403 /* Free but don't flush the EFLAGS register. */
6404 iemNativeRegFreeTmp(pReNative, idxEflReg);
6405
6406 return off;
6407}
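
/*
 * The above results in roughly this sequence (illustration only):
 *      test    eflReg, X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK
 *      jnz     ReturnWithFlags
 *      and     eflReg, ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW)
 *      mov     [pVCpu + cpum.GstCtx.eflags], eflReg
 */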
6408
6409
6410/** Handles a_rcNormal: a no-op for VINF_SUCCESS, otherwise flushes pending writes and jumps to ReturnBreak. */
6411template<int const a_rcNormal>
6412DECL_FORCE_INLINE(uint32_t)
6413iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6414{
6415 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
6416 if (a_rcNormal != VINF_SUCCESS)
6417 {
6418#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6419 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6420#else
6421 RT_NOREF_PV(idxInstr);
6422#endif
6423
6424 /* As this code returns from the TB any pending register writes must be flushed. */
6425 off = iemNativeRegFlushPendingWrites(pReNative, off);
6426
6427 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
6428 }
6429 return off;
6430}
6431
6432
6433#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
6434 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6435 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6436
6437#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6438 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6439 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6440 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6441
6442/** Same as iemRegAddToRip64AndFinishingNoFlags. */
6443DECL_INLINE_THROW(uint32_t)
6444iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6445{
6446#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6447# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6448 if (!pReNative->Core.offPc)
6449 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6450# endif
6451
6452 /* Allocate a temporary PC register. */
6453 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6454
6455 /* Perform the addition and store the result. */
6456 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
6457 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6458
6459 /* Free but don't flush the PC register. */
6460 iemNativeRegFreeTmp(pReNative, idxPcReg);
6461#endif
6462
6463#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6464 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6465
6466 pReNative->Core.offPc += cbInstr;
6467# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6468 off = iemNativePcAdjustCheck(pReNative, off);
6469# endif
6470 if (pReNative->cCondDepth)
6471 off = iemNativeEmitPcWriteback(pReNative, off);
6472 else
6473 pReNative->Core.cInstrPcUpdateSkipped++;
6474#endif
6475
6476 return off;
6477}
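
/*
 * Without IEMNATIVE_WITH_DELAYED_PC_UPDATING the above boils down to
 * (illustration only):
 *      add     pcReg, cbInstr
 *      mov     [pVCpu + cpum.GstCtx.rip], pcReg
 * With delayed updating the advance is merely accumulated in Core.offPc and
 * the writeback is postponed, unless we are inside a conditional
 * (cCondDepth != 0) in which case it is written back immediately.
 */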
6478
6479
6480#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
6481 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6482 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6483
6484#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6485 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6486 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6487 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6488
6489/** Same as iemRegAddToEip32AndFinishingNoFlags. */
6490DECL_INLINE_THROW(uint32_t)
6491iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6492{
6493#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6494# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6495 if (!pReNative->Core.offPc)
6496 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6497# endif
6498
6499 /* Allocate a temporary PC register. */
6500 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6501
6502 /* Perform the addition and store the result. */
6503 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6504 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6505
6506 /* Free but don't flush the PC register. */
6507 iemNativeRegFreeTmp(pReNative, idxPcReg);
6508#endif
6509
6510#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6511 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6512
6513 pReNative->Core.offPc += cbInstr;
6514# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6515 off = iemNativePcAdjustCheck(pReNative, off);
6516# endif
6517 if (pReNative->cCondDepth)
6518 off = iemNativeEmitPcWriteback(pReNative, off);
6519 else
6520 pReNative->Core.cInstrPcUpdateSkipped++;
6521#endif
6522
6523 return off;
6524}
6525
6526
6527#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
6528 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6529 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6530
6531#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6532 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6533 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6534 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6535
6536/** Same as iemRegAddToIp16AndFinishingNoFlags. */
6537DECL_INLINE_THROW(uint32_t)
6538iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6539{
6540#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6541# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6542 if (!pReNative->Core.offPc)
6543 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6544# endif
6545
6546 /* Allocate a temporary PC register. */
6547 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6548
6549 /* Perform the addition and store the result. */
6550 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6551 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6552 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6553
6554 /* Free but don't flush the PC register. */
6555 iemNativeRegFreeTmp(pReNative, idxPcReg);
6556#endif
6557
6558#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6559 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6560
6561 pReNative->Core.offPc += cbInstr;
6562# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6563 off = iemNativePcAdjustCheck(pReNative, off);
6564# endif
6565 if (pReNative->cCondDepth)
6566 off = iemNativeEmitPcWriteback(pReNative, off);
6567 else
6568 pReNative->Core.cInstrPcUpdateSkipped++;
6569#endif
6570
6571 return off;
6572}
6573
6574
6575
6576/*********************************************************************************************************************************
6577* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
6578*********************************************************************************************************************************/
6579
6580#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6581 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6582 (a_enmEffOpSize), pCallEntry->idxInstr); \
6583 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6584
6585#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6586 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6587 (a_enmEffOpSize), pCallEntry->idxInstr); \
6588 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6589 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6590
6591#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
6592 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6593 IEMMODE_16BIT, pCallEntry->idxInstr); \
6594 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6595
6596#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6597 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6598 IEMMODE_16BIT, pCallEntry->idxInstr); \
6599 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6600 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6601
6602#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
6603 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6604 IEMMODE_64BIT, pCallEntry->idxInstr); \
6605 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6606
6607#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6608 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6609 IEMMODE_64BIT, pCallEntry->idxInstr); \
6610 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6611 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6612
6613/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
6614 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
6615 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
6616DECL_INLINE_THROW(uint32_t)
6617iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6618 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6619{
6620 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
6621
6622 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6623 off = iemNativeRegFlushPendingWrites(pReNative, off);
6624
6625#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6626 Assert(pReNative->Core.offPc == 0);
6627
6628 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6629#endif
6630
6631 /* Allocate a temporary PC register. */
6632 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6633
6634 /* Perform the addition. */
6635 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
6636
6637 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
6638 {
6639 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6640 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6641 }
6642 else
6643 {
6644 /* Just truncate the result to 16-bit IP. */
6645 Assert(enmEffOpSize == IEMMODE_16BIT);
6646 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6647 }
6648 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6649
6650 /* Free but don't flush the PC register. */
6651 iemNativeRegFreeTmp(pReNative, idxPcReg);
6652
6653 return off;
6654}
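
/*
 * Sketch of what the above emits for a 64-bit operand size (illustration only):
 *      add     pcReg, offDisp + cbInstr
 *      <canonical address check, branching to the RaiseGp0 tail label on failure>
 *      mov     [pVCpu + cpum.GstCtx.rip], pcReg
 * For a 16-bit operand size the canonical check is replaced by clearing the
 * upper 48 bits of the result.
 */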
6655
6656
6657#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6658 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6659 (a_enmEffOpSize), pCallEntry->idxInstr); \
6660 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6661
6662#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6663 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6664 (a_enmEffOpSize), pCallEntry->idxInstr); \
6665 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6666 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6667
6668#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
6669 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6670 IEMMODE_16BIT, pCallEntry->idxInstr); \
6671 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6672
6673#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6674 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6675 IEMMODE_16BIT, pCallEntry->idxInstr); \
6676 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6677 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6678
6679#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
6680 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6681 IEMMODE_32BIT, pCallEntry->idxInstr); \
6682 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6683
6684#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6685 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6686 IEMMODE_32BIT, pCallEntry->idxInstr); \
6687 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6688 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6689
6690/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
6691 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
6692 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
6693DECL_INLINE_THROW(uint32_t)
6694iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6695 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6696{
6697 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
6698
6699 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6700 off = iemNativeRegFlushPendingWrites(pReNative, off);
6701
6702#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6703 Assert(pReNative->Core.offPc == 0);
6704
6705 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6706#endif
6707
6708 /* Allocate a temporary PC register. */
6709 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6710
6711 /* Perform the addition. */
6712 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6713
6714 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6715 if (enmEffOpSize == IEMMODE_16BIT)
6716 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6717
6718 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
6719/** @todo we can skip this in 32-bit FLAT mode. */
6720 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6721
6722 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6723
6724 /* Free but don't flush the PC register. */
6725 iemNativeRegFreeTmp(pReNative, idxPcReg);
6726
6727 return off;
6728}
6729
6730
6731#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
6732 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6733 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6734
6735#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
6736 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6737 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6738 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6739
6740#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
6741 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6742 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6743
6744#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6745 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6746 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6747 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6748
6749#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
6750 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6751 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6752
6753#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6754 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6755 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6756 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6757
6758/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6759DECL_INLINE_THROW(uint32_t)
6760iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6761 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6762{
6763 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6764 off = iemNativeRegFlushPendingWrites(pReNative, off);
6765
6766#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6767 Assert(pReNative->Core.offPc == 0);
6768
6769 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6770#endif
6771
6772 /* Allocate a temporary PC register. */
6773 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6774
6775 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6776 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6777 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6778 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6779 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6780
6781 /* Free but don't flush the PC register. */
6782 iemNativeRegFreeTmp(pReNative, idxPcReg);
6783
6784 return off;
6785}
6786
6787
6788
6789/*********************************************************************************************************************************
6790* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                     *
6791*********************************************************************************************************************************/
6792
6793/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6794#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6795 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6796
6797/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6798#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6799 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6800
6801/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6802#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6803 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6804
6805/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6806 * clears flags. */
6807#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6808 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6809 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6810
6811/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6812 * clears flags. */
6813#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6814 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6815 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6816
6817/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6818 * clears flags. */
6819#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6820 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6821 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6822
6823#undef IEM_MC_SET_RIP_U16_AND_FINISH
6824
6825
6826/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6827#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6828 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6829
6830/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6831#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6832 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6833
6834/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6835 * clears flags. */
6836#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6837 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6838 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6839
6840/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6841 * and clears flags. */
6842#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6843 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6844 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6845
6846#undef IEM_MC_SET_RIP_U32_AND_FINISH
6847
6848
6849/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6850#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6851 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6852
6853/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6854 * and clears flags. */
6855#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6856 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6857 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6858
6859#undef IEM_MC_SET_RIP_U64_AND_FINISH
6860
6861
6862/** Same as iemRegRipJumpU16AndFinishNoFlags,
6863 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6864DECL_INLINE_THROW(uint32_t)
6865iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6866 uint8_t idxInstr, uint8_t cbVar)
6867{
6868 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6869 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
6870
6871 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6872 off = iemNativeRegFlushPendingWrites(pReNative, off);
6873
6874#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6875 Assert(pReNative->Core.offPc == 0);
6876
6877 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6878#endif
6879
6880 /* Get a register with the new PC loaded from idxVarPc.
6881 Note! This ASSUMES that the high bits of the GPR are zeroed. */
6882 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6883
6884 /* Check limit (may #GP(0) + exit TB). */
6885 if (!f64Bit)
6886/** @todo we can skip this test in FLAT 32-bit mode. */
6887 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6888 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6889 else if (cbVar > sizeof(uint32_t))
6890 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6891
6892 /* Store the result. */
6893 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6894
6895 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6896 /** @todo implicitly free the variable? */
6897
6898 return off;
6899}
6900
6901
6902
6903/*********************************************************************************************************************************
6904* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
6905*********************************************************************************************************************************/
6906
6907#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
6908 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
6909
6910/**
6911 * Emits code to check if an \#NM exception should be raised.
6912 *
6913 * @returns New code buffer offset, UINT32_MAX on failure.
6914 * @param pReNative The native recompile state.
6915 * @param off The code buffer offset.
6916 * @param idxInstr The current instruction.
6917 */
6918DECL_INLINE_THROW(uint32_t)
6919iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6920{
6921 /*
6922 * Make sure we don't have any outstanding guest register writes as we may
6923 * raise an #NM and all guest registers must be up to date in CPUMCTX.
6924 *
6925 * @todo r=aeichner Can we postpone this to the RaiseNm path?
6926 */
6927 off = iemNativeRegFlushPendingWrites(pReNative, off);
6928
6929#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6930 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6931#else
6932 RT_NOREF(idxInstr);
6933#endif
6934
6935 /* Allocate a temporary CR0 register. */
6936 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
6937 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
6938
6939 /*
6940     * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
6941 * return raisexcpt();
6942 */
6943 /* Test and jump. */
6944 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
6945
6946 /* Free but don't flush the CR0 register. */
6947 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
6948
6949 return off;
6950}
6951
6952
6953#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
6954 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
6955
6956/**
6957 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
6958 *
6959 * @returns New code buffer offset, UINT32_MAX on failure.
6960 * @param pReNative The native recompile state.
6961 * @param off The code buffer offset.
6962 * @param idxInstr The current instruction.
6963 */
6964DECL_INLINE_THROW(uint32_t)
6965iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6966{
6967 /*
6968 * Make sure we don't have any outstanding guest register writes as we may
6969     * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
6970 *
6971 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
6972 */
6973 off = iemNativeRegFlushPendingWrites(pReNative, off);
6974
6975#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6976 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6977#else
6978 RT_NOREF(idxInstr);
6979#endif
6980
6981 /* Allocate a temporary CR0 and CR4 register. */
6982 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
6983 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
6984 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
6985 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
6986
6987 /** @todo r=aeichner Optimize this more later to have less compares and branches,
6988 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
6989 * actual performance benefit first). */
6990 /*
6991 * if (cr0 & X86_CR0_EM)
6992 * return raisexcpt();
6993 */
6994 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM, idxLabelRaiseUd);
6995 /*
6996 * if (!(cr4 & X86_CR4_OSFXSR))
6997 * return raisexcpt();
6998 */
6999 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR, idxLabelRaiseUd);
7000 /*
7001 * if (cr0 & X86_CR0_TS)
7002 * return raisexcpt();
7003 */
7004 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_TS, idxLabelRaiseNm);
7005
7006 /* Free but don't flush the CR0 and CR4 register. */
7007 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
7008 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
7009
7010 return off;
7011}
7012
7013
7014#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
7015 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
7016
7017/**
7018 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
7019 *
7020 * @returns New code buffer offset, UINT32_MAX on failure.
7021 * @param pReNative The native recompile state.
7022 * @param off The code buffer offset.
7023 * @param idxInstr The current instruction.
7024 */
7025DECL_INLINE_THROW(uint32_t)
7026iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7027{
7028 /*
7029 * Make sure we don't have any outstanding guest register writes as we may
7030     * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
7031 *
7032 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
7033 */
7034 off = iemNativeRegFlushPendingWrites(pReNative, off);
7035
7036#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7037 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7038#else
7039 RT_NOREF(idxInstr);
7040#endif
7041
7042 /* Allocate a temporary CR0, CR4 and XCR0 register. */
7043 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
7044 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
7045 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
7046 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
7047 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
7048
7049#if 1
7050 off = iemNativeEmitBrk(pReNative, off, 0x4223); /** @todo Test this when AVX gets actually available. */
7051#endif
7052
7053 /** @todo r=aeichner Optimize this more later to have less compares and branches,
7054 * (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h but check that it has some
7055 * actual performance benefit first). */
7056 /*
7057 * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
7058 * return raisexcpt();
7059 */
7060 const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
7061     off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxXcr0Reg);
7062 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
7063 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7064
7065 /*
7066 * if (!(cr4 & X86_CR4_OSXSAVE))
7067 * return raisexcpt();
7068 */
7069 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE, idxLabelRaiseUd);
7070 /*
7071 * if (cr0 & X86_CR0_TS)
7072 * return raisexcpt();
7073 */
7074 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_TS, idxLabelRaiseNm);
7075
7076 /* Free but don't flush the CR0, CR4 and XCR0 register. */
7077 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
7078 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
7079 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
7080
7081 return off;
7082}
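/* Plain-C sketch of what the checks above amount to (illustrative only; the
   raise helpers here are the regular interpreter ones, not emitter calls):
   @code
        if ((pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
            return iemRaiseUndefinedOpcode(pVCpu);      // both SSE and YMM state must be enabled in XCR0
        if (!(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
            return iemRaiseUndefinedOpcode(pVCpu);
        if (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS)
            return iemRaiseDeviceNotAvailable(pVCpu);
   @endcode */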
7083
7084
7085
7086/*********************************************************************************************************************************
7087* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
7088*********************************************************************************************************************************/
7089
7090/**
7091 * Pushes an IEM_MC_IF_XXX onto the condition stack.
7092 *
7093 * @returns Pointer to the condition stack entry on success, NULL on failure
7094 * (too many nestings)
7095 */
7096DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
7097{
7098#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7099 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
7100#endif
7101
7102 uint32_t const idxStack = pReNative->cCondDepth;
7103 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
7104
7105 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
7106 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
7107
7108 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
7109 pEntry->fInElse = false;
7110 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
7111 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
7112
7113 return pEntry;
7114}
7115
7116
7117/**
7118 * Start of the if-block, snapshotting the register and variable state.
7119 */
7120DECL_INLINE_THROW(void)
7121iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
7122{
7123 Assert(offIfBlock != UINT32_MAX);
7124 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7125 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7126 Assert(!pEntry->fInElse);
7127
7128     /* Define the start of the IF block if requested or for disassembly purposes. */
7129 if (idxLabelIf != UINT32_MAX)
7130 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
7131#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7132 else
7133 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
7134#else
7135 RT_NOREF(offIfBlock);
7136#endif
7137
7138#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7139 Assert(pReNative->Core.offPc == 0);
7140#endif
7141
7142 /* Copy the initial state so we can restore it in the 'else' block. */
7143 pEntry->InitialState = pReNative->Core;
7144}
7145
7146
7147#define IEM_MC_ELSE() } while (0); \
7148 off = iemNativeEmitElse(pReNative, off); \
7149 do {
7150
7151/** Emits code related to IEM_MC_ELSE. */
7152DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7153{
7154 /* Check sanity and get the conditional stack entry. */
7155 Assert(off != UINT32_MAX);
7156 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7157 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7158 Assert(!pEntry->fInElse);
7159
7160 /* Jump to the endif */
7161 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
7162
7163 /* Define the else label and enter the else part of the condition. */
7164 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
7165 pEntry->fInElse = true;
7166
7167#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7168 Assert(pReNative->Core.offPc == 0);
7169#endif
7170
7171 /* Snapshot the core state so we can do a merge at the endif and restore
7172 the snapshot we took at the start of the if-block. */
7173 pEntry->IfFinalState = pReNative->Core;
7174 pReNative->Core = pEntry->InitialState;
7175
7176 return off;
7177}
7178
7179
7180#define IEM_MC_ENDIF() } while (0); \
7181 off = iemNativeEmitEndIf(pReNative, off)
7182
7183/** Emits code related to IEM_MC_ENDIF. */
7184DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7185{
7186 /* Check sanity and get the conditional stack entry. */
7187 Assert(off != UINT32_MAX);
7188 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7189 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7190
7191#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7192 Assert(pReNative->Core.offPc == 0);
7193#endif
7194
7195 /*
7196      * Now we have to find common ground with the core state at the end of the
7197      * if-block (or the initial state if there was no else). Use the smallest
7198      * common denominator and just drop anything that isn't the same in both states.
7199 */
7200 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
7201 * which is why we're doing this at the end of the else-block.
7202      * But we'd need more info about the future for that to be worth the effort. */
7203 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
7204 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
7205 {
7206 /* shadow guest stuff first. */
7207 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
7208 if (fGstRegs)
7209 {
7210 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
7211 do
7212 {
7213 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
7214 fGstRegs &= ~RT_BIT_64(idxGstReg);
7215
7216 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
7217 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
7218 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
7219 {
7220 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
7221 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
7222 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
7223 }
7224 } while (fGstRegs);
7225 }
7226 else
7227 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
7228
7229 /* Check variables next. For now we must require them to be identical
7230 or stuff we can recreate. */
7231 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
7232 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
7233 if (fVars)
7234 {
7235 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
7236 do
7237 {
7238 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
7239 fVars &= ~RT_BIT_32(idxVar);
7240
7241 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
7242 {
7243 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
7244 continue;
7245 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
7246 {
7247 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7248 if (idxHstReg != UINT8_MAX)
7249 {
7250 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7251 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7252 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
7253 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7254 }
7255 continue;
7256 }
7257 }
7258 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
7259 continue;
7260
7261 /* Irreconcilable, so drop it. */
7262 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7263 if (idxHstReg != UINT8_MAX)
7264 {
7265 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7266 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7267 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
7268 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7269 }
7270 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7271 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7272 } while (fVars);
7273 }
7274
7275 /* Finally, check that the host register allocations matches. */
7276 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
7277 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
7278 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
7279 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
7280 }
7281
7282 /*
7283 * Define the endif label and maybe the else one if we're still in the 'if' part.
7284 */
7285 if (!pEntry->fInElse)
7286 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
7287 else
7288 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
7289 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
7290
7291 /* Pop the conditional stack.*/
7292 pReNative->cCondDepth -= 1;
7293
7294 return off;
7295}
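/* Hedged illustration of how the pieces above knit together inside a recompiled
   MC block (the flag and the statements inside the blocks are made-up examples):
   @code
        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {     // push cond entry, emit test + branch to the 'else' label
            // if-block statements, recompiled against a snapshot of the register/variable state
        } IEM_MC_ELSE() {                       // emit jump to 'endif', define 'else', restore snapshot
            // else-block statements
        } IEM_MC_ENDIF();                       // reconcile the two states, define 'endif', pop entry
   @endcode */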
7296
7297
7298#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
7299 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
7300 do {
7301
7302/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
7303DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
7304{
7305 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7306
7307 /* Get the eflags. */
7308 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7309 kIemNativeGstRegUse_ReadOnly);
7310
7311 /* Test and jump. */
7312 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
7313
7314 /* Free but don't flush the EFlags register. */
7315 iemNativeRegFreeTmp(pReNative, idxEflReg);
7316
7317 /* Make a copy of the core state now as we start the if-block. */
7318 iemNativeCondStartIfBlock(pReNative, off);
7319
7320 return off;
7321}
7322
7323
7324#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
7325 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
7326 do {
7327
7328/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
7329DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
7330{
7331 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7332
7333 /* Get the eflags. */
7334 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7335 kIemNativeGstRegUse_ReadOnly);
7336
7337 /* Test and jump. */
7338 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
7339
7340 /* Free but don't flush the EFlags register. */
7341 iemNativeRegFreeTmp(pReNative, idxEflReg);
7342
7343 /* Make a copy of the core state now as we start the if-block. */
7344 iemNativeCondStartIfBlock(pReNative, off);
7345
7346 return off;
7347}
7348
7349
7350#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
7351 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
7352 do {
7353
7354/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
7355DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7356{
7357 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7358
7359 /* Get the eflags. */
7360 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7361 kIemNativeGstRegUse_ReadOnly);
7362
7363 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7364 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7365
7366 /* Test and jump. */
7367 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7368
7369 /* Free but don't flush the EFlags register. */
7370 iemNativeRegFreeTmp(pReNative, idxEflReg);
7371
7372 /* Make a copy of the core state now as we start the if-block. */
7373 iemNativeCondStartIfBlock(pReNative, off);
7374
7375 return off;
7376}
7377
7378
7379#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
7380 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
7381 do {
7382
7383/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
7384DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7385{
7386 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7387
7388 /* Get the eflags. */
7389 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7390 kIemNativeGstRegUse_ReadOnly);
7391
7392 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7393 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7394
7395 /* Test and jump. */
7396 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7397
7398 /* Free but don't flush the EFlags register. */
7399 iemNativeRegFreeTmp(pReNative, idxEflReg);
7400
7401 /* Make a copy of the core state now as we start the if-block. */
7402 iemNativeCondStartIfBlock(pReNative, off);
7403
7404 return off;
7405}
7406
7407
7408#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
7409 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
7410 do {
7411
7412#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
7413 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
7414 do {
7415
7416/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
7417DECL_INLINE_THROW(uint32_t)
7418iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7419 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7420{
7421 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7422
7423 /* Get the eflags. */
7424 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7425 kIemNativeGstRegUse_ReadOnly);
7426
7427 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7428 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7429
7430 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7431 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7432 Assert(iBitNo1 != iBitNo2);
7433
7434#ifdef RT_ARCH_AMD64
7435 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
7436
7437 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7438 if (iBitNo1 > iBitNo2)
7439 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7440 else
7441 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7442 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7443
7444#elif defined(RT_ARCH_ARM64)
7445 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7446 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7447
7448 /* and tmpreg, eflreg, #1<<iBitNo1 */
7449 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7450
7451     /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7452 if (iBitNo1 > iBitNo2)
7453 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7454 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7455 else
7456 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7457 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7458
7459 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7460
7461#else
7462# error "Port me"
7463#endif
7464
7465 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7466 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7467 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7468
7469 /* Free but don't flush the EFlags and tmp registers. */
7470 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7471 iemNativeRegFreeTmp(pReNative, idxEflReg);
7472
7473 /* Make a copy of the core state now as we start the if-block. */
7474 iemNativeCondStartIfBlock(pReNative, off);
7475
7476 return off;
7477}
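/* What the AND + shift + XOR sequence computes, written out in plain C for the
   common SF vs OF comparison (the bit choice is just an example; fEfl stands
   for the guest EFLAGS value):
   @code
        uint32_t uTmp = fEfl & X86_EFL_SF;              // isolate the first bit (bit 7)
        uTmp <<= X86_EFL_OF_BIT - X86_EFL_SF_BIT;       // align it with the second bit (bit 11)
        uTmp ^= fEfl;                                   // bit 11 now holds SF ^ OF
        bool const fNotEqual = RT_BOOL(uTmp & X86_EFL_OF);
        // IEM_MC_IF_EFL_BITS_EQ enters the if-block when !fNotEqual,
        // IEM_MC_IF_EFL_BITS_NE when fNotEqual.
   @endcode */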
7478
7479
7480#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
7481 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
7482 do {
7483
7484#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
7485 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
7486 do {
7487
7488/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
7489 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
7490DECL_INLINE_THROW(uint32_t)
7491iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
7492 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7493{
7494 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7495
7496 /* We need an if-block label for the non-inverted variant. */
7497 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
7498 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
7499
7500 /* Get the eflags. */
7501 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7502 kIemNativeGstRegUse_ReadOnly);
7503
7504 /* Translate the flag masks to bit numbers. */
7505 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7506 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7507
7508 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7509 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7510 Assert(iBitNo1 != iBitNo);
7511
7512 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7513 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7514 Assert(iBitNo2 != iBitNo);
7515 Assert(iBitNo2 != iBitNo1);
7516
7517#ifdef RT_ARCH_AMD64
7518 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
7519#elif defined(RT_ARCH_ARM64)
7520 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7521#endif
7522
7523 /* Check for the lone bit first. */
7524 if (!fInverted)
7525 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7526 else
7527 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
7528
7529 /* Then extract and compare the other two bits. */
7530#ifdef RT_ARCH_AMD64
7531 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7532 if (iBitNo1 > iBitNo2)
7533 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7534 else
7535 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7536 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7537
7538#elif defined(RT_ARCH_ARM64)
7539 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7540
7541 /* and tmpreg, eflreg, #1<<iBitNo1 */
7542 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7543
7544     /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7545 if (iBitNo1 > iBitNo2)
7546 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7547 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7548 else
7549 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7550 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7551
7552 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7553
7554#else
7555# error "Port me"
7556#endif
7557
7558 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7559 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7560 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7561
7562 /* Free but don't flush the EFlags and tmp registers. */
7563 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7564 iemNativeRegFreeTmp(pReNative, idxEflReg);
7565
7566 /* Make a copy of the core state now as we start the if-block. */
7567 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
7568
7569 return off;
7570}
7571
7572
7573#define IEM_MC_IF_CX_IS_NZ() \
7574 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
7575 do {
7576
7577/** Emits code for IEM_MC_IF_CX_IS_NZ. */
7578DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7579{
7580 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7581
7582 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7583 kIemNativeGstRegUse_ReadOnly);
7584 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
7585 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7586
7587 iemNativeCondStartIfBlock(pReNative, off);
7588 return off;
7589}
7590
7591
7592#define IEM_MC_IF_ECX_IS_NZ() \
7593 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
7594 do {
7595
7596#define IEM_MC_IF_RCX_IS_NZ() \
7597 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
7598 do {
7599
7600/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
7601DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7602{
7603 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7604
7605 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7606 kIemNativeGstRegUse_ReadOnly);
7607 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
7608 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7609
7610 iemNativeCondStartIfBlock(pReNative, off);
7611 return off;
7612}
7613
7614
7615#define IEM_MC_IF_CX_IS_NOT_ONE() \
7616 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
7617 do {
7618
7619/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
7620DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7621{
7622 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7623
7624 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7625 kIemNativeGstRegUse_ReadOnly);
7626#ifdef RT_ARCH_AMD64
7627 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7628#else
7629 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7630 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7631 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7632#endif
7633 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7634
7635 iemNativeCondStartIfBlock(pReNative, off);
7636 return off;
7637}
7638
7639
7640#define IEM_MC_IF_ECX_IS_NOT_ONE() \
7641 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
7642 do {
7643
7644#define IEM_MC_IF_RCX_IS_NOT_ONE() \
7645 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
7646 do {
7647
7648/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
7649DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7650{
7651 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7652
7653 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7654 kIemNativeGstRegUse_ReadOnly);
7655 if (f64Bit)
7656 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7657 else
7658 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7659 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7660
7661 iemNativeCondStartIfBlock(pReNative, off);
7662 return off;
7663}
7664
7665
7666#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7667 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
7668 do {
7669
7670#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7671 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
7672 do {
7673
7674/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
7675 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7676DECL_INLINE_THROW(uint32_t)
7677iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
7678{
7679 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7680
7681 /* We have to load both RCX and EFLAGS before we can start branching,
7682 otherwise we'll end up in the else-block with an inconsistent
7683 register allocator state.
7684 Doing EFLAGS first as it's more likely to be loaded, right? */
7685 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7686 kIemNativeGstRegUse_ReadOnly);
7687 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7688 kIemNativeGstRegUse_ReadOnly);
7689
7690 /** @todo we could reduce this to a single branch instruction by spending a
7691 * temporary register and some setnz stuff. Not sure if loops are
7692 * worth it. */
7693 /* Check CX. */
7694#ifdef RT_ARCH_AMD64
7695 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7696#else
7697 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7698 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7699 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7700#endif
7701
7702 /* Check the EFlags bit. */
7703 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7704 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7705 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7706 !fCheckIfSet /*fJmpIfSet*/);
7707
7708 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7709 iemNativeRegFreeTmp(pReNative, idxEflReg);
7710
7711 iemNativeCondStartIfBlock(pReNative, off);
7712 return off;
7713}
7714
7715
7716#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7717 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
7718 do {
7719
7720#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7721 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
7722 do {
7723
7724#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7725 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
7726 do {
7727
7728#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7729 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
7730 do {
7731
7732/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
7733 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
7734 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
7735 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7736DECL_INLINE_THROW(uint32_t)
7737iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7738 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
7739{
7740 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7741
7742 /* We have to load both RCX and EFLAGS before we can start branching,
7743 otherwise we'll end up in the else-block with an inconsistent
7744 register allocator state.
7745 Doing EFLAGS first as it's more likely to be loaded, right? */
7746 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7747 kIemNativeGstRegUse_ReadOnly);
7748 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7749 kIemNativeGstRegUse_ReadOnly);
7750
7751 /** @todo we could reduce this to a single branch instruction by spending a
7752 * temporary register and some setnz stuff. Not sure if loops are
7753 * worth it. */
7754 /* Check RCX/ECX. */
7755 if (f64Bit)
7756 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7757 else
7758 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7759
7760 /* Check the EFlags bit. */
7761 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7762 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7763 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7764 !fCheckIfSet /*fJmpIfSet*/);
7765
7766 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7767 iemNativeRegFreeTmp(pReNative, idxEflReg);
7768
7769 iemNativeCondStartIfBlock(pReNative, off);
7770 return off;
7771}
7772
7773
7774
7775/*********************************************************************************************************************************
7776* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7777*********************************************************************************************************************************/
7778/** Number of hidden arguments for CIMPL calls.
7779 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
7780#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7781# define IEM_CIMPL_HIDDEN_ARGS 3
7782#else
7783# define IEM_CIMPL_HIDDEN_ARGS 2
7784#endif
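/* Hedged illustration of what the hidden arguments are: every C-impl worker
   takes pVCpu and cbInstr before its explicit IEM_MC_ARG values, and on
   Windows/AMD64 with VBOXSTRICTRC_STRICT_ENABLED the strict status type is a
   class that gets returned via a hidden pointer (the 'VBOXSTRICTRC fun'
   mentioned above), which is presumably why the count goes up to three there.
   @code
        // a C-impl call like IEM_MC_CALL_CIMPL_2(..., pfnCImpl, uArg0, uArg1) ends up roughly as:
        rcStrict = pfnCImpl(pVCpu, cbInstr, uArg0, uArg1);
   @endcode */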
7785
7786#define IEM_MC_NOREF(a_Name) \
7787 RT_NOREF_PV(a_Name)
7788
7789#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
7790 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
7791
7792#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
7793 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
7794
7795#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
7796 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
7797
7798#define IEM_MC_LOCAL(a_Type, a_Name) \
7799 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
7800
7801#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
7802 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
7803
7804
7805/**
7806 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
7807 */
7808DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
7809{
7810 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
7811 return IEM_CIMPL_HIDDEN_ARGS;
7812 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
7813 return 1;
7814 return 0;
7815}
7816
7817
7818/**
7819 * Internal work that allocates a variable with kind set to
7820 * kIemNativeVarKind_Invalid and no current stack allocation.
7821 *
7822 * The kind will either be set by the caller or later when the variable is first
7823 * assigned a value.
7824 *
7825 * @returns Unpacked index.
7826 * @internal
7827 */
7828static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7829{
7830 Assert(cbType > 0 && cbType <= 64);
7831 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7832 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7833 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7834 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7835 pReNative->Core.aVars[idxVar].cbVar = cbType;
7836 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7837 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7838 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7839 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7840 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7841 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7842 pReNative->Core.aVars[idxVar].u.uValue = 0;
7843 return idxVar;
7844}
7845
7846
7847/**
7848 * Internal work that allocates an argument variable w/o setting enmKind.
7849 *
7850 * @returns Unpacked index.
7851 * @internal
7852 */
7853static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7854{
7855 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7856 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7857 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7858
7859 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7860 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7861 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7862 return idxVar;
7863}
7864
7865
7866/**
7867 * Gets the stack slot for a stack variable, allocating one if necessary.
7868 *
7869 * Calling this function implies that the stack slot will contain a valid
7870 * variable value. The caller deals with any register currently assigned to the
7871 * variable, typically by spilling it into the stack slot.
7872 *
7873 * @returns The stack slot number.
7874 * @param pReNative The recompiler state.
7875 * @param idxVar The variable.
7876 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7877 */
7878DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7879{
7880 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7881 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7882 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7883
7884 /* Already got a slot? */
7885 uint8_t const idxStackSlot = pVar->idxStackSlot;
7886 if (idxStackSlot != UINT8_MAX)
7887 {
7888 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7889 return idxStackSlot;
7890 }
7891
7892 /*
7893 * A single slot is easy to allocate.
7894 * Allocate them from the top end, closest to BP, to reduce the displacement.
7895 */
7896 if (pVar->cbVar <= sizeof(uint64_t))
7897 {
7898 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7899 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7900 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7901 pVar->idxStackSlot = (uint8_t)iSlot;
7902         Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7903 return (uint8_t)iSlot;
7904 }
7905
7906 /*
7907 * We need more than one stack slot.
7908 *
7909 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7910 */
7911 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7912 Assert(pVar->cbVar <= 64);
7913 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7914 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7915 uint32_t bmStack = ~pReNative->Core.bmStack;
7916 while (bmStack != UINT32_MAX)
7917 {
7918/** @todo allocate from the top to reduce BP displacement. */
7919 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7920 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7921 if (!(iSlot & fBitAlignMask))
7922 {
7923 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7924 {
7925 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7926 pVar->idxStackSlot = (uint8_t)iSlot;
7927                 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7928 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7929 return (uint8_t)iSlot;
7930 }
7931 }
7932 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7933 }
7934 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7935}
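/* Worked example for the multi-slot path above: a 32 byte variable needs four
   adjacent 8-byte slots.  ASMBitLastSetU32(32) is 6, so fBitAlignMask becomes
   RT_BIT_32(6 - 4) - 1 = 3 (i.e. 4-slot alignment) and fBitAllocMask becomes
   RT_BIT_32((32 + 7) >> 3) - 1 = 0xf (four slot bits), matching the "32 -> 3"
   entry in the comment table above.  The loop then scans the free-slot bitmap
   for a 4-aligned position where all four bits are free. */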
7936
7937
7938/**
7939 * Changes the variable to a stack variable.
7940 *
7941 * Currently this is only possible to do the first time the variable is used;
7942 * switching later could be implemented but isn't done.
7943 *
7944 * @param pReNative The recompiler state.
7945 * @param idxVar The variable.
7946 * @throws VERR_IEM_VAR_IPE_2
7947 */
7948static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7949{
7950 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7951 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7952 if (pVar->enmKind != kIemNativeVarKind_Stack)
7953 {
7954 /* We could in theory transition from immediate to stack as well, but it
7955 would involve the caller doing work storing the value on the stack. So,
7956 till that's required we only allow transition from invalid. */
7957 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7958 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7959 pVar->enmKind = kIemNativeVarKind_Stack;
7960
7961 /* Note! We don't allocate a stack slot here, that's only done when a
7962 slot is actually needed to hold a variable value. */
7963 }
7964}
7965
7966
7967/**
7968 * Changes the variable to an immediate (constant value) variable.
7969 *
7970 * This does not require stack storage as we know the value and can always
7971 * reload it, unless of course it's referenced.
7972 *
7973 * @param pReNative The recompiler state.
7974 * @param idxVar The variable.
7975 * @param uValue The immediate value.
7976 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7977 */
7978static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7979{
7980 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7981 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7982 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7983 {
7984 /* Only simple transitions for now. */
7985 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7986 pVar->enmKind = kIemNativeVarKind_Immediate;
7987 }
7988 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7989
7990 pVar->u.uValue = uValue;
7991 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7992 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7993 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7994}
7995
7996
7997/**
7998 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7999 *
8000 * This does not require stack storage as we know the value and can always
8001 * reload it. Loading is postponed till needed.
8002 *
8003 * @param pReNative The recompiler state.
8004 * @param idxVar The variable. Unpacked.
8005 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
8006 *
8007 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8008 * @internal
8009 */
8010static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
8011{
8012 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
8013 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
8014
8015 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
8016 {
8017 /* Only simple transitions for now. */
8018 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
8019 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8020 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
8021 }
8022 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8023
8024 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
8025
8026 /* Update the other variable, ensure it's a stack variable. */
8027 /** @todo handle variables with const values... that'll go boom now. */
8028 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
8029 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8030}
8031
8032
8033/**
8034 * Sets the variable to a reference (pointer) to a guest register reference.
8035 *
8036 * This does not require stack storage as we know the value and can always
8037 * reload it. Loading is postponed till needed.
8038 *
8039 * @param pReNative The recompiler state.
8040 * @param idxVar The variable.
8041 * @param enmRegClass The class guest registers to reference.
8042 * @param idxReg The register within @a enmRegClass to reference.
8043 *
8044 * @throws VERR_IEM_VAR_IPE_2
8045 */
8046static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8047 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
8048{
8049 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8050 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8051
8052 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
8053 {
8054 /* Only simple transitions for now. */
8055 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8056 pVar->enmKind = kIemNativeVarKind_GstRegRef;
8057 }
8058 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8059
8060 pVar->u.GstRegRef.enmClass = enmRegClass;
8061 pVar->u.GstRegRef.idx = idxReg;
8062}
8063
8064
8065DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
8066{
8067 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8068}
8069
8070
8071DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
8072{
8073 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8074
8075 /* Since we're using a generic uint64_t value type, we must truncate it if
8076        the variable is smaller, otherwise we may end up with a too large value when
8077        scaling up an imm8 w/ sign-extension.
8078
8079 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
8080        in the bios, bx=1) when running on arm, because clang expects 16-bit
8081 register parameters to have bits 16 and up set to zero. Instead of
8082 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
8083 CF value in the result. */
8084 switch (cbType)
8085 {
8086 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8087 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8088 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8089 }
8090 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8091 return idxVar;
8092}
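/* Concrete illustration of the masking above: for the "add bx, 0xffff" case
   described in the comment the incoming uValue arrives with all the upper bits
   set, and with cbType == sizeof(uint16_t) the mask reduces it to 0xffff before
   it is handed to iemNativeVarSetKindToConst, so the 16-bit argument register
   ends up with bits 16 and up cleared as expected. */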
8093
8094
8095DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
8096{
8097 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
8098 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
8099 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
8100 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
8101 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
8102 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
8103
8104 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
8105 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
8106 return idxArgVar;
8107}
8108
8109
8110DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
8111{
8112 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8113 /* Don't set to stack now, leave that to the first use as for instance
8114 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
8115 return idxVar;
8116}
8117
8118
8119DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
8120{
8121 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8122
8123 /* Since we're using a generic uint64_t value type, we must truncate it if
8124        the variable is smaller, otherwise we may end up with a too large value when
8125        scaling up an imm8 w/ sign-extension. */
8126 switch (cbType)
8127 {
8128 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8129 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8130 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8131 }
8132 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8133 return idxVar;
8134}
8135
8136
8137/**
8138 * Makes sure variable @a idxVar has a register assigned to it and that it stays
8139 * fixed till we call iemNativeVarRegisterRelease.
8140 *
8141 * @returns The host register number.
8142 * @param pReNative The recompiler state.
8143 * @param idxVar The variable.
8144 * @param poff Pointer to the instruction buffer offset.
8145 * In case a register needs to be freed up or the value
8146 * loaded off the stack.
8147 * @param fInitialized Set if the variable must already have been initialized.
8148 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8149 * the case.
8150 * @param idxRegPref Preferred register number or UINT8_MAX.
8151 */
8152DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8153 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8154{
8155 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8156 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8157 Assert(pVar->cbVar <= 8);
8158 Assert(!pVar->fRegAcquired);
8159
8160 uint8_t idxReg = pVar->idxReg;
8161 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8162 {
8163 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8164 && pVar->enmKind < kIemNativeVarKind_End);
8165 pVar->fRegAcquired = true;
8166 return idxReg;
8167 }
8168
8169 /*
8170 * If the kind of variable has not yet been set, default to 'stack'.
8171 */
8172 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8173 && pVar->enmKind < kIemNativeVarKind_End);
8174 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8175 iemNativeVarSetKindToStack(pReNative, idxVar);
8176
8177 /*
8178      * We have to allocate a register for the variable, even if it's a stack one,
8179      * as we don't know if there are modifications being made to it before it's
8180      * finalized (todo: analyze and insert hints about that?).
8181 *
8182      * If we can, we try to get the correct register for argument variables. This
8183      * is assuming that most argument variables are fetched as close as possible
8184      * to the actual call, so that there aren't any interfering hidden calls
8185      * (memory accesses, etc) in between.
8186 *
8187      * If we cannot, or it's a local variable, we make sure no argument registers
8188      * that will be used by this MC block are allocated here, and we always
8189      * prefer non-volatile registers to avoid needing to spill stuff for internal
8190      * calls.
8191 */
8192 /** @todo Detect too early argument value fetches and warn about hidden
8193 * calls causing less optimal code to be generated in the python script. */
8194
8195 uint8_t const uArgNo = pVar->uArgNo;
8196 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
8197 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
8198 {
8199 idxReg = g_aidxIemNativeCallRegs[uArgNo];
8200 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8201 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
8202 }
8203 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
8204 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
8205 {
8206 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8207 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
8208 & ~pReNative->Core.bmHstRegsWithGstShadow
8209 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
8210 & fNotArgsMask;
8211 if (fRegs)
8212 {
8213 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
8214 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
8215 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
8216 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
8217 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
8218 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8219 }
8220 else
8221 {
8222 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8223 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
8224 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8225 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8226 }
8227 }
8228 else
8229 {
8230 idxReg = idxRegPref;
8231 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8232 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8233 }
8234 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8235 pVar->idxReg = idxReg;
8236
8237 /*
8238 * Load it off the stack if we've got a stack slot.
8239 */
8240 uint8_t const idxStackSlot = pVar->idxStackSlot;
8241 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8242 {
8243 Assert(fInitialized);
8244 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8245 switch (pVar->cbVar)
8246 {
8247 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
8248 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
8249 case 3: AssertFailed(); RT_FALL_THRU();
8250 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
8251 default: AssertFailed(); RT_FALL_THRU();
8252 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
8253 }
8254 }
8255 else
8256 {
8257 Assert(idxStackSlot == UINT8_MAX);
8258 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8259 }
8260 pVar->fRegAcquired = true;
8261 return idxReg;
8262}
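/* Typical acquire/emit/release pattern for the function above (hedged sketch;
   idxVarMyLocal is a made-up variable index and the store emitter is just one
   the recompiler commonly uses):
   @code
        uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarMyLocal, &off,
                                                              true /*fInitialized*/);
        off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg,
                                             RT_UOFFSETOF(VMCPU, cpum.GstCtx.rax));
        iemNativeVarRegisterRelease(pReNative, idxVarMyLocal);
   @endcode */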
8263
8264
8265/**
8266 * The value of variable @a idxVar will be written in full to the @a enmGstReg
8267 * guest register.
8268 *
8269 * This function makes sure there is a register for it and sets it to be the
8270 * current shadow copy of @a enmGstReg.
8271 *
8272 * @returns The host register number.
8273 * @param pReNative The recompiler state.
8274 * @param idxVar The variable.
8275 * @param enmGstReg The guest register this variable will be written to
8276 * after this call.
8277 * @param poff Pointer to the instruction buffer offset.
8278 *                      Updated in case a register needs to be freed up or
8279 *                      the variable content needs to be loaded off the stack.
8280 *
8281 * @note We DO NOT expect @a idxVar to be an argument variable, because this
8282 *       function is only used in the commit stage of an instruction.
8284 */
8285DECL_HIDDEN_THROW(uint8_t)
8286iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
8287{
8288 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8289 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8290 Assert(!pVar->fRegAcquired);
8291 AssertMsgStmt( pVar->cbVar <= 8
8292 && ( pVar->enmKind == kIemNativeVarKind_Immediate
8293 || pVar->enmKind == kIemNativeVarKind_Stack),
8294 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
8295 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
8296 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8297
8298 /*
8299 * This shouldn't ever be used for arguments, unless it's in a weird else
8300 * branch that doesn't do any calling and even then it's questionable.
8301 *
8302 * However, in case someone writes crazy wrong MC code and does register
8303 * updates before making calls, just use the regular register allocator to
8304 * ensure we get a register suitable for the intended argument number.
8305 */
8306 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
8307
8308 /*
8309 * If there is already a register for the variable, we transfer/set the
8310 * guest shadow copy assignment to it.
8311 */
8312 uint8_t idxReg = pVar->idxReg;
8313 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8314 {
8315 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
8316 {
8317 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
8318 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
8319 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
8320 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
8321 }
8322 else
8323 {
8324 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
8325 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
8326 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
8327 }
8328 /** @todo figure this one out. We need some way of making sure the register isn't
8329 * modified after this point, just in case we start writing crappy MC code. */
8330 pVar->enmGstReg = enmGstReg;
8331 pVar->fRegAcquired = true;
8332 return idxReg;
8333 }
8334 Assert(pVar->uArgNo == UINT8_MAX);
8335
8336 /*
8337 * Because this is supposed to be the commit stage, we just tag along with the
8338 * temporary register allocator and upgrade the allocated register to a variable register.
8339 */
8340 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
8341 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
8342 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
8343 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
8344 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
8345 pVar->idxReg = idxReg;
8346
8347 /*
8348 * Now we need to load the register value.
8349 */
8350 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8351 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
8352 else
8353 {
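        /* Stack variable: load the current value from its stack slot, allocating the slot on first use. */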
8354 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8355 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8356 switch (pVar->cbVar)
8357 {
8358 case sizeof(uint64_t):
8359 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
8360 break;
8361 case sizeof(uint32_t):
8362 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
8363 break;
8364 case sizeof(uint16_t):
8365 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
8366 break;
8367 case sizeof(uint8_t):
8368 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
8369 break;
8370 default:
8371 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8372 }
8373 }
8374
8375 pVar->fRegAcquired = true;
8376 return idxReg;
8377}
8378
8379
8380/**
8381 * Sets the host register for @a idxVar to @a idxReg.
8382 *
8383 * The register must not be allocated. Any guest register shadowing will be
8384 * implicitly dropped by this call.
8385 *
8386 * The variable must not have any register associated with it (causes
8387 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
8388 * implied.
8389 *
8390 * @returns idxReg
8391 * @param pReNative The recompiler state.
8392 * @param idxVar The variable.
8393 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
8394 * @param off For recording in debug info.
8395 *
8396 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
8397 */
8398DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
8399{
8400 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8401 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8402 Assert(!pVar->fRegAcquired);
8403 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
8404 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
8405 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
8406
8407 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
8408 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8409
8410 iemNativeVarSetKindToStack(pReNative, idxVar);
8411 pVar->idxReg = idxReg;
8412
8413 return idxReg;
8414}
8415
8416
8417/**
8418 * Convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
8419 */
8420DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8421 uint8_t idxReg, uint32_t *poff)
8422{
8423 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
8424 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
8425 return idxReg;
8426}
8427
8428
8429/**
8430 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
8431 *
8432 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
8433 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
8434 * requirement of flushing anything in volatile host registers when making a
8435 * call.
8436 *
8437 * @returns New @a off value.
8438 * @param pReNative The recompiler state.
8439 * @param off The code buffer position.
8440 * @param fHstRegsNotToSave Set of registers not to save & restore.
8441 */
8442DECL_HIDDEN_THROW(uint32_t)
8443iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8444{
8445 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8446 if (fHstRegs)
8447 {
8448 do
8449 {
8450 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8451 fHstRegs &= ~RT_BIT_32(idxHstReg);
8452
8453 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8454 {
8455 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8456 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8457 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8458 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8459 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8460 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8461 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8462 {
8463 case kIemNativeVarKind_Stack:
8464 {
8465 /* Temporarily spill the variable register. */
8466 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8467 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8468 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8469 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8470 continue;
8471 }
8472
8473 case kIemNativeVarKind_Immediate:
8474 case kIemNativeVarKind_VarRef:
8475 case kIemNativeVarKind_GstRegRef:
8476 /* It is weird to have any of these loaded at this point. */
8477 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8478 continue;
8479
8480 case kIemNativeVarKind_End:
8481 case kIemNativeVarKind_Invalid:
8482 break;
8483 }
8484 AssertFailed();
8485 }
8486 else
8487 {
8488 /*
8489 * Allocate a temporary stack slot and spill the register to it.
8490 */
8491 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8492 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
8493 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8494 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
8495 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
8496 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8497 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8498 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8499 }
8500 } while (fHstRegs);
8501 }
8502 return off;
8503}
8504
8505
8506/**
8507 * Emit code to restore volatile registers after a call to a helper.
8508 *
8509 * @returns New @a off value.
8510 * @param pReNative The recompiler state.
8511 * @param off The code buffer position.
8512 * @param fHstRegsNotToSave Set of registers not to save & restore.
8513 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8514 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8515 */
8516DECL_HIDDEN_THROW(uint32_t)
8517iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8518{
8519 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8520 if (fHstRegs)
8521 {
8522 do
8523 {
8524 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8525 fHstRegs &= ~RT_BIT_32(idxHstReg);
8526
8527 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8528 {
8529 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8530 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8531 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8532 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8533 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8534 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8535 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8536 {
8537 case kIemNativeVarKind_Stack:
8538 {
8539 /* Unspill the variable register. */
8540 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8541 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8542 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8543 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8544 continue;
8545 }
8546
8547 case kIemNativeVarKind_Immediate:
8548 case kIemNativeVarKind_VarRef:
8549 case kIemNativeVarKind_GstRegRef:
8550 /* It is weird to have any of these loaded at this point. */
8551 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8552 continue;
8553
8554 case kIemNativeVarKind_End:
8555 case kIemNativeVarKind_Invalid:
8556 break;
8557 }
8558 AssertFailed();
8559 }
8560 else
8561 {
8562 /*
8563 * Restore from temporary stack slot.
8564 */
8565 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8566 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8567 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8568 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8569
8570 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8571 }
8572 } while (fHstRegs);
8573 }
8574 return off;
8575}
8576
8577
8578/**
8579 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8580 *
8581 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8582 *
8583 * ASSUMES that @a idxVar is valid and unpacked.
8584 */
8585DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8586{
8587 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8588 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8589 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8590 {
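        /* Variables wider than 8 bytes occupy several consecutive slots; build the mask covering all of them. */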
8591 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8592 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8593 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8594 Assert(cSlots > 0);
8595 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8596 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8597 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8598 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8599 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8600 }
8601 else
8602 Assert(idxStackSlot == UINT8_MAX);
8603}
8604
8605
8606/**
8607 * Worker that frees a single variable.
8608 *
8609 * ASSUMES that @a idxVar is valid and unpacked.
8610 */
8611DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8612{
8613 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8614 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8615 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8616
8617 /* Free the host register first if any assigned. */
8618 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8619 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8620 {
8621 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8622 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8623 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8624 }
8625
8626 /* Free argument mapping. */
8627 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8628 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8629 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8630
8631 /* Free the stack slots. */
8632 iemNativeVarFreeStackSlots(pReNative, idxVar);
8633
8634 /* Free the actual variable. */
8635 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8636 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8637}
8638
8639
8640/**
8641 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8642 */
8643DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8644{
8645 while (bmVars != 0)
8646 {
8647 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8648 bmVars &= ~RT_BIT_32(idxVar);
8649
8650#if 1 /** @todo optimize by simplifying this later... */
8651 iemNativeVarFreeOneWorker(pReNative, idxVar);
8652#else
8653 /* Only need to free the host register, the rest is done as bulk updates below. */
8654 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8655 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8656 {
8657 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8658 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8659 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8660 }
8661#endif
8662 }
8663#if 0 /** @todo optimize by simplifying this later... */
8664 pReNative->Core.bmVars = 0;
8665 pReNative->Core.bmStack = 0;
8666 pReNative->Core.u64ArgVars = UINT64_MAX;
8667#endif
8668}
8669
8670
8671/**
8672 * This is called by IEM_MC_END() to clean up all variables.
8673 */
8674DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
8675{
8676 uint32_t const bmVars = pReNative->Core.bmVars;
8677 if (bmVars != 0)
8678 iemNativeVarFreeAllSlow(pReNative, bmVars);
8679 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8680 Assert(pReNative->Core.bmStack == 0);
8681}
8682
8683
8684#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
8685
8686/**
8687 * This is called by IEM_MC_FREE_LOCAL.
8688 */
8689DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8690{
8691 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8692 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
8693 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
8694}
8695
8696
8697#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
8698
8699/**
8700 * This is called by IEM_MC_FREE_ARG.
8701 */
8702DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8703{
8704 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8705 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
8706 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
8707}
8708
8709
8710#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
8711
8712/**
8713 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
8714 */
8715DECL_INLINE_THROW(uint32_t)
8716iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
8717{
8718 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
8719 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
8720 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8721 Assert( pVarDst->cbVar == sizeof(uint16_t)
8722 || pVarDst->cbVar == sizeof(uint32_t));
8723
8724 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
8725 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
8726 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
8727 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
8728 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8729
8730 Assert(pVarDst->cbVar < pVarSrc->cbVar);
8731
8732 /*
8733 * Special case for immediates.
8734 */
8735 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
8736 {
8737 switch (pVarDst->cbVar)
8738 {
8739 case sizeof(uint16_t):
8740 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
8741 break;
8742 case sizeof(uint32_t):
8743 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
8744 break;
8745 default: AssertFailed(); break;
8746 }
8747 }
8748 else
8749 {
8750 /*
8751 * The generic solution for now.
8752 */
8753 /** @todo optimize this by having the python script make sure the source
8754 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
8755 * statement. Then we could just transfer the register assignments. */
8756 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
8757 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
8758 switch (pVarDst->cbVar)
8759 {
8760 case sizeof(uint16_t):
8761 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
8762 break;
8763 case sizeof(uint32_t):
8764 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
8765 break;
8766 default: AssertFailed(); break;
8767 }
8768 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
8769 iemNativeVarRegisterRelease(pReNative, idxVarDst);
8770 }
8771 return off;
8772}
8773
8774
8775
8776/*********************************************************************************************************************************
8777* Emitters for IEM_MC_CALL_CIMPL_XXX *
8778*********************************************************************************************************************************/
8779
8780/**
8781 * Emits code to load a reference to the given guest register into @a idxGprDst.
8782 */
8783DECL_INLINE_THROW(uint32_t)
8784iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8785 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8786{
8787#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8788 /** @todo If we ever gonna allow referencing the RIP register we need to update guest value here. */
8789 /** @todo If we ever allow referencing the RIP register, we need to update the guest value here. */
8790
8791 /*
8792 * Get the offset relative to the CPUMCTX structure.
8793 */
8794 uint32_t offCpumCtx;
8795 switch (enmClass)
8796 {
8797 case kIemNativeGstRegRef_Gpr:
8798 Assert(idxRegInClass < 16);
8799 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8800 break;
8801
8802 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
8803 Assert(idxRegInClass < 4);
8804 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8805 break;
8806
8807 case kIemNativeGstRegRef_EFlags:
8808 Assert(idxRegInClass == 0);
8809 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8810 break;
8811
8812 case kIemNativeGstRegRef_MxCsr:
8813 Assert(idxRegInClass == 0);
8814 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8815 break;
8816
8817 case kIemNativeGstRegRef_FpuReg:
8818 Assert(idxRegInClass < 8);
8819 AssertFailed(); /** @todo what kind of indexing? */
8820 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8821 break;
8822
8823 case kIemNativeGstRegRef_MReg:
8824 Assert(idxRegInClass < 8);
8825 AssertFailed(); /** @todo what kind of indexing? */
8826 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8827 break;
8828
8829 case kIemNativeGstRegRef_XReg:
8830 Assert(idxRegInClass < 16);
8831 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8832 break;
8833
8834 default:
8835 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8836 }
8837
8838 /*
8839 * Load the address into the destination register.
8840 */
8841#ifdef RT_ARCH_AMD64
8842 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8843
8844#elif defined(RT_ARCH_ARM64)
8845 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
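    /* ADD (immediate) only encodes a 12-bit unsigned immediate, hence the range assertion below. */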
8846 Assert(offCpumCtx < 4096);
8847 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8848
8849#else
8850# error "Port me!"
8851#endif
8852
8853 return off;
8854}
8855
8856
8857/**
8858 * Common code for CIMPL and AIMPL calls.
8859 *
8860 * These are calls that use argument variables and such. They should not be
8861 * confused with internal calls required to implement an MC operation,
8862 * like a TLB load and similar.
8863 *
8864 * Upon return all that is left to do is to load any hidden arguments and
8865 * perform the call. All argument variables are freed.
8866 *
8867 * @returns New code buffer offset; throws VBox status code on error.
8868 * @param pReNative The native recompile state.
8869 * @param off The code buffer offset.
8870 * @param   cArgs           The total number of arguments (including the
8871 *                          hidden ones).
8872 * @param cHiddenArgs The number of hidden arguments. The hidden
8873 * arguments must not have any variable declared for
8874 * them, whereas all the regular arguments must
8875 * (tstIEMCheckMc ensures this).
8876 */
8877DECL_HIDDEN_THROW(uint32_t)
8878iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8879{
8880#ifdef VBOX_STRICT
8881 /*
8882 * Assert sanity.
8883 */
8884 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8885 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8886 for (unsigned i = 0; i < cHiddenArgs; i++)
8887 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8888 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8889 {
8890 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8891 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8892 }
8893 iemNativeRegAssertSanity(pReNative);
8894#endif
8895
8896 /* We don't know what the called function makes use of, so flush any pending register writes. */
8897 off = iemNativeRegFlushPendingWrites(pReNative, off);
8898
8899 /*
8900 * Before we do anything else, go over variables that are referenced and
8901 * make sure they are not in a register.
8902 */
8903 uint32_t bmVars = pReNative->Core.bmVars;
8904 if (bmVars)
8905 {
8906 do
8907 {
8908 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8909 bmVars &= ~RT_BIT_32(idxVar);
8910
8911 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8912 {
8913 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8914 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8915 {
8916 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8917 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8918 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8919 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8920 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8921
8922 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8923 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8924 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8925 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8926 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8927 }
8928 }
8929 } while (bmVars != 0);
8930#if 0 //def VBOX_STRICT
8931 iemNativeRegAssertSanity(pReNative);
8932#endif
8933 }
8934
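    /* Number of arguments passed in registers; any remainder is passed on the stack. */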
8935 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8936
8937 /*
8938 * First, go over the host registers that will be used for arguments and make
8939 * sure they either hold the desired argument or are free.
8940 */
8941 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8942 {
8943 for (uint32_t i = 0; i < cRegArgs; i++)
8944 {
8945 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8946 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8947 {
8948 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8949 {
8950 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8951 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8952 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8953 Assert(pVar->idxReg == idxArgReg);
8954 uint8_t const uArgNo = pVar->uArgNo;
8955 if (uArgNo == i)
8956 { /* perfect */ }
8957 /* The variable allocator logic should make sure this is impossible,
8958 except for when the return register is used as a parameter (ARM,
8959 but not x86). */
8960#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8961 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8962 {
8963# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8964# error "Implement this"
8965# endif
8966 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8967 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8968 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8969 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8970 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8971 }
8972#endif
8973 else
8974 {
8975 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8976
8977 if (pVar->enmKind == kIemNativeVarKind_Stack)
8978 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8979 else
8980 {
8981 /* just free it, can be reloaded if used again */
8982 pVar->idxReg = UINT8_MAX;
8983 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8984 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8985 }
8986 }
8987 }
8988 else
8989 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8990 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8991 }
8992 }
8993#if 0 //def VBOX_STRICT
8994 iemNativeRegAssertSanity(pReNative);
8995#endif
8996 }
8997
8998 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8999
9000#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9001 /*
9002 * If there are any stack arguments, make sure they are in their place as well.
9003 *
9004 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
9005 * the caller) will be loading it later and it must be free (see the first loop).
9006 */
9007 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
9008 {
9009 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
9010 {
9011 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9012 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
9013 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9014 {
9015 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
9016 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
9017 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
9018 pVar->idxReg = UINT8_MAX;
9019 }
9020 else
9021 {
9022 /* Use ARG0 as temp for stuff we need registers for. */
9023 switch (pVar->enmKind)
9024 {
9025 case kIemNativeVarKind_Stack:
9026 {
9027 uint8_t const idxStackSlot = pVar->idxStackSlot;
9028 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9029 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
9030 iemNativeStackCalcBpDisp(idxStackSlot));
9031 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9032 continue;
9033 }
9034
9035 case kIemNativeVarKind_Immediate:
9036 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
9037 continue;
9038
9039 case kIemNativeVarKind_VarRef:
9040 {
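                    /* Make sure the referenced variable sits in its stack slot (spilling
                       its register if needed) and pass the address of that slot. */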
9041 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9042 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9043 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9044 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9045 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9046 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9047 {
9048 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9049 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9050 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9051 }
9052 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9053 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9054 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
9055 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9056 continue;
9057 }
9058
9059 case kIemNativeVarKind_GstRegRef:
9060 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
9061 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9062 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9063 continue;
9064
9065 case kIemNativeVarKind_Invalid:
9066 case kIemNativeVarKind_End:
9067 break;
9068 }
9069 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9070 }
9071 }
9072# if 0 //def VBOX_STRICT
9073 iemNativeRegAssertSanity(pReNative);
9074# endif
9075 }
9076#else
9077 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
9078#endif
9079
9080 /*
9081 * Make sure the argument variables are loaded into their respective registers.
9082 *
9083 * We can optimize this by ASSUMING that any register allocations are for
9084 * registers that have already been loaded and are ready. The previous step
9085 * saw to that.
9086 */
9087 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
9088 {
9089 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9090 {
9091 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9092 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9093 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
9094 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
9095 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
9096 else
9097 {
9098 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9099 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9100 {
9101 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9102 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
9103 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
9104 | RT_BIT_32(idxArgReg);
9105 pVar->idxReg = idxArgReg;
9106 }
9107 else
9108 {
9109 /* Use ARG0 as temp for stuff we need registers for. */
9110 switch (pVar->enmKind)
9111 {
9112 case kIemNativeVarKind_Stack:
9113 {
9114 uint8_t const idxStackSlot = pVar->idxStackSlot;
9115 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9116 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
9117 continue;
9118 }
9119
9120 case kIemNativeVarKind_Immediate:
9121 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
9122 continue;
9123
9124 case kIemNativeVarKind_VarRef:
9125 {
9126 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9127 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9128 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
9129 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9130 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9131 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9132 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9133 {
9134 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9135 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9136 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9137 }
9138 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9139 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9140 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
9141 continue;
9142 }
9143
9144 case kIemNativeVarKind_GstRegRef:
9145 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
9146 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9147 continue;
9148
9149 case kIemNativeVarKind_Invalid:
9150 case kIemNativeVarKind_End:
9151 break;
9152 }
9153 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9154 }
9155 }
9156 }
9157#if 0 //def VBOX_STRICT
9158 iemNativeRegAssertSanity(pReNative);
9159#endif
9160 }
9161#ifdef VBOX_STRICT
9162 else
9163 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9164 {
9165 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
9166 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
9167 }
9168#endif
9169
9170 /*
9171 * Free all argument variables (simplified).
9172 * Their lifetime always expires with the call they are for.
9173 */
9174 /** @todo Make the python script check that arguments aren't used after
9175 * IEM_MC_CALL_XXXX. */
9176 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
9177 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
9178 * an argument value. There is also some FPU stuff. */
9179 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
9180 {
9181 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
9182 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
9183
9184 /* no need to free registers: */
9185 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
9186 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
9187 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
9188 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
9189 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
9190 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
9191
9192 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
9193 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9194 iemNativeVarFreeStackSlots(pReNative, idxVar);
9195 }
9196 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9197
9198 /*
9199 * Flush volatile registers as we make the call.
9200 */
9201 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
9202
9203 return off;
9204}
9205
9206
9207/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
9208DECL_HIDDEN_THROW(uint32_t)
9209iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
9210 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
9211
9212{
9213 /*
9214 * Do all the call setup and cleanup.
9215 */
9216 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
9217
9218 /*
9219 * Load the two or three hidden arguments.
9220 */
9221#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
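    /* With strict VBOXSTRICTRC on Windows/AMD64 the return value is a class returned via a
       hidden buffer, so pass the address of a frame slot as an extra first argument and
       shift the real arguments up by one. */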
9222 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
9223 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9224 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
9225#else
9226 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9227 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
9228#endif
9229
9230 /*
9231 * Make the call and check the return code.
9232 *
9233 * Shadow PC copies are always flushed here; other stuff depends on flags.
9234 * Segment and general purpose registers are explicitly flushed via the
9235 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
9236 * macros.
9237 */
9238 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
9239#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9240 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
9241#endif
9242 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
9243 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
9244 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
9245 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
9246
9247 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
9248}
9249
9250
9251#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
9252 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
9253
9254/** Emits code for IEM_MC_CALL_CIMPL_1. */
9255DECL_INLINE_THROW(uint32_t)
9256iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9257 uintptr_t pfnCImpl, uint8_t idxArg0)
9258{
9259 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9260 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
9261}
9262
9263
9264#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
9265 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
9266
9267/** Emits code for IEM_MC_CALL_CIMPL_2. */
9268DECL_INLINE_THROW(uint32_t)
9269iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9270 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
9271{
9272 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9273 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9274 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
9275}
9276
9277
9278#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
9279 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9280 (uintptr_t)a_pfnCImpl, a0, a1, a2)
9281
9282/** Emits code for IEM_MC_CALL_CIMPL_3. */
9283DECL_INLINE_THROW(uint32_t)
9284iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9285 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9286{
9287 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9288 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9289 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9290 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
9291}
9292
9293
9294#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
9295 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9296 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
9297
9298/** Emits code for IEM_MC_CALL_CIMPL_4. */
9299DECL_INLINE_THROW(uint32_t)
9300iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9301 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
9302{
9303 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9304 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9305 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9306 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
9307 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
9308}
9309
9310
9311#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
9312 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9313 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
9314
9315/** Emits code for IEM_MC_CALL_CIMPL_5. */
9316DECL_INLINE_THROW(uint32_t)
9317iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9318 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
9319{
9320 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9321 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9322 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9323 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
9324 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
9325 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
9326}
9327
9328
9329/** Recompiler debugging: Flush guest register shadow copies. */
9330#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
9331
9332
9333
9334/*********************************************************************************************************************************
9335* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
9336*********************************************************************************************************************************/
9337
9338/**
9339 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
9340 */
9341DECL_INLINE_THROW(uint32_t)
9342iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9343 uintptr_t pfnAImpl, uint8_t cArgs)
9344{
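    /* Validate the return code variable, if present: it must not double as a call
       argument and it must fit in a 64-bit host register. */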
9345 if (idxVarRc != UINT8_MAX)
9346 {
9347 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
9348 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
9349 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
9350 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
9351 }
9352
9353 /*
9354 * Do all the call setup and cleanup.
9355 */
9356 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
9357
9358 /*
9359 * Make the call and update the return code variable if we've got one.
9360 */
9361 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
9362 if (idxVarRc != UINT8_MAX)
9363 {
9364        off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
9365 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
9366 }
9367
9368 return off;
9369}
9370
9371
9372
9373#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
9374 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
9375
9376#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
9377 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
9378
9379/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
9380DECL_INLINE_THROW(uint32_t)
9381iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
9382{
9383 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
9384}
9385
9386
9387#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
9388 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
9389
9390#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
9391 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
9392
9393/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
9394DECL_INLINE_THROW(uint32_t)
9395iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
9396{
9397 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9398 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
9399}
9400
9401
9402#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
9403 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
9404
9405#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
9406 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
9407
9408/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
9409DECL_INLINE_THROW(uint32_t)
9410iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9411 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9412{
9413 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9414 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9415 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
9416}
9417
9418
9419#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
9420 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
9421
9422#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
9423 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
9424
9425/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
9426DECL_INLINE_THROW(uint32_t)
9427iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9428 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9429{
9430 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9431 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9432 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9433 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
9434}
9435
9436
9437#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
9438 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9439
9440#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
9441 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9442
9443/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
9444DECL_INLINE_THROW(uint32_t)
9445iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9446 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
9447{
9448 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9449 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9450 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9451 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
9452 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
9453}
9454
9455
9456
9457/*********************************************************************************************************************************
9458* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
9459*********************************************************************************************************************************/
9460
9461#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
9462 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
9463
9464#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9465 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
9466
9467#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9468 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
9469
9470#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9471 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
9472
9473
9474/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
9475 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
9476DECL_INLINE_THROW(uint32_t)
9477iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
9478{
9479 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9480 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9481 Assert(iGRegEx < 20);
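    /* iGRegEx values 0..15 address the low byte of the corresponding GPR, while
       16..19 address the high byte registers AH, CH, DH and BH. */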
9482
9483 /* Same discussion as in iemNativeEmitFetchGregU16 */
9484 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9485 kIemNativeGstRegUse_ReadOnly);
9486
9487 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9488 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9489
9490 /* The value is zero-extended to the full 64-bit host register width. */
9491 if (iGRegEx < 16)
9492 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9493 else
9494 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9495
9496 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9497 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9498 return off;
9499}
9500
9501
9502#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9503 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
9504
9505#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9506 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
9507
9508#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9509 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
9510
9511/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
9512DECL_INLINE_THROW(uint32_t)
9513iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
9514{
9515 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9516 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
9517 Assert(iGRegEx < 20);
9518
9519 /* Same discussion as in iemNativeEmitFetchGregU16 */
9520 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9521 kIemNativeGstRegUse_ReadOnly);
9522
9523 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9524 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9525
9526 if (iGRegEx < 16)
9527 {
9528 switch (cbSignExtended)
9529 {
9530 case sizeof(uint16_t):
9531 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9532 break;
9533 case sizeof(uint32_t):
9534 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9535 break;
9536 case sizeof(uint64_t):
9537 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9538 break;
9539 default: AssertFailed(); break;
9540 }
9541 }
9542 else
9543 {
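        /* High byte register (AH/CH/DH/BH): move bits 15:8 into the low byte of the
           destination first, then sign-extend from that 8-bit value. */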
9544 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9545 switch (cbSignExtended)
9546 {
9547 case sizeof(uint16_t):
9548 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9549 break;
9550 case sizeof(uint32_t):
9551 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9552 break;
9553 case sizeof(uint64_t):
9554 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9555 break;
9556 default: AssertFailed(); break;
9557 }
9558 }
9559
9560 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9561 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9562 return off;
9563}
9564
9565
9566
9567#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
9568 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
9569
9570#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
9571 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9572
9573#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
9574 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9575
9576/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
9577DECL_INLINE_THROW(uint32_t)
9578iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9579{
9580 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9581 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9582 Assert(iGReg < 16);
9583
9584 /*
9585 * We can either just load the low 16 bits of the GPR into a host register
9586 * for the variable, or we can do so via a shadow copy host register. The
9587 * latter will avoid having to reload it if it's being stored later, but
9588 * will waste a host register if it isn't touched again. Since we don't
9589 * know what's going to happen, we choose the latter for now.
9590 */
9591 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9592 kIemNativeGstRegUse_ReadOnly);
9593
9594 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9595 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9596 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9597 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9598
9599 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9600 return off;
9601}
9602
9603
9604#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
9605 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9606
9607#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
9608 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9609
9610/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
9611DECL_INLINE_THROW(uint32_t)
9612iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
9613{
9614 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9615 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
9616 Assert(iGReg < 16);
9617
9618 /*
9619     * We can either just load the low 16 bits of the GPR into a host register
9620     * for the variable, or we can do so via a shadow copy host register. The
9621     * latter will avoid having to reload it if it's being stored later, but
9622     * will waste a host register if it isn't touched again. Since we don't
9623     * know what's going to happen, we choose the latter for now.
9624 */
9625 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9626 kIemNativeGstRegUse_ReadOnly);
9627
9628 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9629 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9630 if (cbSignExtended == sizeof(uint32_t))
9631 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9632 else
9633 {
9634 Assert(cbSignExtended == sizeof(uint64_t));
9635 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9636 }
9637 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9638
9639 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9640 return off;
9641}
9642
9643
9644#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
9645 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
9646
9647#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
9648 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
9649
9650/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
9651DECL_INLINE_THROW(uint32_t)
9652iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9653{
9654 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9655 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9656 Assert(iGReg < 16);
9657
9658 /*
9659     * We can either just load the low 32 bits of the GPR into a host register
9660     * for the variable, or we can do so via a shadow copy host register. The
9661     * latter will avoid having to reload it if it's being stored later, but
9662     * will waste a host register if it isn't touched again. Since we don't
9663     * know what's going to happen, we choose the latter for now.
9664 */
9665 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9666 kIemNativeGstRegUse_ReadOnly);
9667
9668 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9669 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9670 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9671 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9672
9673 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9674 return off;
9675}
9676
9677
9678#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
9679 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
9680
9681/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
9682DECL_INLINE_THROW(uint32_t)
9683iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9684{
9685 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9686 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9687 Assert(iGReg < 16);
9688
9689 /*
9690     * We can either just load the low 32 bits of the GPR into a host register
9691     * for the variable, or we can do so via a shadow copy host register. The
9692     * latter will avoid having to reload it if it's being stored later, but
9693     * will waste a host register if it isn't touched again. Since we don't
9694     * know what's going to happen, we choose the latter for now.
9695 */
9696 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9697 kIemNativeGstRegUse_ReadOnly);
9698
9699 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9700 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9701 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9702 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9703
9704 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9705 return off;
9706}
9707
9708
9709#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
9710 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9711
9712#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
9713 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9714
9715/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
9716 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
9717DECL_INLINE_THROW(uint32_t)
9718iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9719{
9720 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9721 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9722 Assert(iGReg < 16);
9723
9724 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9725 kIemNativeGstRegUse_ReadOnly);
9726
9727 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9728 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9729 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
9730 /** @todo name the register a shadow one already? */
9731 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9732
9733 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9734 return off;
9735}
9736
9737
9738
9739/*********************************************************************************************************************************
9740* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
9741*********************************************************************************************************************************/
9742
9743#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
9744 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
9745
9746/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
9747DECL_INLINE_THROW(uint32_t)
9748iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
9749{
9750 Assert(iGRegEx < 20);
9751 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9752 kIemNativeGstRegUse_ForUpdate);
9753#ifdef RT_ARCH_AMD64
9754 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9755
9756 /* To the lowest byte of the register: mov r8, imm8 */
9757 if (iGRegEx < 16)
9758 {
9759 if (idxGstTmpReg >= 8)
9760 pbCodeBuf[off++] = X86_OP_REX_B;
9761 else if (idxGstTmpReg >= 4)
9762 pbCodeBuf[off++] = X86_OP_REX;
9763 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9764 pbCodeBuf[off++] = u8Value;
9765 }
9766 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
9767 else if (idxGstTmpReg < 4)
9768 {
9769 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
9770 pbCodeBuf[off++] = u8Value;
9771 }
9772 else
9773 {
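        /* Only the low four host registers have an addressable high-byte form (AH..BH),
           so for any other host register we rotate the 64-bit value right by 8, patch the
           (now) low byte, and rotate back. */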
9774 /* ror reg64, 8 */
9775 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9776 pbCodeBuf[off++] = 0xc1;
9777 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9778 pbCodeBuf[off++] = 8;
9779
9780 /* mov reg8, imm8 */
9781 if (idxGstTmpReg >= 8)
9782 pbCodeBuf[off++] = X86_OP_REX_B;
9783 else if (idxGstTmpReg >= 4)
9784 pbCodeBuf[off++] = X86_OP_REX;
9785 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9786 pbCodeBuf[off++] = u8Value;
9787
9788 /* rol reg64, 8 */
9789 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9790 pbCodeBuf[off++] = 0xc1;
9791 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9792 pbCodeBuf[off++] = 8;
9793 }
9794
9795#elif defined(RT_ARCH_ARM64)
9796 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
9797 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9798 if (iGRegEx < 16)
9799 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
9800 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
9801 else
9802 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
9803 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
9804 iemNativeRegFreeTmp(pReNative, idxImmReg);
9805
9806#else
9807# error "Port me!"
9808#endif
9809
9810 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9811
9812 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9813
9814 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9815 return off;
9816}
9817
9818
9819#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
9820 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
9821
9822/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
9823DECL_INLINE_THROW(uint32_t)
9824iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
9825{
9826 Assert(iGRegEx < 20);
9827 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9828
9829 /*
9830 * If it's a constant value (unlikely) we treat this as a
9831 * IEM_MC_STORE_GREG_U8_CONST statement.
9832 */
9833 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
9834 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
9835 { /* likely */ }
9836 else
9837 {
9838 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
9839 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9840 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
9841 }
9842
9843 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9844 kIemNativeGstRegUse_ForUpdate);
9845 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9846
9847#ifdef RT_ARCH_AMD64
9848 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
9849 if (iGRegEx < 16)
9850 {
9851 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
9852 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9853 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9854 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9855 pbCodeBuf[off++] = X86_OP_REX;
9856 pbCodeBuf[off++] = 0x8a;
9857 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9858 }
9859 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
9860 else if (idxGstTmpReg < 4 && idxVarReg < 4)
9861 {
9862 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
9863 pbCodeBuf[off++] = 0x8a;
9864 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
9865 }
9866 else
9867 {
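        /* No byte-sized encoding fits this register combination, so use the same
           ror/rol-by-8 trick as iemNativeEmitStoreGregU8Const to patch bits 15:8. */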
9868 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
9869
9870 /* ror reg64, 8 */
9871 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9872 pbCodeBuf[off++] = 0xc1;
9873 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9874 pbCodeBuf[off++] = 8;
9875
9876 /* mov reg8, reg8(r/m) */
9877 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9878 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9879 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9880 pbCodeBuf[off++] = X86_OP_REX;
9881 pbCodeBuf[off++] = 0x8a;
9882 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9883
9884 /* rol reg64, 8 */
9885 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9886 pbCodeBuf[off++] = 0xc1;
9887 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9888 pbCodeBuf[off++] = 8;
9889 }
9890
9891#elif defined(RT_ARCH_ARM64)
9892 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
9893 or
9894 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
9895 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9896 if (iGRegEx < 16)
9897 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
9898 else
9899 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
9900
9901#else
9902# error "Port me!"
9903#endif
9904 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9905
9906 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9907
9908 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9909 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9910 return off;
9911}
9912
9913
9914
9915#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
9916 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
9917
9918/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
9919DECL_INLINE_THROW(uint32_t)
9920iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
9921{
9922 Assert(iGReg < 16);
9923 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9924 kIemNativeGstRegUse_ForUpdate);
9925#ifdef RT_ARCH_AMD64
9926 /* mov reg16, imm16 */
9927 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9928 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9929 if (idxGstTmpReg >= 8)
9930 pbCodeBuf[off++] = X86_OP_REX_B;
9931 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
9932 pbCodeBuf[off++] = RT_BYTE1(uValue);
9933 pbCodeBuf[off++] = RT_BYTE2(uValue);
9934
9935#elif defined(RT_ARCH_ARM64)
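    /* MOVK only replaces bits 15:0 and leaves bits 63:16 untouched, which matches the
       x86 semantics of writing a 16-bit GPR. */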
9936 /* movk xdst, #uValue, lsl #0 */
9937 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9938 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
9939
9940#else
9941# error "Port me!"
9942#endif
9943
9944 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9945
9946 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9947 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9948 return off;
9949}
9950
9951
9952#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
9953 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
9954
9955/** Emits code for IEM_MC_STORE_GREG_U16. */
9956DECL_INLINE_THROW(uint32_t)
9957iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9958{
9959 Assert(iGReg < 16);
9960 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9961
9962 /*
9963 * If it's a constant value (unlikely) we treat this as a
9964 * IEM_MC_STORE_GREG_U16_CONST statement.
9965 */
9966 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
9967 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
9968 { /* likely */ }
9969 else
9970 {
9971 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
9972 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9973 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
9974 }
9975
9976 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9977 kIemNativeGstRegUse_ForUpdate);
9978
9979#ifdef RT_ARCH_AMD64
9980 /* mov reg16, reg16 or [mem16] */
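    /* The value variable may still live in its stack slot; if so, we load the 16-bit
       word straight from the stack instead of forcing it into a host register first. */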
9981 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9982 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9983 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9984 {
9985 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
9986 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
9987 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
9988 pbCodeBuf[off++] = 0x8b;
9989 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
9990 }
9991 else
9992 {
9993 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
9994 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9995 if (idxGstTmpReg >= 8)
9996 pbCodeBuf[off++] = X86_OP_REX_R;
9997 pbCodeBuf[off++] = 0x8b;
9998 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9999 }
10000
10001#elif defined(RT_ARCH_ARM64)
10002 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
10003 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
10004 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10005 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
10006 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10007
10008#else
10009# error "Port me!"
10010#endif
10011
10012 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10013
10014 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10015 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10016 return off;
10017}
10018
10019
10020#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
10021 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
10022
10023/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
10024DECL_INLINE_THROW(uint32_t)
10025iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
10026{
10027 Assert(iGReg < 16);
10028 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10029 kIemNativeGstRegUse_ForFullWrite);
10030 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
10031 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10032 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10033 return off;
10034}
10035
10036
10037#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
10038 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
10039
10040/** Emits code for IEM_MC_STORE_GREG_U32. */
10041DECL_INLINE_THROW(uint32_t)
10042iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
10043{
10044 Assert(iGReg < 16);
10045 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10046
10047 /*
10048 * If it's a constant value (unlikely) we treat this as a
10049 * IEM_MC_STORE_GREG_U32_CONST statement.
10050 */
10051 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
10052 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
10053 { /* likely */ }
10054 else
10055 {
10056 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10057 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10058 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
10059 }
10060
10061 /*
10062     * For the rest we allocate a guest register for the variable and write
10063     * it to the CPUMCTX structure.
10064 */
10065 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
10066 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
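    /* The store above writes all 64 bits; since a 32-bit GREG write must zero bits 63:32,
       strict builds verify below that the upper half of the host register is clear. */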
10067#ifdef VBOX_STRICT
10068 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
10069#endif
10070 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10071 return off;
10072}
10073
10074
10075#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
10076 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
10077
10078/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
10079DECL_INLINE_THROW(uint32_t)
10080iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
10081{
10082 Assert(iGReg < 16);
10083 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10084 kIemNativeGstRegUse_ForFullWrite);
10085 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
10086 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10087 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10088 return off;
10089}
10090
10091
10092#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
10093 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
10094
10095/** Emits code for IEM_MC_STORE_GREG_U64. */
10096DECL_INLINE_THROW(uint32_t)
10097iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
10098{
10099 Assert(iGReg < 16);
10100 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10101
10102 /*
10103 * If it's a constant value (unlikely) we treat this as a
10104 * IEM_MC_STORE_GREG_U64_CONST statement.
10105 */
10106 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
10107 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
10108 { /* likely */ }
10109 else
10110 {
10111 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10112 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10113 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
10114 }
10115
10116 /*
10117     * For the rest we allocate a guest register for the variable and write
10118     * it to the CPUMCTX structure.
10119 */
10120 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
10121 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10122 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10123 return off;
10124}
10125
10126
10127#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
10128 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
10129
10130/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
10131DECL_INLINE_THROW(uint32_t)
10132iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
10133{
10134 Assert(iGReg < 16);
10135 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10136 kIemNativeGstRegUse_ForUpdate);
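    /* A 32-bit mov of the register onto itself zero-extends, clearing bits 63:32. */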
10137 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
10138 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10139 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10140 return off;
10141}
10142
10143
10144/*********************************************************************************************************************************
10145* General purpose register manipulation (add, sub). *
10146*********************************************************************************************************************************/
10147
10148#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
10149    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
10150
10151/** Emits code for IEM_MC_ADD_GREG_U16. */
10152DECL_INLINE_THROW(uint32_t)
10153iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
10154{
10155 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10156 kIemNativeGstRegUse_ForUpdate);
10157
10158#ifdef RT_ARCH_AMD64
10159 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
10160 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10161 if (idxGstTmpReg >= 8)
10162 pbCodeBuf[off++] = X86_OP_REX_B;
10163 if (uAddend == 1)
10164 {
10165 pbCodeBuf[off++] = 0xff; /* inc */
10166 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10167 }
10168 else
10169 {
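        /* add r/m16, imm16 (0x81 /0 iw); the addend is at most 255, so the high
           immediate byte is always zero. */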
10170 pbCodeBuf[off++] = 0x81;
10171 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10172 pbCodeBuf[off++] = uAddend;
10173 pbCodeBuf[off++] = 0;
10174 }
10175
10176#else
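    /* The 16-bit add must leave bits 63:16 of the guest register untouched, so do the
       addition in a temporary register and merge the low 16 bits back with BFI. */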
10177 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
10178 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10179
10180    /* add tmp, gstgrp, uAddend */
10181 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
10182
10183 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
10184 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
10185
10186 iemNativeRegFreeTmp(pReNative, idxTmpReg);
10187#endif
10188
10189 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10190
10191 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10192
10193 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10194 return off;
10195}
10196
10197
10198#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
10199 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
10200
10201#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
10202 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
10203
10204/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
10205DECL_INLINE_THROW(uint32_t)
10206iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
10207{
10208 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10209 kIemNativeGstRegUse_ForUpdate);
10210
10211#ifdef RT_ARCH_AMD64
10212 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
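    /* Pick the shortest encoding: inc for an addend of 1, add r/m,imm8 (0x83 /0) when
       the addend fits a sign-extended byte, otherwise add r/m,imm32 (0x81 /0). */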
10213 if (f64Bit)
10214 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
10215 else if (idxGstTmpReg >= 8)
10216 pbCodeBuf[off++] = X86_OP_REX_B;
10217 if (uAddend == 1)
10218 {
10219 pbCodeBuf[off++] = 0xff; /* inc */
10220 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10221 }
10222 else if (uAddend < 128)
10223 {
10224 pbCodeBuf[off++] = 0x83; /* add */
10225 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10226 pbCodeBuf[off++] = RT_BYTE1(uAddend);
10227 }
10228 else
10229 {
10230 pbCodeBuf[off++] = 0x81; /* add */
10231 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10232 pbCodeBuf[off++] = RT_BYTE1(uAddend);
10233 pbCodeBuf[off++] = 0;
10234 pbCodeBuf[off++] = 0;
10235 pbCodeBuf[off++] = 0;
10236 }
10237
10238#else
10239    /* add gstgrp, gstgrp, uAddend */
10240 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10241 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
10242
10243#endif
10244
10245 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10246
10247 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10248
10249 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10250 return off;
10251}
10252
10253
10254
10255#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
10256 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
10257
10258/** Emits code for IEM_MC_SUB_GREG_U16. */
10259DECL_INLINE_THROW(uint32_t)
10260iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
10261{
10262 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10263 kIemNativeGstRegUse_ForUpdate);
10264
10265#ifdef RT_ARCH_AMD64
10266 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
10267 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10268 if (idxGstTmpReg >= 8)
10269 pbCodeBuf[off++] = X86_OP_REX_B;
10270 if (uSubtrahend == 1)
10271 {
10272 pbCodeBuf[off++] = 0xff; /* dec */
10273 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10274 }
10275 else
10276 {
10277 pbCodeBuf[off++] = 0x81;
10278 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10279 pbCodeBuf[off++] = uSubtrahend;
10280 pbCodeBuf[off++] = 0;
10281 }
10282
10283#else
10284 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
10285 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10286
10287 /* sub tmp, gstgrp, uSubtrahend */
10288 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
10289
10290 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
10291 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
10292
10293 iemNativeRegFreeTmp(pReNative, idxTmpReg);
10294#endif
10295
10296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10297
10298 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10299
10300 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10301 return off;
10302}
10303
10304
10305#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
10306 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
10307
10308#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
10309 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
10310
10311/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
10312DECL_INLINE_THROW(uint32_t)
10313iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
10314{
10315 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10316 kIemNativeGstRegUse_ForUpdate);
10317
10318#ifdef RT_ARCH_AMD64
10319 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
10320 if (f64Bit)
10321 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
10322 else if (idxGstTmpReg >= 8)
10323 pbCodeBuf[off++] = X86_OP_REX_B;
10324 if (uSubtrahend == 1)
10325 {
10326 pbCodeBuf[off++] = 0xff; /* dec */
10327 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10328 }
10329 else if (uSubtrahend < 128)
10330 {
10331 pbCodeBuf[off++] = 0x83; /* sub */
10332 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10333 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
10334 }
10335 else
10336 {
10337 pbCodeBuf[off++] = 0x81; /* sub */
10338 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10339 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
10340 pbCodeBuf[off++] = 0;
10341 pbCodeBuf[off++] = 0;
10342 pbCodeBuf[off++] = 0;
10343 }
10344
10345#else
10346    /* sub gstgrp, gstgrp, uSubtrahend */
10347 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10348 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
10349
10350#endif
10351
10352 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10353
10354 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10355
10356 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10357 return off;
10358}
10359
10360
10361/*********************************************************************************************************************************
10362* Local variable manipulation (add, sub, and, or). *
10363*********************************************************************************************************************************/
10364
10365#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
10366 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10367
10368#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
10369 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10370
10371#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
10372 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10373
10374#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
10375 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10376
10377/** Emits code for AND'ing a local and a constant value. */
10378DECL_INLINE_THROW(uint32_t)
10379iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10380{
10381#ifdef VBOX_STRICT
10382 switch (cbMask)
10383 {
10384 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10385 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10386 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10387 case sizeof(uint64_t): break;
10388 default: AssertFailedBreak();
10389 }
10390#endif
10391
10392 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10393 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
10394
10395 if (cbMask <= sizeof(uint32_t))
10396 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
10397 else
10398 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
10399
10400 iemNativeVarRegisterRelease(pReNative, idxVar);
10401 return off;
10402}
10403
10404
10405#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
10406 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10407
10408#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
10409 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10410
10411#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
10412 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10413
10414#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
10415 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10416
10417/** Emits code for OR'ing a local and a constant value. */
10418DECL_INLINE_THROW(uint32_t)
10419iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10420{
10421#ifdef VBOX_STRICT
10422 switch (cbMask)
10423 {
10424 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10425 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10426 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10427 case sizeof(uint64_t): break;
10428 default: AssertFailedBreak();
10429 }
10430#endif
10431
10432 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10433 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
10434
10435 if (cbMask <= sizeof(uint32_t))
10436 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
10437 else
10438 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
10439
10440 iemNativeVarRegisterRelease(pReNative, idxVar);
10441 return off;
10442}
10443
10444
10445#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
10446 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
10447
10448#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
10449 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
10450
10451#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
10452 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
10453
10454/** Emits code for reversing the byte order in a local value. */
10455DECL_INLINE_THROW(uint32_t)
10456iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
10457{
10458 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10459 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
10460
10461 switch (cbLocal)
10462 {
10463 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
10464 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
10465 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
10466 default: AssertFailedBreak();
10467 }
10468
10469 iemNativeVarRegisterRelease(pReNative, idxVar);
10470 return off;
10471}
10472
10473
10474
10475/*********************************************************************************************************************************
10476* EFLAGS *
10477*********************************************************************************************************************************/
10478
10479#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10480# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
10481#else
10482# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
10483 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
10484
10485DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
10486{
10487 if (fEflOutput)
10488 {
10489 PVMCPUCC const pVCpu = pReNative->pVCpu;
10490# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10491 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
10492 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
10493 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
10494# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10495 if (fEflOutput & (a_fEfl)) \
10496 { \
10497 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
10498 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10499 else \
10500 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10501 } else do { } while (0)
10502# else
10503 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
10504 IEMLIVENESSBIT const LivenessClobbered =
10505 {
10506 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10507 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10508 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10509 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10510 };
10511 IEMLIVENESSBIT const LivenessDelayable =
10512 {
10513 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10514 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10515 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10516 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10517 };
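        /* Derived from the masks above: a flag counts as clobbered when it is written
           without being read or otherwise needed, and as delayable when the only other
           use is a potential exception or call. */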
10518# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10519 if (fEflOutput & (a_fEfl)) \
10520 { \
10521 if (LivenessClobbered.a_fLivenessMember) \
10522 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10523 else if (LivenessDelayable.a_fLivenessMember) \
10524 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
10525 else \
10526 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10527 } else do { } while (0)
10528# endif
10529 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
10530 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
10531 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
10532 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
10533 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
10534 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
10535 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
10536# undef CHECK_FLAG_AND_UPDATE_STATS
10537 }
10538 RT_NOREF(fEflInput);
10539}
10540#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
10541
10542#undef IEM_MC_FETCH_EFLAGS /* should not be used */
10543#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10544 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
10545
10546/** Handles IEM_MC_FETCH_EFLAGS_EX. */
10547DECL_INLINE_THROW(uint32_t)
10548iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
10549 uint32_t fEflInput, uint32_t fEflOutput)
10550{
10551 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
10552 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
10553 RT_NOREF(fEflInput, fEflOutput);
10554
10555#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10556# ifdef VBOX_STRICT
10557 if ( pReNative->idxCurCall != 0
10558 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
10559 {
10560 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
10561 uint32_t const fBoth = fEflInput | fEflOutput;
10562# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
10563 AssertMsg( !(fBoth & (a_fElfConst)) \
10564 || (!(fEflInput & (a_fElfConst)) \
10565 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10566 : !(fEflOutput & (a_fElfConst)) \
10567 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10568 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
10569 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
10570 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
10571 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
10572 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
10573 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
10574 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
10575 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
10576 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
10577# undef ASSERT_ONE_EFL
10578 }
10579# endif
10580#endif
10581
10582/** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
10583 * the existing shadow copy. */
10584 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
10585 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10586 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
10587 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10588 return off;
10589}
10590
10591
10592
10593/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
10594 * start using it with custom native code emission (inlining assembly
10595 * instruction helpers). */
10596#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
10597#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10598 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10599 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
10600
10601/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
10602DECL_INLINE_THROW(uint32_t)
10603iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
10604{
10605 RT_NOREF(fEflOutput);
10606 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
10607 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
10608
10609#ifdef VBOX_STRICT
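    /* Sanity check the committed value: break (brk 0x2001/0x2002) if the reserved
       must-be-one bit is clear or if any reserved must-be-zero bit is set. */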
10610 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
10611 uint32_t offFixup = off;
10612 off = iemNativeEmitJnzToFixed(pReNative, off, off);
10613 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
10614 iemNativeFixupFixedJump(pReNative, offFixup, off);
10615
10616 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
10617 offFixup = off;
10618 off = iemNativeEmitJzToFixed(pReNative, off, off);
10619 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
10620 iemNativeFixupFixedJump(pReNative, offFixup, off);
10621
10622    /** @todo validate that only bits in the fEflOutput mask changed. */
10623#endif
10624
10625 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10626 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
10627 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10628 return off;
10629}
10630
10631
10632
10633/*********************************************************************************************************************************
10634* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
10635*********************************************************************************************************************************/
10636
10637#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
10638 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
10639
10640#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
10641 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
10642
10643#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
10644 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
10645
10646
10647/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
10648 * IEM_MC_FETCH_SREG_ZX_U64. */
10649DECL_INLINE_THROW(uint32_t)
10650iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
10651{
10652 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10653 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
10654 Assert(iSReg < X86_SREG_COUNT);
10655
10656 /*
10657     * For now, we will not create a shadow copy of a selector. The rationale
10658     * is that since we do not recompile the popping and loading of segment
10659     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
10660     * and moving to registers, there is only a small chance that the shadow
10661     * copy will be accessed again before the register is reloaded. One
10662     * scenario would be nested calls in 16-bit code, but I doubt it's worth
10663     * the extra register pressure atm.
10664     *
10665     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
10666     * and iemNativeVarRegisterAcquire for a load scenario. We only have the
10667     * store scenario covered at present (r160730).
10668 */
10669 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10670 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10671 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
10672 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10673 return off;
10674}
10675
10676
10677
10678/*********************************************************************************************************************************
10679* Register references. *
10680*********************************************************************************************************************************/
10681
10682#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
10683 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
10684
10685#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
10686 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
10687
10688/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
10689DECL_INLINE_THROW(uint32_t)
10690iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
10691{
10692 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10693 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10694 Assert(iGRegEx < 20);
10695
10696 if (iGRegEx < 16)
10697 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10698 else
10699 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
10700
10701 /* If we've delayed writing back the register value, flush it now. */
10702 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10703
10704 /* If it's not a const reference we need to flush the shadow copy of the register now. */
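    /* (The reference may be written through, which would make a host shadow copy stale.) */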
10705 if (!fConst)
10706 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
10707
10708 return off;
10709}
10710
10711#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
10712 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
10713
10714#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
10715 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
10716
10717#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
10718 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
10719
10720#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
10721 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
10722
10723#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
10724 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
10725
10726#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
10727 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
10728
10729#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
10730 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
10731
10732#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
10733 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
10734
10735#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
10736 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
10737
10738#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
10739 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
10740
10741/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
10742DECL_INLINE_THROW(uint32_t)
10743iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
10744{
10745 Assert(iGReg < 16);
10746 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
10747 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10748
10749 /* If we've delayed writing back the register value, flush it now. */
10750 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
10751
10752 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10753 if (!fConst)
10754 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
10755
10756 return off;
10757}
10758
10759
10760#undef IEM_MC_REF_EFLAGS /* should not be used. */
10761#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
10762 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10763 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
10764
10765/** Handles IEM_MC_REF_EFLAGS. */
10766DECL_INLINE_THROW(uint32_t)
10767iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
10768{
10769 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
10770 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10771
10772 /* If we've delayed writing back the register value, flush it now. */
10773 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
10774
10775 /* If there is a shadow copy of guest EFLAGS, flush it now. */
10776 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
10777
10778 return off;
10779}
10780
10781
10782/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
10783 * different code from the threaded recompiler, maybe it would be helpful. For now
10784 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
10785#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
10786
10787
10788
10789/*********************************************************************************************************************************
10790* Effective Address Calculation *
10791*********************************************************************************************************************************/
10792#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
10793 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
10794
10795/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
10796 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
10797DECL_INLINE_THROW(uint32_t)
10798iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10799 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
10800{
10801 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10802
10803 /*
10804 * Handle the disp16 form with no registers first.
10805 *
10806 * Convert to an immediate value, as that'll delay the register allocation
10807 * and assignment till the memory access / call / whatever and we can use
10808 * a more appropriate register (or none at all).
10809 */
10810 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
10811 {
10812 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
10813 return off;
10814 }
10815
10816    /* Determine the displacement. */
10817 uint16_t u16EffAddr;
10818 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10819 {
10820 case 0: u16EffAddr = 0; break;
10821 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
10822 case 2: u16EffAddr = u16Disp; break;
10823 default: AssertFailedStmt(u16EffAddr = 0);
10824 }
10825
10826 /* Determine the registers involved. */
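    /* 16-bit ModR/M r/m encodings: 0=BX+SI, 1=BX+DI, 2=BP+SI, 3=BP+DI, 4=SI, 5=DI,
       6=BP (disp16-only when mod=0, handled above), 7=BX. */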
10827 uint8_t idxGstRegBase;
10828 uint8_t idxGstRegIndex;
10829 switch (bRm & X86_MODRM_RM_MASK)
10830 {
10831 case 0:
10832 idxGstRegBase = X86_GREG_xBX;
10833 idxGstRegIndex = X86_GREG_xSI;
10834 break;
10835 case 1:
10836 idxGstRegBase = X86_GREG_xBX;
10837 idxGstRegIndex = X86_GREG_xDI;
10838 break;
10839 case 2:
10840 idxGstRegBase = X86_GREG_xBP;
10841 idxGstRegIndex = X86_GREG_xSI;
10842 break;
10843 case 3:
10844 idxGstRegBase = X86_GREG_xBP;
10845 idxGstRegIndex = X86_GREG_xDI;
10846 break;
10847 case 4:
10848 idxGstRegBase = X86_GREG_xSI;
10849 idxGstRegIndex = UINT8_MAX;
10850 break;
10851 case 5:
10852 idxGstRegBase = X86_GREG_xDI;
10853 idxGstRegIndex = UINT8_MAX;
10854 break;
10855 case 6:
10856 idxGstRegBase = X86_GREG_xBP;
10857 idxGstRegIndex = UINT8_MAX;
10858 break;
10859#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
10860 default:
10861#endif
10862 case 7:
10863 idxGstRegBase = X86_GREG_xBX;
10864 idxGstRegIndex = UINT8_MAX;
10865 break;
10866 }
10867
10868 /*
10869 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
10870 */
10871 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10872 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10873 kIemNativeGstRegUse_ReadOnly);
10874 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
10875 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10876 kIemNativeGstRegUse_ReadOnly)
10877 : UINT8_MAX;
10878#ifdef RT_ARCH_AMD64
10879 if (idxRegIndex == UINT8_MAX)
10880 {
10881 if (u16EffAddr == 0)
10882 {
10883            /* movzx ret, base */
10884 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
10885 }
10886 else
10887 {
10888 /* lea ret32, [base64 + disp32] */
10889 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10890 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10891 if (idxRegRet >= 8 || idxRegBase >= 8)
10892 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10893 pbCodeBuf[off++] = 0x8d;
10894 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10895 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
10896 else
10897 {
10898 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
10899 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10900 }
10901 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10902 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10903 pbCodeBuf[off++] = 0;
10904 pbCodeBuf[off++] = 0;
10905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10906
10907 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10908 }
10909 }
10910 else
10911 {
10912 /* lea ret32, [index64 + base64 (+ disp32)] */
10913 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10914 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10915 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10916 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10917 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10918 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10919 pbCodeBuf[off++] = 0x8d;
10920 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
10921 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10922 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
10923 if (bMod == X86_MOD_MEM4)
10924 {
10925 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10926 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10927 pbCodeBuf[off++] = 0;
10928 pbCodeBuf[off++] = 0;
10929 }
10930 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10931 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10932 }
10933
10934#elif defined(RT_ARCH_ARM64)
10935 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
10936 if (u16EffAddr == 0)
10937 {
10938 if (idxRegIndex == UINT8_MAX)
10939 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
10940 else
10941 {
10942 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
10943 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10944 }
10945 }
10946 else
10947 {
10948 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
10949 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
10950 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
10951 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10952 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
10953 else
10954 {
10955 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
10956 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10957 }
10958 if (idxRegIndex != UINT8_MAX)
10959 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
10960 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10961 }
10962
10963#else
10964# error "port me"
10965#endif
10966
10967 if (idxRegIndex != UINT8_MAX)
10968 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10969 iemNativeRegFreeTmp(pReNative, idxRegBase);
10970 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10971 return off;
10972}
10973
10974
10975#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
10976 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
10977
10978/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
10979 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
10980DECL_INLINE_THROW(uint32_t)
10981iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10982 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
10983{
10984 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10985
10986 /*
10987 * Handle the disp32 form with no registers first.
10988 *
10989 * Convert to an immediate value, as that'll delay the register allocation
10990 * and assignment till the memory access / call / whatever and we can use
10991 * a more appropriate register (or none at all).
10992 */
10993 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10994 {
10995 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
10996 return off;
10997 }
10998
10999    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
11000 uint32_t u32EffAddr = 0;
11001 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
11002 {
11003 case 0: break;
11004 case 1: u32EffAddr = (int8_t)u32Disp; break;
11005 case 2: u32EffAddr = u32Disp; break;
11006 default: AssertFailed();
11007 }
11008
11009 /* Get the register (or SIB) value. */
11010 uint8_t idxGstRegBase = UINT8_MAX;
11011 uint8_t idxGstRegIndex = UINT8_MAX;
11012 uint8_t cShiftIndex = 0;
11013 switch (bRm & X86_MODRM_RM_MASK)
11014 {
11015 case 0: idxGstRegBase = X86_GREG_xAX; break;
11016 case 1: idxGstRegBase = X86_GREG_xCX; break;
11017 case 2: idxGstRegBase = X86_GREG_xDX; break;
11018 case 3: idxGstRegBase = X86_GREG_xBX; break;
11019 case 4: /* SIB */
11020 {
11021            /* index w/ scaling. */
11022 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
11023 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
11024 {
11025 case 0: idxGstRegIndex = X86_GREG_xAX; break;
11026 case 1: idxGstRegIndex = X86_GREG_xCX; break;
11027 case 2: idxGstRegIndex = X86_GREG_xDX; break;
11028 case 3: idxGstRegIndex = X86_GREG_xBX; break;
11029 case 4: cShiftIndex = 0; /*no index*/ break;
11030 case 5: idxGstRegIndex = X86_GREG_xBP; break;
11031 case 6: idxGstRegIndex = X86_GREG_xSI; break;
11032 case 7: idxGstRegIndex = X86_GREG_xDI; break;
11033 }
11034
11035 /* base */
11036 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
11037 {
11038 case 0: idxGstRegBase = X86_GREG_xAX; break;
11039 case 1: idxGstRegBase = X86_GREG_xCX; break;
11040 case 2: idxGstRegBase = X86_GREG_xDX; break;
11041 case 3: idxGstRegBase = X86_GREG_xBX; break;
11042 case 4:
11043 idxGstRegBase = X86_GREG_xSP;
11044 u32EffAddr += uSibAndRspOffset >> 8;
11045 break;
11046 case 5:
11047 if ((bRm & X86_MODRM_MOD_MASK) != 0)
11048 idxGstRegBase = X86_GREG_xBP;
11049 else
11050 {
11051 Assert(u32EffAddr == 0);
11052 u32EffAddr = u32Disp;
11053 }
11054 break;
11055 case 6: idxGstRegBase = X86_GREG_xSI; break;
11056 case 7: idxGstRegBase = X86_GREG_xDI; break;
11057 }
11058 break;
11059 }
11060 case 5: idxGstRegBase = X86_GREG_xBP; break;
11061 case 6: idxGstRegBase = X86_GREG_xSI; break;
11062 case 7: idxGstRegBase = X86_GREG_xDI; break;
11063 }
11064
11065 /*
11066 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
11067 * the start of the function.
11068 */
11069 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
11070 {
11071 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
11072 return off;
11073 }
11074
11075 /*
11076 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
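     *
     * Illustrative example: '[eax+ecx*4+10h]' arrives with mod=1/rm=4, a SIB byte
     * of 0x88 (scale shift=2, index=ECX, base=EAX) and disp8=0x10, so this computes
     * (uint32_t)(0x10 + EAX + (ECX << 2)).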
11077 */
11078 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11079 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
11080 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11081 kIemNativeGstRegUse_ReadOnly);
11082 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
11083 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11084 kIemNativeGstRegUse_ReadOnly);
11085
11086 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
11087 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
11088 {
11089 idxRegBase = idxRegIndex;
11090 idxRegIndex = UINT8_MAX;
11091 }
11092
11093#ifdef RT_ARCH_AMD64
11094 if (idxRegIndex == UINT8_MAX)
11095 {
11096 if (u32EffAddr == 0)
11097 {
11098 /* mov ret, base */
11099 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11100 }
11101 else
11102 {
11103 /* lea ret32, [base64 + disp32] */
11104 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11105 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11106 if (idxRegRet >= 8 || idxRegBase >= 8)
11107 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
11108 pbCodeBuf[off++] = 0x8d;
11109 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11110 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11111 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
11112 else
11113 {
11114 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11115 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11116 }
11117 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11118 if (bMod == X86_MOD_MEM4)
11119 {
11120 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11121 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11122 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11123 }
11124 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11125 }
11126 }
11127 else
11128 {
11129 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11130 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11131 if (idxRegBase == UINT8_MAX)
11132 {
11133 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
11134 if (idxRegRet >= 8 || idxRegIndex >= 8)
11135 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11136 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
11137 pbCodeBuf[off++] = 0x8d;
11138 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
11139 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
11140 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11141 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11142 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11143 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11144 }
11145 else
11146 {
11147 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
11148 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11149 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11150 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11151 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
11152 pbCodeBuf[off++] = 0x8d;
11153 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
11154 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11155 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11156 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
11157 if (bMod != X86_MOD_MEM0)
11158 {
11159 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11160 if (bMod == X86_MOD_MEM4)
11161 {
11162 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11163 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11164 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11165 }
11166 }
11167 }
11168 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11169 }
11170
11171#elif defined(RT_ARCH_ARM64)
11172 if (u32EffAddr == 0)
11173 {
11174 if (idxRegIndex == UINT8_MAX)
11175 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11176 else if (idxRegBase == UINT8_MAX)
11177 {
11178 if (cShiftIndex == 0)
11179 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
11180 else
11181 {
11182 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11183 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
11184 }
11185 }
11186 else
11187 {
11188 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11189 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
11190 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
11191 }
11192 }
11193 else
11194 {
11195 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
11196 {
11197 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11198 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
11199 }
11200 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
11201 {
11202 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11203 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
11204 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
11205 }
11206 else
11207 {
11208 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
11209 if (idxRegBase != UINT8_MAX)
11210 {
11211 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11212 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
11213 }
11214 }
11215 if (idxRegIndex != UINT8_MAX)
11216 {
11217 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11218 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
11219 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
11220 }
11221 }
11222
11223#else
11224# error "port me"
11225#endif
11226
11227 if (idxRegIndex != UINT8_MAX)
11228 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11229 if (idxRegBase != UINT8_MAX)
11230 iemNativeRegFreeTmp(pReNative, idxRegBase);
11231 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11232 return off;
11233}
11234
11235
11236#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11237 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11238 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
11239
11240#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11241 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11242 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
11243
11244#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11245 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11246 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
11247
11248/**
11249 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
11250 *
11251 * @returns New off.
11252 * @param   pReNative           The native recompiler state.
11253 * @param   off                 Current offset into the native instruction buffer.
11254 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
11255 * bit 4 to REX.X. The two bits are part of the
11256 * REG sub-field, which isn't needed in this
11257 * function.
11258 * @param uSibAndRspOffset Two parts:
11259 * - The first 8 bits make up the SIB byte.
11260 * - The next 8 bits are the fixed RSP/ESP offset
11261 * in case of a pop [xSP].
11262 * @param u32Disp The displacement byte/word/dword, if any.
11263 * @param cbInstr The size of the fully decoded instruction. Used
11264 * for RIP relative addressing.
11265 * @param idxVarRet The result variable number.
11266 * @param f64Bit Whether to use a 64-bit or 32-bit address size
11267 * when calculating the address.
11268 *
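 * @note    Illustrative encoding example (sketch): for 'lea rax, [r12 + r13*2 + 8]'
 *          REX.B and REX.X are set, so bRmEx has bits 3 and 4 set and rm=4 selects
 *          the SIB byte 0x6C (scale shift=1, index=5, base=4); u32Disp is 8 and the
 *          RSP-offset part of uSibAndRspOffset is zero.
 *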
11269 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
11270 */
11271DECL_INLINE_THROW(uint32_t)
11272iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
11273 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
11274{
11275 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
11276
11277 /*
11278 * Special case the rip + disp32 form first.
11279 */
11280 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
11281 {
11282#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
11283        /* Need to take the current PC offset into account for the displacement; no need to flush here
11284           as the PC is only read and neither branching nor helper calls are involved. */
11285 u32Disp += pReNative->Core.offPc;
11286#endif
11287
11288 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11289 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
11290 kIemNativeGstRegUse_ReadOnly);
11291#ifdef RT_ARCH_AMD64
11292 if (f64Bit)
11293 {
11294 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
11295 if ((int32_t)offFinalDisp == offFinalDisp)
11296 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
11297 else
11298 {
11299 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
11300 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
11301 }
11302 }
11303 else
11304 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
11305
11306#elif defined(RT_ARCH_ARM64)
11307 if (f64Bit)
11308 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
11309 (int64_t)(int32_t)u32Disp + cbInstr);
11310 else
11311 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
11312 (int32_t)u32Disp + cbInstr);
11313
11314#else
11315# error "Port me!"
11316#endif
11317 iemNativeRegFreeTmp(pReNative, idxRegPc);
11318 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11319 return off;
11320 }
11321
11322    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
11323 int64_t i64EffAddr = 0;
11324 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
11325 {
11326 case 0: break;
11327 case 1: i64EffAddr = (int8_t)u32Disp; break;
11328 case 2: i64EffAddr = (int32_t)u32Disp; break;
11329 default: AssertFailed();
11330 }
11331
11332 /* Get the register (or SIB) value. */
11333 uint8_t idxGstRegBase = UINT8_MAX;
11334 uint8_t idxGstRegIndex = UINT8_MAX;
11335 uint8_t cShiftIndex = 0;
11336 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
11337 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
11338 else /* SIB: */
11339 {
11340        /* index w/ scaling. */
11341 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
11342 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
11343 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
11344 if (idxGstRegIndex == 4)
11345 {
11346 /* no index */
11347 cShiftIndex = 0;
11348 idxGstRegIndex = UINT8_MAX;
11349 }
11350
11351 /* base */
11352 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
11353 if (idxGstRegBase == 4)
11354 {
11355 /* pop [rsp] hack */
11356 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
11357 }
11358 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
11359 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
11360 {
11361 /* mod=0 and base=5 -> disp32, no base reg. */
11362 Assert(i64EffAddr == 0);
11363 i64EffAddr = (int32_t)u32Disp;
11364 idxGstRegBase = UINT8_MAX;
11365 }
11366 }
11367
11368 /*
11369 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
11370 * the start of the function.
11371 */
11372 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
11373 {
11374 if (f64Bit)
11375 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
11376 else
11377 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
11378 return off;
11379 }
11380
11381 /*
11382 * Now emit code that calculates:
11383 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11384 * or if !f64Bit:
11385 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11386 */
11387 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11388 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
11389 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11390 kIemNativeGstRegUse_ReadOnly);
11391 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
11392 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11393 kIemNativeGstRegUse_ReadOnly);
11394
11395 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
11396 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
11397 {
11398 idxRegBase = idxRegIndex;
11399 idxRegIndex = UINT8_MAX;
11400 }
11401
11402#ifdef RT_ARCH_AMD64
11403 uint8_t bFinalAdj;
11404 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
11405 bFinalAdj = 0; /* likely */
11406 else
11407 {
11408 /* pop [rsp] with a problematic disp32 value. Split out the
11409 RSP offset and add it separately afterwards (bFinalAdj). */
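        /* Illustrative example: a disp32 of 0x7ffffffc plus an 8 byte RSP pop-offset
           gives 0x80000004, which no longer fits in lea's signed disp32; so the lea
           gets 0x7ffffffc and the remaining 8 bytes are added via bFinalAdj below. */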
11410 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
11411 Assert(idxGstRegBase == X86_GREG_xSP);
11412 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
11413 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
11414 Assert(bFinalAdj != 0);
11415 i64EffAddr -= bFinalAdj;
11416 Assert((int32_t)i64EffAddr == i64EffAddr);
11417 }
11418 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
11419//pReNative->pInstrBuf[off++] = 0xcc;
11420
11421 if (idxRegIndex == UINT8_MAX)
11422 {
11423 if (u32EffAddr == 0)
11424 {
11425 /* mov ret, base */
11426 if (f64Bit)
11427 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
11428 else
11429 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11430 }
11431 else
11432 {
11433 /* lea ret, [base + disp32] */
11434 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11435 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11436 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
11437 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11438 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11439 | (f64Bit ? X86_OP_REX_W : 0);
11440 pbCodeBuf[off++] = 0x8d;
11441 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11442 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11443 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
11444 else
11445 {
11446 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11447 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11448 }
11449 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11450 if (bMod == X86_MOD_MEM4)
11451 {
11452 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11453 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11454 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11455 }
11456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11457 }
11458 }
11459 else
11460 {
11461 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11462 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11463 if (idxRegBase == UINT8_MAX)
11464 {
11465 /* lea ret, [(index64 << cShiftIndex) + disp32] */
11466 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
11467 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11468 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11469 | (f64Bit ? X86_OP_REX_W : 0);
11470 pbCodeBuf[off++] = 0x8d;
11471 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
11472 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
11473 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11474 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11475 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11476 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11477 }
11478 else
11479 {
11480 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
11481 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11482 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11483 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11484 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11485 | (f64Bit ? X86_OP_REX_W : 0);
11486 pbCodeBuf[off++] = 0x8d;
11487 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
11488 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11489 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11490 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
11491 if (bMod != X86_MOD_MEM0)
11492 {
11493 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11494 if (bMod == X86_MOD_MEM4)
11495 {
11496 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11497 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11498 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11499 }
11500 }
11501 }
11502 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11503 }
11504
11505 if (!bFinalAdj)
11506 { /* likely */ }
11507 else
11508 {
11509 Assert(f64Bit);
11510 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
11511 }
11512
11513#elif defined(RT_ARCH_ARM64)
11514 if (i64EffAddr == 0)
11515 {
11516 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11517 if (idxRegIndex == UINT8_MAX)
11518 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
11519 else if (idxRegBase != UINT8_MAX)
11520 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
11521 f64Bit, false /*fSetFlags*/, cShiftIndex);
11522 else
11523 {
11524 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
11525 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
11526 }
11527 }
11528 else
11529 {
11530 if (f64Bit)
11531 { /* likely */ }
11532 else
11533 i64EffAddr = (int32_t)i64EffAddr;
11534
11535 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
11536 {
11537 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11538 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
11539 }
11540 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
11541 {
11542 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11543 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
11544 }
11545 else
11546 {
11547 if (f64Bit)
11548 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
11549 else
11550 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
11551 if (idxRegBase != UINT8_MAX)
11552 {
11553 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11554 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
11555 }
11556 }
11557 if (idxRegIndex != UINT8_MAX)
11558 {
11559 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11560 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
11561 f64Bit, false /*fSetFlags*/, cShiftIndex);
11562 }
11563 }
11564
11565#else
11566# error "port me"
11567#endif
11568
11569 if (idxRegIndex != UINT8_MAX)
11570 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11571 if (idxRegBase != UINT8_MAX)
11572 iemNativeRegFreeTmp(pReNative, idxRegBase);
11573 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11574 return off;
11575}
11576
11577
11578/*********************************************************************************************************************************
11579* TLB Lookup. *
11580*********************************************************************************************************************************/
11581
11582/**
11583 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
11584 */
11585DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
11586{
11587 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
11588 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
11589 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
11590 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
11591
11592 /* Do the lookup manually. */
11593 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
11594 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
11595 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
11596 if (RT_LIKELY(pTlbe->uTag == uTag))
11597 {
11598 /*
11599 * Check TLB page table level access flags.
11600 */
11601 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
11602 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
11603 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
11604 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
11605 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
11606 | IEMTLBE_F_PG_UNASSIGNED
11607 | IEMTLBE_F_PT_NO_ACCESSED
11608 | fNoWriteNoDirty | fNoUser);
11609 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
11610 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
11611 {
11612 /*
11613 * Return the address.
11614 */
11615 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
11616 if ((uintptr_t)pbAddr == uResult)
11617 return;
11618 RT_NOREF(cbMem);
11619 AssertFailed();
11620 }
11621 else
11622 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
11623 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
11624 }
11625 else
11626 AssertFailed();
11627 RT_BREAKPOINT();
11628}
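
/*
 * Rough sketch of what the helper above verifies (the real macros live in IEMInternal.h):
 *      GCPtrFlat = GCPtr + segment base (or GCPtr as-is for flat accesses)
 *      pTlbe     = data TLB entry selected by the tag derived from GCPtrFlat
 *      hit       = pTlbe->uTag matches and the relevant fFlagsAndPhysRev bits
 *                  equal DataTlb.uTlbPhysRev (accessed/user/write/dirty checks)
 *      result    = pTlbe->pbMappingR3 + (GCPtrFlat & GUEST_PAGE_OFFSET_MASK)
 */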
11629
11630/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
11631
11632
11633/*********************************************************************************************************************************
11634* Memory fetches and stores common *
11635*********************************************************************************************************************************/
11636
11637typedef enum IEMNATIVEMITMEMOP
11638{
11639 kIemNativeEmitMemOp_Store = 0,
11640 kIemNativeEmitMemOp_Fetch,
11641 kIemNativeEmitMemOp_Fetch_Zx_U16,
11642 kIemNativeEmitMemOp_Fetch_Zx_U32,
11643 kIemNativeEmitMemOp_Fetch_Zx_U64,
11644 kIemNativeEmitMemOp_Fetch_Sx_U16,
11645 kIemNativeEmitMemOp_Fetch_Sx_U32,
11646 kIemNativeEmitMemOp_Fetch_Sx_U64
11647} IEMNATIVEMITMEMOP;
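
/*
 * Illustrative mapping (see the IEM_MC_* wrappers further down): IEM_MC_FETCH_MEM_U8
 * uses cbMem=1 with kIemNativeEmitMemOp_Fetch, IEM_MC_FETCH_MEM_U16_SX_U64 uses
 * cbMem=2 with kIemNativeEmitMemOp_Fetch_Sx_U64, and the store variants all use
 * kIemNativeEmitMemOp_Store with the value width as cbMem.
 */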
11648
11649/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
11650 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
11651 * (with iSegReg = UINT8_MAX). */
11652DECL_INLINE_THROW(uint32_t)
11653iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
11654 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
11655 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
11656{
11657 /*
11658 * Assert sanity.
11659 */
11660 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11661 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
11662 Assert( enmOp != kIemNativeEmitMemOp_Store
11663 || pVarValue->enmKind == kIemNativeVarKind_Immediate
11664 || pVarValue->enmKind == kIemNativeVarKind_Stack);
11665 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
11666 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
11667 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
11668 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
11669 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11670 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
11671 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
11672 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
11673#ifdef VBOX_STRICT
11674 if (iSegReg == UINT8_MAX)
11675 {
11676 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11677 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11678 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11679 switch (cbMem)
11680 {
11681 case 1:
11682 Assert( pfnFunction
11683 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
11684 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11685 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11686 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11687 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11688 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
11689 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
11690 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
11691 : UINT64_C(0xc000b000a0009000) ));
11692 break;
11693 case 2:
11694 Assert( pfnFunction
11695 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
11696 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11697 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11698 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11699 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
11700 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
11701 : UINT64_C(0xc000b000a0009000) ));
11702 break;
11703 case 4:
11704 Assert( pfnFunction
11705 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
11706 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11707 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11708 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
11709 : UINT64_C(0xc000b000a0009000) ));
11710 break;
11711 case 8:
11712 Assert( pfnFunction
11713 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
11714 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
11715 : UINT64_C(0xc000b000a0009000) ));
11716 break;
11717 }
11718 }
11719 else
11720 {
11721 Assert(iSegReg < 6);
11722 switch (cbMem)
11723 {
11724 case 1:
11725 Assert( pfnFunction
11726 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
11727 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
11728 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11729 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11730 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11731 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
11732 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
11733 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
11734 : UINT64_C(0xc000b000a0009000) ));
11735 break;
11736 case 2:
11737 Assert( pfnFunction
11738 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
11739 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
11740 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11741 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11742 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
11743 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
11744 : UINT64_C(0xc000b000a0009000) ));
11745 break;
11746 case 4:
11747 Assert( pfnFunction
11748 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
11749 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
11750 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
11751 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
11752 : UINT64_C(0xc000b000a0009000) ));
11753 break;
11754 case 8:
11755 Assert( pfnFunction
11756 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
11757 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
11758 : UINT64_C(0xc000b000a0009000) ));
11759 break;
11760 }
11761 }
11762#endif
11763
11764#ifdef VBOX_STRICT
11765 /*
11766 * Check that the fExec flags we've got make sense.
11767 */
11768 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11769#endif
11770
11771 /*
11772 * To keep things simple we have to commit any pending writes first as we
11773 * may end up making calls.
11774 */
11775 /** @todo we could postpone this till we make the call and reload the
11776 * registers after returning from the call. Not sure if that's sensible or
11777 * not, though. */
11778#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
11779 off = iemNativeRegFlushPendingWrites(pReNative, off);
11780#else
11781 /* The program counter is treated differently for now. */
11782 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
11783#endif
11784
11785#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11786 /*
11787 * Move/spill/flush stuff out of call-volatile registers.
11788 * This is the easy way out. We could contain this to the tlb-miss branch
11789 * by saving and restoring active stuff here.
11790 */
11791 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11792#endif
11793
11794 /*
11795 * Define labels and allocate the result register (trying for the return
11796 * register if we can).
11797 */
11798 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11799 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
11800 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
11801 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
11802 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
11803 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
11804 uint8_t const idxRegValueStore = !TlbState.fSkip
11805 && enmOp == kIemNativeEmitMemOp_Store
11806 && pVarValue->enmKind != kIemNativeVarKind_Immediate
11807 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
11808 : UINT8_MAX;
11809 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11810 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11811 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11812 : UINT32_MAX;
11813
11814 /*
11815 * Jump to the TLB lookup code.
11816 */
11817 if (!TlbState.fSkip)
11818 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11819
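    /*
     * Rough shape of the code emitted below (sketch, with TLB lookup enabled):
     *          jmp     TlbLookup
     *      TlbMiss:
     *          <spill/save, load arguments, call pfnFunction, fetch result>
     *          jmp     TlbDone
     *      TlbLookup:
     *          <inline TLB probe; branches to TlbMiss on a miss>
     *          <inline load/store through the host mapping>
     *      TlbDone:
     */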
11820 /*
11821 * TlbMiss:
11822 *
11823 * Call helper to do the fetching.
11824 * We flush all guest register shadow copies here.
11825 */
11826 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
11827
11828#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11829 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11830#else
11831 RT_NOREF(idxInstr);
11832#endif
11833
11834#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
11835 if (pReNative->Core.offPc)
11836 {
11837 /*
11838 * Update the program counter but restore it at the end of the TlbMiss branch.
11839         * This should allow delaying more program counter updates for the TlbLookup and hit paths,
11840         * which are hopefully much more frequent, reducing the number of memory accesses.
11841 */
11842 /* Allocate a temporary PC register. */
11843 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
11844
11845 /* Perform the addition and store the result. */
11846 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
11847 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
11848
11849 /* Free and flush the PC register. */
11850 iemNativeRegFreeTmp(pReNative, idxPcReg);
11851 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
11852 }
11853#endif
11854
11855#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11856 /* Save variables in volatile registers. */
11857 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11858 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
11859 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
11860 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11861#endif
11862
11863 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
11864 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
11865 if (enmOp == kIemNativeEmitMemOp_Store)
11866 {
11867 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
11868 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
11869#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11870 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11871#else
11872 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
11873 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
11874#endif
11875 }
11876
11877 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
11878 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
11879#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11880 fVolGregMask);
11881#else
11882 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
11883#endif
11884
11885 if (iSegReg != UINT8_MAX)
11886 {
11887 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
11888 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
11889 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
11890 }
11891
11892 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11893 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11894
11895 /* Done setting up parameters, make the call. */
11896 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11897
11898 /*
11899 * Put the result in the right register if this is a fetch.
11900 */
11901 if (enmOp != kIemNativeEmitMemOp_Store)
11902 {
11903 Assert(idxRegValueFetch == pVarValue->idxReg);
11904 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
11905 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
11906 }
11907
11908#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11909 /* Restore variables and guest shadow registers to volatile registers. */
11910 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11911 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11912#endif
11913
11914#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
11915 if (pReNative->Core.offPc)
11916 {
11917 /*
11918 * Time to restore the program counter to its original value.
11919 */
11920 /* Allocate a temporary PC register. */
11921 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
11922
11923 /* Restore the original value. */
11924 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
11925 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
11926
11927 /* Free and flush the PC register. */
11928 iemNativeRegFreeTmp(pReNative, idxPcReg);
11929 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
11930 }
11931#endif
11932
11933#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11934 if (!TlbState.fSkip)
11935 {
11936 /* end of TlbMiss - Jump to the done label. */
11937 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11938 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11939
11940 /*
11941 * TlbLookup:
11942 */
11943 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
11944 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
11945 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
11946
11947 /*
11948 * Emit code to do the actual storing / fetching.
11949 */
11950 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
11951# ifdef VBOX_WITH_STATISTICS
11952 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11953 enmOp == kIemNativeEmitMemOp_Store
11954                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
11955                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
11956# endif
11957 switch (enmOp)
11958 {
11959 case kIemNativeEmitMemOp_Store:
11960 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
11961 {
11962 switch (cbMem)
11963 {
11964 case 1:
11965 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11966 break;
11967 case 2:
11968 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11969 break;
11970 case 4:
11971 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11972 break;
11973 case 8:
11974 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11975 break;
11976 default:
11977 AssertFailed();
11978 }
11979 }
11980 else
11981 {
11982 switch (cbMem)
11983 {
11984 case 1:
11985 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
11986 idxRegMemResult, TlbState.idxReg1);
11987 break;
11988 case 2:
11989 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
11990 idxRegMemResult, TlbState.idxReg1);
11991 break;
11992 case 4:
11993 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
11994 idxRegMemResult, TlbState.idxReg1);
11995 break;
11996 case 8:
11997 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
11998 idxRegMemResult, TlbState.idxReg1);
11999 break;
12000 default:
12001 AssertFailed();
12002 }
12003 }
12004 break;
12005
12006 case kIemNativeEmitMemOp_Fetch:
12007 case kIemNativeEmitMemOp_Fetch_Zx_U16:
12008 case kIemNativeEmitMemOp_Fetch_Zx_U32:
12009 case kIemNativeEmitMemOp_Fetch_Zx_U64:
12010 switch (cbMem)
12011 {
12012 case 1:
12013 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12014 break;
12015 case 2:
12016 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12017 break;
12018 case 4:
12019 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12020 break;
12021 case 8:
12022 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12023 break;
12024 default:
12025 AssertFailed();
12026 }
12027 break;
12028
12029 case kIemNativeEmitMemOp_Fetch_Sx_U16:
12030 Assert(cbMem == 1);
12031 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12032 break;
12033
12034 case kIemNativeEmitMemOp_Fetch_Sx_U32:
12035 Assert(cbMem == 1 || cbMem == 2);
12036 if (cbMem == 1)
12037 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12038 else
12039 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12040 break;
12041
12042 case kIemNativeEmitMemOp_Fetch_Sx_U64:
12043 switch (cbMem)
12044 {
12045 case 1:
12046 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12047 break;
12048 case 2:
12049 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12050 break;
12051 case 4:
12052 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12053 break;
12054 default:
12055 AssertFailed();
12056 }
12057 break;
12058
12059 default:
12060 AssertFailed();
12061 }
12062
12063 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12064
12065 /*
12066 * TlbDone:
12067 */
12068 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12069
12070 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
12071
12072# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12073 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
12074 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12075# endif
12076 }
12077#else
12078 RT_NOREF(fAlignMask, idxLabelTlbMiss);
12079#endif
12080
12081 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
12082 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12083 return off;
12084}
12085
12086
12087
12088/*********************************************************************************************************************************
12089* Memory fetches (IEM_MEM_FETCH_XXX). *
12090*********************************************************************************************************************************/
12091
12092/* 8-bit segmented: */
12093#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
12094 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
12095 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
12096 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12097
12098#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12099 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12100 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
12101 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12102
12103#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12104 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12105 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12106 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12107
12108#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12109 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12110 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12111 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12112
12113#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12114 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12115 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
12116 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
12117
12118#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12119 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12120 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12121 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
12122
12123#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12124 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12125 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12126 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
12127
12128/* 16-bit segmented: */
12129#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12130 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12131 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12132 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12133
12134#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
12135 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12136 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12137 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
12138
12139#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12140 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12141 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12142 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12143
12144#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12145 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12146 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12147 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12148
12149#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12150 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12151 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12152 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
12153
12154#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12155 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12156 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12157 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
12158
12159
12160/* 32-bit segmented: */
12161#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12162 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12163 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12164 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
12165
12166#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
12167 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12168 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12169 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
12170
12171#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12172 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12173 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12174 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
12175
12176#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12177 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12178 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12179 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
12180
12181
12182/* 64-bit segmented: */
12183#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12184 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12185 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
12186 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
12187
12188
12189
12190/* 8-bit flat: */
12191#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
12192 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
12193 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
12194 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12195
12196#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
12197 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12198 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
12199 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12200
12201#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
12202 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12203 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12204 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12205
12206#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
12207 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12208 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12209 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12210
12211#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
12212 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12213 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
12214 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
12215
12216#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
12217 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12218 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12219 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
12220
12221#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
12222 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12223 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12224 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
12225
12226
12227/* 16-bit flat: */
12228#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
12229 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12230 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12231 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12232
12233#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
12234 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12235 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12236 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
12237
12238#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
12239 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12240 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12241 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12242
12243#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
12244 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12245 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12246 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12247
12248#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
12249 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12250 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12251 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
12252
12253#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
12254 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12255 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12256 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
12257
12258/* 32-bit flat: */
12259#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
12260 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12261 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12262 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
12263
12264#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
12265 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12266 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12267 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
12268
12269#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
12270 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12271 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12272 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
12273
12274#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
12275 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12276 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12277 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
12278
12279/* 64-bit flat: */
12280#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
12281 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12282 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
12283 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
12284
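/*
 * Note on the fetch macro naming above: the U<src>_SX_U<dst> / U<src>_ZX_U<dst>
 * variants fetch <src> bits and sign- resp. zero-extend the result into a
 * <dst>-bit destination variable.  A minimal host-side sketch of the semantics
 * (hypothetical helper names, illustration only):
 *
 *      #include <stdint.h>
 *
 *      static uint64_t illustrateFetchU16SxU64(uint16_t const *pu16Src)
 *      {
 *          return (uint64_t)(int64_t)(int16_t)*pu16Src;    // sign-extend 16 -> 64 (kIemNativeEmitMemOp_Fetch_Sx_U64)
 *      }
 *
 *      static uint64_t illustrateFetchU32ZxU64(uint32_t const *pu32Src)
 *      {
 *          return (uint64_t)*pu32Src;                      // zero-extend 32 -> 64 (kIemNativeEmitMemOp_Fetch_Zx_U64)
 *      }
 */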
12285
12286
12287/*********************************************************************************************************************************
12288* Memory stores (IEM_MEM_STORE_XXX). *
12289*********************************************************************************************************************************/
12290
12291#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
12292 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
12293 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
12294 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
12295
12296#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
12297 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
12298 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
12299 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
12300
12301#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
12302 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
12303 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
12304 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
12305
12306#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
12307 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
12308 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
12309 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
12310
12311
12312#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
12313 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
12314 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
12315 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
12316
12317#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
12318 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
12319 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
12320 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
12321
12322#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
12323 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
12324 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
12325 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
12326
12327#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
12328 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
12329 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
12330 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
12331
12332
12333#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
12334 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12335 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
12336
12337#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
12338 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12339 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
12340
12341#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
12342 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12343 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
12344
12345#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
12346 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12347 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
12348
12349
12350#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
12351 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12352 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
12353
12354#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
12355 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12356 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
12357
12358#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
12359 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12360 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
12361
12362#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
12363 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12364 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
12365
12366/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
12367 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
12368DECL_INLINE_THROW(uint32_t)
12369iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
12370 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
12371{
12372 /*
12373 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
12374 * to do the grunt work.
12375 */
12376 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
12377 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
12378 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
12379 pfnFunction, idxInstr);
12380 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
12381 return off;
12382}
12383
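/*
 * Illustration of the wrapping done above: IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, 0x1234)
 * expands (per the macro earlier in this section) to roughly the following, so the
 * constant is materialized as a temporary const variable and then takes the
 * ordinary variable store path:
 *
 *      off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, 0x1234, a_iSeg, a_GCPtrMem, sizeof(uint16_t),
 *                                                 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr);
 */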
12384
12385
12386/*********************************************************************************************************************************
12387* Stack Accesses. *
12388*********************************************************************************************************************************/
12389/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
12390#define IEM_MC_PUSH_U16(a_u16Value) \
12391 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12392 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
12393#define IEM_MC_PUSH_U32(a_u32Value) \
12394 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12395 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
12396#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
12397 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
12398 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
12399#define IEM_MC_PUSH_U64(a_u64Value) \
12400 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12401 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
12402
12403#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
12404 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12405 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
12406#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
12407 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12408 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
12409#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
12410 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
12411 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
12412
12413#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
12414 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12415 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
12416#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
12417 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12418 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
12419
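/*
 * Worked example of the cBitsVarAndFlat packing used above, taking
 * IEM_MC_FLAT64_PUSH_U16, i.e. RT_MAKE_U32_FROM_U8(16, 64, 0, 0):
 *
 *      uint8_t const cbMem     = RT_BYTE1(cBitsVarAndFlat) / 8;   // 16 / 8 = 2 byte operand
 *      uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat);       // 64     = flat 64-bit stack pointer
 *      bool    const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;  // 0      = not a segment register push
 *
 * These are the decodes done by iemNativeEmitStackPush below; the fourth byte
 * is currently always zero.
 */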
12420
12421DECL_FORCE_INLINE_THROW(uint32_t)
12422iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12423{
12424 /* Use16BitSp: */
12425#ifdef RT_ARCH_AMD64
12426 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12427 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12428#else
12429 /* sub regeff, regrsp, #cbMem */
12430 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
12431 /* and regeff, regeff, #0xffff */
12432 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12433 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
12434 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
12435 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
12436#endif
12437 return off;
12438}
12439
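/*
 * Worked example for the 16-bit SP push path above (values in hex):
 *      old RSP = 0x0000123400000002, cbMem = 4
 *      EffSp   = (0x0002 - 4) & 0xffff        = 0xfffe
 *      new RSP = (old RSP & ~0xffff) | 0xfffe = 0x000012340000fffe
 * I.e. the subtraction wraps within 16 bits, the store goes to SS:0xfffe, and
 * bits 63:16 of RSP are left untouched.
 */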
12440
12441DECL_FORCE_INLINE(uint32_t)
12442iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12443{
12444 /* Use32BitSp: */
12445 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12446 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12447 return off;
12448}
12449
12450
12451/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
12452DECL_INLINE_THROW(uint32_t)
12453iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
12454 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12455{
12456 /*
12457 * Assert sanity.
12458 */
12459 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12460 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12461#ifdef VBOX_STRICT
12462 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12463 {
12464 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12465 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12466 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12467 Assert( pfnFunction
12468 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12469 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
12470 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
12471 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12472 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
12473 : UINT64_C(0xc000b000a0009000) ));
12474 }
12475 else
12476 Assert( pfnFunction
12477 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
12478 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
12479 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
12480 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
12481 : UINT64_C(0xc000b000a0009000) ));
12482#endif
12483
12484#ifdef VBOX_STRICT
12485 /*
12486 * Check that the fExec flags we've got make sense.
12487 */
12488 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12489#endif
12490
12491 /*
12492 * To keep things simple we have to commit any pending writes first as we
12493 * may end up making calls.
12494 */
12495 /** @todo we could postpone this till we make the call and reload the
12496 * registers after returning from the call. Not sure if that's sensible or
12497 * not, though. */
12498 off = iemNativeRegFlushPendingWrites(pReNative, off);
12499
12500 /*
12501 * First we calculate the new RSP and the effective stack pointer value.
12502 * For 64-bit mode and flat 32-bit these two are the same.
12503 * (Code structure is very similar to that of POP)
12504 */
12505 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12506 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
12507 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
12508 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
12509 ? cbMem : sizeof(uint16_t);
12510 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12511 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12512 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12513 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12514 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12515 if (cBitsFlat != 0)
12516 {
12517 Assert(idxRegEffSp == idxRegRsp);
12518 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12519 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12520 if (cBitsFlat == 64)
12521 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
12522 else
12523 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
12524 }
12525 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12526 {
12527 Assert(idxRegEffSp != idxRegRsp);
12528 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12529 kIemNativeGstRegUse_ReadOnly);
12530#ifdef RT_ARCH_AMD64
12531 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12532#else
12533 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12534#endif
12535 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12536 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12537 offFixupJumpToUseOtherBitSp = off;
12538 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12539 {
12540 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12541 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12542 }
12543 else
12544 {
12545 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12546 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12547 }
12548 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12549 }
12550 /* SpUpdateEnd: */
12551 uint32_t const offLabelSpUpdateEnd = off;
12552
12553 /*
12554 * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
12555 * straight to TlbMiss if we're skipping the lookup).
12556 */
12557 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12558 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
12559 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12560 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12561 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12562 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12563 : UINT32_MAX;
12564 uint8_t const idxRegValue = !TlbState.fSkip
12565 && pVarValue->enmKind != kIemNativeVarKind_Immediate
12566 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
12567 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
12568 : UINT8_MAX;
12569 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
12570
12571
12572 if (!TlbState.fSkip)
12573 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12574 else
12575 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12576
12577 /*
12578 * Use16BitSp:
12579 */
12580 if (cBitsFlat == 0)
12581 {
12582#ifdef RT_ARCH_AMD64
12583 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12584#else
12585 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12586#endif
12587 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12588 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12589 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12590 else
12591 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12592 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12593 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12594 }
12595
12596 /*
12597 * TlbMiss:
12598 *
12599 * Call helper to do the pushing.
12600 */
12601 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12602
12603#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12604 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12605#else
12606 RT_NOREF(idxInstr);
12607#endif
12608
12609 /* Save variables in volatile registers. */
12610 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12611 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12612 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
12613 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
12614 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12615
12616 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
12617 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
12618 {
12619 /* Swap them using ARG0 as temp register: */
12620 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
12621 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
12622 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
12623 }
12624 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
12625 {
12626 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
12627 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
12628 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12629
12630 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
12631 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12632 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12633 }
12634 else
12635 {
12636 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
12637 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12638
12639 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
12640 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
12641 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
12642 }
12643
12644 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12645 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12646
12647 /* Done setting up parameters, make the call. */
12648 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12649
12650 /* Restore variables and guest shadow registers to volatile registers. */
12651 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12652 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12653
12654#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12655 if (!TlbState.fSkip)
12656 {
12657 /* end of TlbMiss - Jump to the done label. */
12658 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12659 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12660
12661 /*
12662 * TlbLookup:
12663 */
12664 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
12665 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12666
12667 /*
12668 * Emit code to do the actual storing / fetching.
12669 */
12670 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
12671# ifdef VBOX_WITH_STATISTICS
12672 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12673 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12674# endif
12675 if (idxRegValue != UINT8_MAX)
12676 {
12677 switch (cbMemAccess)
12678 {
12679 case 2:
12680 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12681 break;
12682 case 4:
12683 if (!fIsIntelSeg)
12684 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12685 else
12686 {
12687 /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
12688 PUSH FS in real mode, so we have to try to emulate that here.
12689 We borrow the now unused idxReg1 from the TLB lookup code here. */
12690 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
12691 kIemNativeGstReg_EFlags);
12692 if (idxRegEfl != UINT8_MAX)
12693 {
12694#ifdef RT_ARCH_AMD64
12695 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
12696 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12697 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12698#else
12699 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
12700 off, TlbState.idxReg1, idxRegEfl,
12701 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12702#endif
12703 iemNativeRegFreeTmp(pReNative, idxRegEfl);
12704 }
12705 else
12706 {
12707 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
12708 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
12709 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12710 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12711 }
12712 /* ASSUMES the upper half of idxRegValue is ZERO. */
12713 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
12714 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
12715 }
12716 break;
12717 case 8:
12718 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12719 break;
12720 default:
12721 AssertFailed();
12722 }
12723 }
12724 else
12725 {
12726 switch (cbMemAccess)
12727 {
12728 case 2:
12729 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
12730 idxRegMemResult, TlbState.idxReg1);
12731 break;
12732 case 4:
12733 Assert(!fIsSegReg);
12734 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
12735 idxRegMemResult, TlbState.idxReg1);
12736 break;
12737 case 8:
12738 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
12739 break;
12740 default:
12741 AssertFailed();
12742 }
12743 }
12744
12745 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12746 TlbState.freeRegsAndReleaseVars(pReNative);
12747
12748 /*
12749 * TlbDone:
12750 *
12751 * Commit the new RSP value.
12752 */
12753 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12754 }
12755#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12756
12757 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
12758 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12759 if (idxRegEffSp != idxRegRsp)
12760 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12761
12762 /* The value variable is implicitly flushed. */
12763 if (idxRegValue != UINT8_MAX)
12764 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12765 iemNativeVarFreeLocal(pReNative, idxVarValue);
12766
12767 return off;
12768}
12769
12770
12771
12772/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
12773#define IEM_MC_POP_GREG_U16(a_iGReg) \
12774 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12775 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
12776#define IEM_MC_POP_GREG_U32(a_iGReg) \
12777 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12778 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
12779#define IEM_MC_POP_GREG_U64(a_iGReg) \
12780 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12781 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
12782
12783#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
12784 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12785 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12786#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
12787 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12788 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
12789
12790#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
12791 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12792 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12793#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
12794 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12795 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
12796
12797
12798DECL_FORCE_INLINE_THROW(uint32_t)
12799iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
12800 uint8_t idxRegTmp)
12801{
12802 /* Use16BitSp: */
12803#ifdef RT_ARCH_AMD64
12804 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12805 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12806 RT_NOREF(idxRegTmp);
12807#else
12808 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
12809 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
12810 /* add tmp, regrsp, #cbMem */
12811 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
12812 /* and tmp, tmp, #0xffff */
12813 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12814 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
12815 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from idxRegTmp to RSP bits 15:0, keeping the other RSP bits as is. */
12816 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
12817#endif
12818 return off;
12819}
12820
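/*
 * Worked example for the 16-bit SP pop path above: with SP=0xfffe and cbMem=2
 * the value is read from SS:0xfffe (EffSp = old SP) and the new SP becomes
 * (0xfffe + 2) & 0xffff = 0x0000; bits 63:16 of RSP are left untouched.
 */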
12821
12822DECL_FORCE_INLINE(uint32_t)
12823iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12824{
12825 /* Use32BitSp: */
12826 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12827 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12828 return off;
12829}
12830
12831
12832/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
12833DECL_INLINE_THROW(uint32_t)
12834iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
12835 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12836{
12837 /*
12838 * Assert sanity.
12839 */
12840 Assert(idxGReg < 16);
12841#ifdef VBOX_STRICT
12842 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12843 {
12844 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12845 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12846 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12847 Assert( pfnFunction
12848 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12849 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
12850 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12851 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
12852 : UINT64_C(0xc000b000a0009000) ));
12853 }
12854 else
12855 Assert( pfnFunction
12856 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
12857 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
12858 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
12859 : UINT64_C(0xc000b000a0009000) ));
12860#endif
12861
12862#ifdef VBOX_STRICT
12863 /*
12864 * Check that the fExec flags we've got make sense.
12865 */
12866 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12867#endif
12868
12869 /*
12870 * To keep things simple we have to commit any pending writes first as we
12871 * may end up making calls.
12872 */
12873 off = iemNativeRegFlushPendingWrites(pReNative, off);
12874
12875 /*
12876 * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
12877 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
12878 * directly as the effective stack pointer.
12879 * (Code structure is very similar to that of PUSH)
12880 */
12881 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12882 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12883 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12884 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12885 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12886 /** @todo can do a better job picking the register here. For cbMem >= 4 this
12887 * will be the resulting register value. */
12888 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
12889
12890 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12891 if (cBitsFlat != 0)
12892 {
12893 Assert(idxRegEffSp == idxRegRsp);
12894 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12895 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12896 }
12897 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12898 {
12899 Assert(idxRegEffSp != idxRegRsp);
12900 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12901 kIemNativeGstRegUse_ReadOnly);
12902#ifdef RT_ARCH_AMD64
12903 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12904#else
12905 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12906#endif
12907 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12908 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12909 offFixupJumpToUseOtherBitSp = off;
12910 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12911 {
12912/** @todo can skip idxRegRsp updating when popping ESP. */
12913 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12914 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12915 }
12916 else
12917 {
12918 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12919 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12920 }
12921 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12922 }
12923 /* SpUpdateEnd: */
12924 uint32_t const offLabelSpUpdateEnd = off;
12925
12926 /*
12927 * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
12928 * straight to TlbMiss if we're skipping the lookup).
12929 */
12930 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12931 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
12932 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12933 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12934 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12935 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12936 : UINT32_MAX;
12937
12938 if (!TlbState.fSkip)
12939 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12940 else
12941 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12942
12943 /*
12944 * Use16BitSp:
12945 */
12946 if (cBitsFlat == 0)
12947 {
12948#ifdef RT_ARCH_AMD64
12949 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12950#else
12951 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12952#endif
12953 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12954 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12955 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12956 else
12957 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12958 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12959 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12960 }
12961
12962 /*
12963 * TlbMiss:
12964 *
12965 * Call helper to do the popping.
12966 */
12967 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12968
12969#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12970 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12971#else
12972 RT_NOREF(idxInstr);
12973#endif
12974
12975 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12976 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12977 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
12978 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12979
12980
12981 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
12982 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12983 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12984
12985 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12986 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12987
12988 /* Done setting up parameters, make the call. */
12989 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12990
12991 /* Move the return register content to idxRegMemResult. */
12992 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12993 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12994
12995 /* Restore variables and guest shadow registers to volatile registers. */
12996 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12997 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12998
12999#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13000 if (!TlbState.fSkip)
13001 {
13002 /* end of TlbMiss - Jump to the done label. */
13003 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13004 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13005
13006 /*
13007 * TlbLookup:
13008 */
13009 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
13010 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13011
13012 /*
13013 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
13014 */
13015 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13016# ifdef VBOX_WITH_STATISTICS
13017 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13018 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
13019# endif
13020 switch (cbMem)
13021 {
13022 case 2:
13023 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13024 break;
13025 case 4:
13026 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13027 break;
13028 case 8:
13029 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13030 break;
13031 default:
13032 AssertFailed();
13033 }
13034
13035 TlbState.freeRegsAndReleaseVars(pReNative);
13036
13037 /*
13038 * TlbDone:
13039 *
13040 * Set the new RSP value (FLAT accesses need to calculate it first) and
13041 * commit the popped register value.
13042 */
13043 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13044 }
13045#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
13046
13047 if (idxGReg != X86_GREG_xSP)
13048 {
13049 /* Set the register. */
13050 if (cbMem >= sizeof(uint32_t))
13051 {
13052#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
13053 AssertMsg( pReNative->idxCurCall == 0
13054 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
13055 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
13056#endif
13057 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
13058 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
13059 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
13060 }
13061 else
13062 {
13063 Assert(cbMem == sizeof(uint16_t));
13064 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
13065 kIemNativeGstRegUse_ForUpdate);
13066 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
13067 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
13068 iemNativeRegFreeTmp(pReNative, idxRegDst);
13069 }
13070
13071 /* Complete RSP calculation for FLAT mode. */
13072 if (idxRegEffSp == idxRegRsp)
13073 {
13074 if (cBitsFlat == 64)
13075 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
13076 else
13077 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
13078 }
13079 }
13080 else
13081 {
13082 /* We're popping RSP, ESP or SP. Only the latter requires a bit of extra work, of course. */
13083 if (cbMem == sizeof(uint64_t))
13084 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
13085 else if (cbMem == sizeof(uint32_t))
13086 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
13087 else
13088 {
13089 if (idxRegEffSp == idxRegRsp)
13090 {
13091 if (cBitsFlat == 64)
13092 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
13093 else
13094 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
13095 }
13096 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
13097 }
13098 }
13099 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
13100
13101 iemNativeRegFreeTmp(pReNative, idxRegRsp);
13102 if (idxRegEffSp != idxRegRsp)
13103 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
13104 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13105
13106 return off;
13107}
13108
13109
13110
13111/*********************************************************************************************************************************
13112* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
13113*********************************************************************************************************************************/
13114
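/*
 * The fAlignMask arguments below are the natural alignment masks of the
 * accesses, i.e. sizeof(type) - 1.  A minimal host-side sketch of the check
 * such a mask implies (hypothetical helper, illustration only):
 *
 *      #include <stdbool.h>
 *      #include <stdint.h>
 *
 *      static bool isNaturallyAligned(uint64_t GCPtrMem, uint8_t fAlignMask)
 *      {
 *          return (GCPtrMem & fAlignMask) == 0;    // e.g. 0x1002 & (sizeof(uint32_t) - 1) != 0 -> misaligned u32
 *      }
 *
 * How a misaligned access is actually handled (TLB miss path, #AC, etc.) is
 * up to the emitters and helpers, not this sketch.
 */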
13115#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13116 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13117 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
13118 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
13119
13120#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13121 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13122 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
13123 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
13124
13125#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13126 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13127 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
13128 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
13129
13130#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13131 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13132 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
13133 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
13134
13135
13136#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13137 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13138 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13139 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
13140
13141#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13142 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13143 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13144 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
13145
13146#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13147 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13148 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13149 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
13150
13151#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13152 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13153 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13154 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
13155
13156#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13157 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
13158 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13159 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
13160
13161
13162#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13163 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13164 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13165 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
13166
13167#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13168 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13169 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13170 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
13171
13172#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13173 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13174 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13175 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
13176
13177#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13178 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13179 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13180 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
13181
13182#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13183 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
13184 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13185 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
13186
13187
13188#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13189 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13190 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13191 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
13192
13193#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13194 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13195 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13196 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
13197#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13198 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13199 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13200 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
13201
13202#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13203 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13204 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13205 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
13206
13207#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13208 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
13209 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13210 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
13211
13212
13213#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13214 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
13215 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13216 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
13217
13218#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13219 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
13220 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
13221 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
13222
13223
13224#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13225 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13226 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13227 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
13228
13229#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13230 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13231 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13232 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
13233
13234#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13235 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13236 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13237 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
13238
13239#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13240 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13241 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13242 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
13243
13244
13245
13246#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13247 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13248 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
13249 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
13250
13251#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13252 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13253 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
13254 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
13255
13256#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13257 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13258 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
13259 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
13260
13261#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13262 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13263 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
13264 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
13265
13266
13267#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13268 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13269 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13270 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
13271
13272#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13273 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13274 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13275 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
13276
13277#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13278 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13279 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13280 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
13281
13282#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13283 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13284 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13285 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
13286
13287#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
13288 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
13289 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13290 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
13291
13292
13293#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13294 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13295 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13296 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
13297
13298#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13299 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13300 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13301 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
13302
13303#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13304 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13305 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13306 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr)
13307
13308#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13309 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13310 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13311 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
13312
13313#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
13314 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
13315 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13316 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr)
13317
13318
13319#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13320 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13321 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13322 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
13323
13324#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13325 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13326 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13327 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
13328
13329#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13330 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13331 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13332 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr)
13333
13334#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13335 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13336 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13337 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
13338
13339#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
13340 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
13341 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13342 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr)
13343
13344
13345#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
13346 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
13347 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13348 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr)
13349
13350#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
13351 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
13352 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
13353 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr)
13354
13355
13356#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13357 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13358 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13359 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
13360
13361#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13362 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13363 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13364 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
13365
13366#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13367 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13368 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13369 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr)
13370
13371#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13372 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13373 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13374 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
13375
13376
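/**
 * Common emitter for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 * statements above.
 *
 * Emits an inline TLB lookup with a TlbMiss path that calls @a pfnFunction,
 * leaving the host mapping pointer in @a idxVarMem and the unmap info
 * (bUnmapInfo) in @a idxVarUnmapInfo.
 */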
13377DECL_INLINE_THROW(uint32_t)
13378iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
13379 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
13380 uintptr_t pfnFunction, uint8_t idxInstr)
13381{
13382 /*
13383 * Assert sanity.
13384 */
13385 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
13386 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
13387 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
13388 && pVarMem->cbVar == sizeof(void *),
13389 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13390
13391 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
13392 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13393 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
13394 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
13395 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13396
13397 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
13398 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
13399 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
13400 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
13401 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13402
13403 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
13404
13405 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
13406
13407#ifdef VBOX_STRICT
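 /* Strict builds only: pick the helper we expect for the given access flags so
    the pfnFunction argument can be cross-checked below. */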
13408# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
13409 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
13410 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
13411 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
13412 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
13413# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
13414 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
13415 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
13416 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
13417
13418 if (iSegReg == UINT8_MAX)
13419 {
13420 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13421 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13422 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13423 switch (cbMem)
13424 {
13425 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
13426 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
13427 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
13428 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
13429 case 10:
13430 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
13431 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
13432 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13433 break;
13434 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
13435# if 0
13436 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
13437 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
13438# endif
13439 default: AssertFailed(); break;
13440 }
13441 }
13442 else
13443 {
13444 Assert(iSegReg < 6);
13445 switch (cbMem)
13446 {
13447 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
13448 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
13449 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
13450 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
13451 case 10:
13452 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
13453 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
13454 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13455 break;
13456 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
13457# if 0
13458 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
13459 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
13460# endif
13461 default: AssertFailed(); break;
13462 }
13463 }
13464# undef IEM_MAP_HLP_FN
13465# undef IEM_MAP_HLP_FN_NO_AT
13466#endif
13467
13468#ifdef VBOX_STRICT
13469 /*
13470 * Check that the fExec flags we've got make sense.
13471 */
13472 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13473#endif
13474
13475 /*
13476 * To keep things simple we have to commit any pending writes first as we
13477 * may end up making calls.
13478 */
13479 off = iemNativeRegFlushPendingWrites(pReNative, off);
13480
13481#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13482 /*
13483 * Move/spill/flush stuff out of call-volatile registers.
13484 * This is the easy way out. We could contain this to the tlb-miss branch
13485 * by saving and restoring active stuff here.
13486 */
13487 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
13488 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13489#endif
13490
13491 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
13492 while the tlb-miss codepath will temporarily put it on the stack.
13493 Set the type to stack here so we don't need to do it twice below. */
13494 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
13495 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
13496 /** @todo use a tmp register from TlbState, since they'll be free after tlb
13497 * lookup is done. */
13498
13499 /*
13500 * Define labels and allocate the result register (trying for the return
13501 * register if we can).
13502 */
13503 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13504 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13505 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
13506 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
13507 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
13508 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13509 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13510 : UINT32_MAX;
13511//off=iemNativeEmitBrk(pReNative, off, 0);
13512 /*
13513 * Jump to the TLB lookup code.
13514 */
13515 if (!TlbState.fSkip)
13516 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13517
13518 /*
13519 * TlbMiss:
13520 *
13521 * Call helper to do the fetching.
13522 * We flush all guest register shadow copies here.
13523 */
13524 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13525
13526#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13527 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13528#else
13529 RT_NOREF(idxInstr);
13530#endif
13531
13532#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13533 /* Save variables in volatile registers. */
13534 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
13535 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13536#endif
13537
13538 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
13539 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
13540#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13541 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13542#else
13543 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13544#endif
13545
13546 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
13547 if (iSegReg != UINT8_MAX)
13548 {
13549 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13550 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
13551 }
13552
13553 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
13554 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
13555 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
13556
13557 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13558 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13559
13560 /* Done setting up parameters, make the call. */
13561 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13562
13563 /*
13564 * Put the output in the right registers.
13565 */
13566 Assert(idxRegMemResult == pVarMem->idxReg);
13567 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
13568 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
13569
13570#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13571 /* Restore variables and guest shadow registers to volatile registers. */
13572 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13573 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13574#endif
13575
13576 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
13577 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
13578
13579#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13580 if (!TlbState.fSkip)
13581 {
13582 /* end of TlbMiss - Jump to the done label. */
13583 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13584 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13585
13586 /*
13587 * TlbLookup:
13588 */
13589 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
13590 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13591# ifdef VBOX_WITH_STATISTICS
13592 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
13593 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
13594# endif
13595
13596 /* [idxVarUnmapInfo] = 0; */
13597 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
13598
13599 /*
13600 * TlbDone:
13601 */
13602 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13603
13604 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13605
13606# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13607 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13608 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13609# endif
13610 }
13611#else
13612 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
13613#endif
13614
13615 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13616 iemNativeVarRegisterRelease(pReNative, idxVarMem);
13617
13618 return off;
13619}
13620
13621
13622#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
13623 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
13624 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
13625
13626#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
13627 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
13628 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
13629
13630#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
13631 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
13632 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
13633
13634#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
13635 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
13636 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
13637
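/**
 * Common emitter for the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statements above.
 *
 * Emits a test of the bUnmapInfo byte and only calls @a pfnFunction (the
 * commit-and-unmap helper) when it is non-zero.
 */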
13638DECL_INLINE_THROW(uint32_t)
13639iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
13640 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
13641{
13642 /*
13643 * Assert sanity.
13644 */
13645 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13646#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
13647 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
13648#endif
13649 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
13650 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
13651 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
13652#ifdef VBOX_STRICT
13653 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
13654 {
13655 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
13656 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
13657 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
13658 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
13659 case IEM_ACCESS_TYPE_WRITE:
13660 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
13661 case IEM_ACCESS_TYPE_READ:
13662 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
13663 default: AssertFailed();
13664 }
13665#else
13666 RT_NOREF(fAccess);
13667#endif
13668
13669 /*
13670 * To keep things simple we have to commit any pending writes first as we
13671 * may end up making calls (there shouldn't be any at this point, so this
13672 * is just for consistency).
13673 */
13674 /** @todo we could postpone this till we make the call and reload the
13675 * registers after returning from the call. Not sure if that's sensible or
13676 * not, though. */
13677 off = iemNativeRegFlushPendingWrites(pReNative, off);
13678
13679 /*
13680 * Move/spill/flush stuff out of call-volatile registers.
13681 *
13682 * We exclude any register holding the bUnmapInfo variable, as we'll be
13683 * checking it after returning from the call and will free it afterwards.
13684 */
13685 /** @todo save+restore active registers and maybe guest shadows in miss
13686 * scenario. */
13687 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
13688
13689 /*
13690 * If the bUnmapInfo value is zero, we can skip all this. Otherwise we'll
13691 * have to call the unmap helper function.
13692 *
13693 * The likelihood of it being zero is higher than for the TLB hit when doing
13694 * the mapping, as a TLB miss for a well-aligned and unproblematic memory
13695 * access should also end up with a mapping that won't need special unmapping.
13696 */
13697 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
13698 * should speed up things for the pure interpreter as well when TLBs
13699 * are enabled. */
13700#ifdef RT_ARCH_AMD64
13701 if (pVarUnmapInfo->idxReg == UINT8_MAX)
13702 {
13703 /* test byte [rbp - xxx], 0ffh */
13704 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
13705 pbCodeBuf[off++] = 0xf6;
13706 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
13707 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
13708 pbCodeBuf[off++] = 0xff;
13709 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13710 }
13711 else
13712#endif
13713 {
13714 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
13715 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
13716 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
13717 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13718 }
13719 uint32_t const offJmpFixup = off;
13720 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
13721
13722 /*
13723 * Call the unmap helper function.
13724 */
13725#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
13726 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13727#else
13728 RT_NOREF(idxInstr);
13729#endif
13730
13731 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
13732 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
13733 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13734
13735 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13736 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13737
13738 /* Done setting up parameters, make the call. */
13739 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13740
13741 /* The bUnmapInfo variable is implicitly freed by these MCs. */
13742 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
13743
13744 /*
13745 * Done, just fixup the jump for the non-call case.
13746 */
13747 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
13748
13749 return off;
13750}
13751
13752
13753
13754/*********************************************************************************************************************************
13755* State and Exceptions *
13756*********************************************************************************************************************************/
13757
13758#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13759#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13760
13761#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13762#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13763#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13764
13765#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13766#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13767#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13768
13769
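/** Emits code for IEM_MC_ACTUALIZE_FPU/SSE/AVX_STATE_FOR_CHANGE/READ and
 * IEM_MC_PREPARE_SSE/AVX_USAGE; currently a no-op placeholder. */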
13770DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
13771{
13772 /** @todo this needs a lot more work later. */
13773 RT_NOREF(pReNative, fForChange);
13774 return off;
13775}
13776
13777
13778
13779/*********************************************************************************************************************************
13780* Emitters for FPU related operations. *
13781*********************************************************************************************************************************/
13782
13783#define IEM_MC_FETCH_FCW(a_u16Fcw) \
13784 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
13785
13786/** Emits code for IEM_MC_FETCH_FCW. */
13787DECL_INLINE_THROW(uint32_t)
13788iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
13789{
13790 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
13791 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
13792
13793 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
13794
13795 /* Allocate a temporary FCW register. */
13796 /** @todo eliminate extra register */
13797 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
13798 kIemNativeGstRegUse_ReadOnly);
13799
13800 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
13801
13802 /* Free but don't flush the FCW register. */
13803 iemNativeRegFreeTmp(pReNative, idxFcwReg);
13804 iemNativeVarRegisterRelease(pReNative, idxDstVar);
13805
13806 return off;
13807}
13808
13809
13810#define IEM_MC_FETCH_FSW(a_u16Fsw) \
13811 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
13812
13813/** Emits code for IEM_MC_FETCH_FSW. */
13814DECL_INLINE_THROW(uint32_t)
13815iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
13816{
13817 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
13818 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
13819
13820 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
13821 /* Allocate a temporary FSW register. */
13822 /** @todo eliminate extra register */
13823 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
13824 kIemNativeGstRegUse_ReadOnly);
13825
13826 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
13827
13828 /* Free but don't flush the FSW register. */
13829 iemNativeRegFreeTmp(pReNative, idxFswReg);
13830 iemNativeVarRegisterRelease(pReNative, idxDstVar);
13831
13832 return off;
13833}
13834
13835
13836
13837/*********************************************************************************************************************************
13838* The native code generator functions for each MC block. *
13839*********************************************************************************************************************************/
13840
13841/*
13842 * Include instruction emitters.
13843 */
13844#include "target-x86/IEMAllN8veEmit-x86.h"
13845
13846/*
13847 * Include g_apfnIemNativeRecompileFunctions and associated functions.
13848 *
13849 * This should probably live in its own file later, but let's see what the
13850 * compile times turn out to be first.
13851 */
13852#include "IEMNativeFunctions.cpp.h"
13853
13854
13855
13856/*********************************************************************************************************************************
13857* Recompiler Core. *
13858*********************************************************************************************************************************/
13859
13860
13861/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
13862static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
13863{
13864 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
13865 pDis->cbCachedInstr += cbMaxRead;
13866 RT_NOREF(cbMinRead);
13867 return VERR_NO_DATA;
13868}
13869
13870
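/**
 * Translates a VMCPU field offset into the member name, for use when
 * annotating disassembled native code; returns NULL if the offset is unknown.
 */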
13871DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
13872{
13873 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
13874 {
13875#define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
13876 ENTRY(fLocalForcedActions),
13877 ENTRY(iem.s.rcPassUp),
13878 ENTRY(iem.s.fExec),
13879 ENTRY(iem.s.pbInstrBuf),
13880 ENTRY(iem.s.uInstrBufPc),
13881 ENTRY(iem.s.GCPhysInstrBuf),
13882 ENTRY(iem.s.cbInstrBufTotal),
13883 ENTRY(iem.s.idxTbCurInstr),
13884#ifdef VBOX_WITH_STATISTICS
13885 ENTRY(iem.s.StatNativeTlbHitsForFetch),
13886 ENTRY(iem.s.StatNativeTlbHitsForStore),
13887 ENTRY(iem.s.StatNativeTlbHitsForStack),
13888 ENTRY(iem.s.StatNativeTlbHitsForMapped),
13889 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
13890 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
13891 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
13892 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
13893#endif
13894 ENTRY(iem.s.DataTlb.aEntries),
13895 ENTRY(iem.s.DataTlb.uTlbRevision),
13896 ENTRY(iem.s.DataTlb.uTlbPhysRev),
13897 ENTRY(iem.s.DataTlb.cTlbHits),
13898 ENTRY(iem.s.CodeTlb.aEntries),
13899 ENTRY(iem.s.CodeTlb.uTlbRevision),
13900 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
13901 ENTRY(iem.s.CodeTlb.cTlbHits),
13902 ENTRY(pVMR3),
13903 ENTRY(cpum.GstCtx.rax),
13904 ENTRY(cpum.GstCtx.ah),
13905 ENTRY(cpum.GstCtx.rcx),
13906 ENTRY(cpum.GstCtx.ch),
13907 ENTRY(cpum.GstCtx.rdx),
13908 ENTRY(cpum.GstCtx.dh),
13909 ENTRY(cpum.GstCtx.rbx),
13910 ENTRY(cpum.GstCtx.bh),
13911 ENTRY(cpum.GstCtx.rsp),
13912 ENTRY(cpum.GstCtx.rbp),
13913 ENTRY(cpum.GstCtx.rsi),
13914 ENTRY(cpum.GstCtx.rdi),
13915 ENTRY(cpum.GstCtx.r8),
13916 ENTRY(cpum.GstCtx.r9),
13917 ENTRY(cpum.GstCtx.r10),
13918 ENTRY(cpum.GstCtx.r11),
13919 ENTRY(cpum.GstCtx.r12),
13920 ENTRY(cpum.GstCtx.r13),
13921 ENTRY(cpum.GstCtx.r14),
13922 ENTRY(cpum.GstCtx.r15),
13923 ENTRY(cpum.GstCtx.es.Sel),
13924 ENTRY(cpum.GstCtx.es.u64Base),
13925 ENTRY(cpum.GstCtx.es.u32Limit),
13926 ENTRY(cpum.GstCtx.es.Attr),
13927 ENTRY(cpum.GstCtx.cs.Sel),
13928 ENTRY(cpum.GstCtx.cs.u64Base),
13929 ENTRY(cpum.GstCtx.cs.u32Limit),
13930 ENTRY(cpum.GstCtx.cs.Attr),
13931 ENTRY(cpum.GstCtx.ss.Sel),
13932 ENTRY(cpum.GstCtx.ss.u64Base),
13933 ENTRY(cpum.GstCtx.ss.u32Limit),
13934 ENTRY(cpum.GstCtx.ss.Attr),
13935 ENTRY(cpum.GstCtx.ds.Sel),
13936 ENTRY(cpum.GstCtx.ds.u64Base),
13937 ENTRY(cpum.GstCtx.ds.u32Limit),
13938 ENTRY(cpum.GstCtx.ds.Attr),
13939 ENTRY(cpum.GstCtx.fs.Sel),
13940 ENTRY(cpum.GstCtx.fs.u64Base),
13941 ENTRY(cpum.GstCtx.fs.u32Limit),
13942 ENTRY(cpum.GstCtx.fs.Attr),
13943 ENTRY(cpum.GstCtx.gs.Sel),
13944 ENTRY(cpum.GstCtx.gs.u64Base),
13945 ENTRY(cpum.GstCtx.gs.u32Limit),
13946 ENTRY(cpum.GstCtx.gs.Attr),
13947 ENTRY(cpum.GstCtx.rip),
13948 ENTRY(cpum.GstCtx.eflags),
13949 ENTRY(cpum.GstCtx.uRipInhibitInt),
13950#undef ENTRY
13951 };
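 /* In strict builds, verify that the table above is sorted by ascending offset
    as required by the binary lookup below. */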
13952#ifdef VBOX_STRICT
13953 static bool s_fOrderChecked = false;
13954 if (!s_fOrderChecked)
13955 {
13956 s_fOrderChecked = true;
13957 uint32_t offPrev = s_aMembers[0].off;
13958 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
13959 {
13960 Assert(s_aMembers[i].off > offPrev);
13961 offPrev = s_aMembers[i].off;
13962 }
13963 }
13964#endif
13965
13966 /*
13967 * Binary lookup.
13968 */
13969 unsigned iStart = 0;
13970 unsigned iEnd = RT_ELEMENTS(s_aMembers);
13971 for (;;)
13972 {
13973 unsigned const iCur = iStart + (iEnd - iStart) / 2;
13974 uint32_t const offCur = s_aMembers[iCur].off;
13975 if (off < offCur)
13976 {
13977 if (iCur != iStart)
13978 iEnd = iCur;
13979 else
13980 break;
13981 }
13982 else if (off > offCur)
13983 {
13984 if (iCur + 1 < iEnd)
13985 iStart = iCur + 1;
13986 else
13987 break;
13988 }
13989 else
13990 return s_aMembers[iCur].pszName;
13991 }
13992#ifdef VBOX_WITH_STATISTICS
13993 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
13994 return "iem.s.acThreadedFuncStats[iFn]";
13995#endif
13996 return NULL;
13997}
13998
13999
14000/**
14001 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
14002 * @returns pszBuf.
14003 * @param fFlags The flags.
14004 * @param pszBuf The output buffer.
14005 * @param cbBuf The output buffer size. At least 32 bytes.
14006 */
14007DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
14008{
14009 Assert(cbBuf >= 32);
14010 static RTSTRTUPLE const s_aModes[] =
14011 {
14012 /* [00] = */ { RT_STR_TUPLE("16BIT") },
14013 /* [01] = */ { RT_STR_TUPLE("32BIT") },
14014 /* [02] = */ { RT_STR_TUPLE("!2!") },
14015 /* [03] = */ { RT_STR_TUPLE("!3!") },
14016 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
14017 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
14018 /* [06] = */ { RT_STR_TUPLE("!6!") },
14019 /* [07] = */ { RT_STR_TUPLE("!7!") },
14020 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
14021 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
14022 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
14023 /* [0b] = */ { RT_STR_TUPLE("!b!") },
14024 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
14025 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
14026 /* [0e] = */ { RT_STR_TUPLE("!e!") },
14027 /* [0f] = */ { RT_STR_TUPLE("!f!") },
14028 /* [10] = */ { RT_STR_TUPLE("!10!") },
14029 /* [11] = */ { RT_STR_TUPLE("!11!") },
14030 /* [12] = */ { RT_STR_TUPLE("!12!") },
14031 /* [13] = */ { RT_STR_TUPLE("!13!") },
14032 /* [14] = */ { RT_STR_TUPLE("!14!") },
14033 /* [15] = */ { RT_STR_TUPLE("!15!") },
14034 /* [16] = */ { RT_STR_TUPLE("!16!") },
14035 /* [17] = */ { RT_STR_TUPLE("!17!") },
14036 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
14037 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
14038 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
14039 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
14040 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
14041 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
14042 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
14043 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
14044 };
14045 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
14046 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
14047 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
14048
14049 pszBuf[off++] = ' ';
14050 pszBuf[off++] = 'C';
14051 pszBuf[off++] = 'P';
14052 pszBuf[off++] = 'L';
14053 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
14054 Assert(off < 32);
14055
14056 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
14057
14058 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
14059 {
14060 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
14061 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
14062 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
14063 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
14064 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
14065 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
14066 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
14067 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
14068 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
14069 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
14070 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
14071 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
14072 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
14073 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
14074 };
14075 if (fFlags)
14076 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
14077 if (s_aFlags[i].fFlag & fFlags)
14078 {
14079 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
14080 pszBuf[off++] = ' ';
14081 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
14082 off += s_aFlags[i].cchName;
14083 fFlags &= ~s_aFlags[i].fFlag;
14084 if (!fFlags)
14085 break;
14086 }
14087 pszBuf[off] = '\0';
14088
14089 return pszBuf;
14090}
14091
14092
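/**
 * Disassembles the native code of a translation block, interleaving the guest
 * instructions, labels and threaded-call markers when debug info is available.
 */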
14093DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
14094{
14095 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
14096#if defined(RT_ARCH_AMD64)
14097 static const char * const a_apszMarkers[] =
14098 {
14099 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
14100 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
14101 };
14102#endif
14103
14104 char szDisBuf[512];
14105 DISSTATE Dis;
14106 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
14107 uint32_t const cNative = pTb->Native.cInstructions;
14108 uint32_t offNative = 0;
14109#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14110 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
14111#endif
14112 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
14113 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
14114 : DISCPUMODE_64BIT;
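 /* Pick the host disassembler backend: the builtin DIS one by default, or
    capstone when VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER is defined. */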
14115#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14116 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
14117#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14118 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
14119#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14120# error "Port me"
14121#else
14122 csh hDisasm = ~(size_t)0;
14123# if defined(RT_ARCH_AMD64)
14124 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
14125# elif defined(RT_ARCH_ARM64)
14126 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
14127# else
14128# error "Port me"
14129# endif
14130 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
14131#endif
14132
14133 /*
14134 * Print TB info.
14135 */
14136 pHlp->pfnPrintf(pHlp,
14137 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
14138 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
14139 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
14140 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
14141#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14142 if (pDbgInfo && pDbgInfo->cEntries > 1)
14143 {
14144 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
14145
14146 /*
14147 * This disassembly is driven by the debug info which follows the native
14148 * code and indicates when it starts with the next guest instructions,
14149 * where labels are and such things.
14150 */
14151 uint32_t idxThreadedCall = 0;
14152 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
14153 uint8_t idxRange = UINT8_MAX;
14154 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
14155 uint32_t offRange = 0;
14156 uint32_t offOpcodes = 0;
14157 uint32_t const cbOpcodes = pTb->cbOpcodes;
14158 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
14159 uint32_t const cDbgEntries = pDbgInfo->cEntries;
14160 uint32_t iDbgEntry = 1;
14161 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
14162
14163 while (offNative < cNative)
14164 {
14165 /* If we're at or have passed the point where the next chunk of debug
14166 info starts, process it. */
14167 if (offDbgNativeNext <= offNative)
14168 {
14169 offDbgNativeNext = UINT32_MAX;
14170 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
14171 {
14172 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
14173 {
14174 case kIemTbDbgEntryType_GuestInstruction:
14175 {
14176 /* Did the exec flag change? */
14177 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
14178 {
14179 pHlp->pfnPrintf(pHlp,
14180 " fExec change %#08x -> %#08x %s\n",
14181 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
14182 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
14183 szDisBuf, sizeof(szDisBuf)));
14184 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
14185 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
14186 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
14187 : DISCPUMODE_64BIT;
14188 }
14189
14190 /* New opcode range? We need to cope with a spurious debug info entry here for cases
14191 where the compilation was aborted before the opcode was recorded and the actual
14192 instruction was translated to a threaded call. This may happen when we run out
14193 of ranges, or when some complicated interrupts/FFs are found to be pending or
14194 similar. So, we just deal with it here rather than in the compiler code as it
14195 is a lot simpler to do here. */
14196 if ( idxRange == UINT8_MAX
14197 || idxRange >= cRanges
14198 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
14199 {
14200 idxRange += 1;
14201 if (idxRange < cRanges)
14202 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
14203 else
14204 continue;
14205 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
14206 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
14207 + (pTb->aRanges[idxRange].idxPhysPage == 0
14208 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
14209 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
14210 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
14211 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
14212 pTb->aRanges[idxRange].idxPhysPage);
14213 GCPhysPc += offRange;
14214 }
14215
14216 /* Disassemble the instruction. */
14217 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
14218 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
14219 uint32_t cbInstr = 1;
14220 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
14221 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
14222 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
14223 if (RT_SUCCESS(rc))
14224 {
14225 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14226 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14227 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14228 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14229
14230 static unsigned const s_offMarker = 55;
14231 static char const s_szMarker[] = " ; <--- guest";
14232 if (cch < s_offMarker)
14233 {
14234 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
14235 cch = s_offMarker;
14236 }
14237 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
14238 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
14239
14240 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
14241 }
14242 else
14243 {
14244 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
14245 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
14246 cbInstr = 1;
14247 }
14248 GCPhysPc += cbInstr;
14249 offOpcodes += cbInstr;
14250 offRange += cbInstr;
14251 continue;
14252 }
14253
14254 case kIemTbDbgEntryType_ThreadedCall:
14255 pHlp->pfnPrintf(pHlp,
14256 " Call #%u to %s (%u args) - %s\n",
14257 idxThreadedCall,
14258 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
14259 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
14260 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
14261 idxThreadedCall++;
14262 continue;
14263
14264 case kIemTbDbgEntryType_GuestRegShadowing:
14265 {
14266 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
14267 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
14268 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
14269 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
14270 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
14271 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
14272 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
14273 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
14274 else
14275 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
14276 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
14277 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
14278 continue;
14279 }
14280
14281 case kIemTbDbgEntryType_Label:
14282 {
14283 const char *pszName = "what_the_fudge";
14284 const char *pszComment = "";
14285 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
14286 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
14287 {
14288 case kIemNativeLabelType_Return:
14289 pszName = "Return";
14290 break;
14291 case kIemNativeLabelType_ReturnBreak:
14292 pszName = "ReturnBreak";
14293 break;
14294 case kIemNativeLabelType_ReturnWithFlags:
14295 pszName = "ReturnWithFlags";
14296 break;
14297 case kIemNativeLabelType_NonZeroRetOrPassUp:
14298 pszName = "NonZeroRetOrPassUp";
14299 break;
14300 case kIemNativeLabelType_RaiseGp0:
14301 pszName = "RaiseGp0";
14302 break;
14303 case kIemNativeLabelType_RaiseNm:
14304 pszName = "RaiseNm";
14305 break;
14306 case kIemNativeLabelType_RaiseUd:
14307 pszName = "RaiseUd";
14308 break;
14309 case kIemNativeLabelType_ObsoleteTb:
14310 pszName = "ObsoleteTb";
14311 break;
14312 case kIemNativeLabelType_NeedCsLimChecking:
14313 pszName = "NeedCsLimChecking";
14314 break;
14315 case kIemNativeLabelType_CheckBranchMiss:
14316 pszName = "CheckBranchMiss";
14317 break;
14318 case kIemNativeLabelType_If:
14319 pszName = "If";
14320 fNumbered = true;
14321 break;
14322 case kIemNativeLabelType_Else:
14323 pszName = "Else";
14324 fNumbered = true;
14325 pszComment = " ; regs state restored pre-if-block";
14326 break;
14327 case kIemNativeLabelType_Endif:
14328 pszName = "Endif";
14329 fNumbered = true;
14330 break;
14331 case kIemNativeLabelType_CheckIrq:
14332 pszName = "CheckIrq_CheckVM";
14333 fNumbered = true;
14334 break;
14335 case kIemNativeLabelType_TlbLookup:
14336 pszName = "TlbLookup";
14337 fNumbered = true;
14338 break;
14339 case kIemNativeLabelType_TlbMiss:
14340 pszName = "TlbMiss";
14341 fNumbered = true;
14342 break;
14343 case kIemNativeLabelType_TlbDone:
14344 pszName = "TlbDone";
14345 fNumbered = true;
14346 break;
14347 case kIemNativeLabelType_Invalid:
14348 case kIemNativeLabelType_End:
14349 break;
14350 }
14351 if (fNumbered)
14352 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
14353 else
14354 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
14355 continue;
14356 }
14357
14358 case kIemTbDbgEntryType_NativeOffset:
14359 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
14360 Assert(offDbgNativeNext > offNative);
14361 break;
14362
14363#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
14364 case kIemTbDbgEntryType_DelayedPcUpdate:
14365 pHlp->pfnPrintf(pHlp,
14366 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
14367 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
14368 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
14369 continue;
14370#endif
14371
14372 default:
14373 AssertFailed();
14374 }
14375 iDbgEntry++;
14376 break;
14377 }
14378 }
14379
14380 /*
14381 * Disassemble the next native instruction.
14382 */
14383 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
14384# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14385 uint32_t cbInstr = sizeof(paNative[0]);
14386 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
14387 if (RT_SUCCESS(rc))
14388 {
14389# if defined(RT_ARCH_AMD64)
14390 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
14391 {
14392 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
14393 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
14394 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
14395 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
14396 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
14397 uInfo & 0x8000 ? "recompiled" : "todo");
14398 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
14399 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
14400 else
14401 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
14402 }
14403 else
14404# endif
14405 {
14406 const char *pszAnnotation = NULL;
14407# ifdef RT_ARCH_AMD64
14408 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14409 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14410 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14411 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14412 PCDISOPPARAM pMemOp;
14413 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
14414 pMemOp = &Dis.Param1;
14415 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
14416 pMemOp = &Dis.Param2;
14417 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
14418 pMemOp = &Dis.Param3;
14419 else
14420 pMemOp = NULL;
14421 if ( pMemOp
14422 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
14423 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
14424 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
14425 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
14426
14427#elif defined(RT_ARCH_ARM64)
14428 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
14429 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14430 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14431# else
14432# error "Port me"
14433# endif
14434 if (pszAnnotation)
14435 {
14436 static unsigned const s_offAnnotation = 55;
14437 size_t const cchAnnotation = strlen(pszAnnotation);
14438 size_t cchDis = strlen(szDisBuf);
14439 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
14440 {
14441 if (cchDis < s_offAnnotation)
14442 {
14443 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
14444 cchDis = s_offAnnotation;
14445 }
14446 szDisBuf[cchDis++] = ' ';
14447 szDisBuf[cchDis++] = ';';
14448 szDisBuf[cchDis++] = ' ';
14449 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
14450 }
14451 }
14452 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14453 }
14454 }
14455 else
14456 {
14457# if defined(RT_ARCH_AMD64)
14458 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14459 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14460# elif defined(RT_ARCH_ARM64)
14461 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14462# else
14463# error "Port me"
14464# endif
14465 cbInstr = sizeof(paNative[0]);
14466 }
14467 offNative += cbInstr / sizeof(paNative[0]);
14468
14469# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14470 cs_insn *pInstr;
14471 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14472 (uintptr_t)pNativeCur, 1, &pInstr);
14473 if (cInstrs > 0)
14474 {
14475 Assert(cInstrs == 1);
14476# if defined(RT_ARCH_AMD64)
14477 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14478 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14479# else
14480 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14481 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14482# endif
14483 offNative += pInstr->size / sizeof(*pNativeCur);
14484 cs_free(pInstr, cInstrs);
14485 }
14486 else
14487 {
14488# if defined(RT_ARCH_AMD64)
14489 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14490 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14491# else
14492 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14493# endif
14494 offNative++;
14495 }
14496# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14497 }
14498 }
14499 else
14500#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
14501 {
14502 /*
14503 * No debug info, just disassemble the x86 code and then the native code.
14504 *
14505 * First the guest code:
14506 */
14507 for (unsigned i = 0; i < pTb->cRanges; i++)
14508 {
14509 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
14510 + (pTb->aRanges[i].idxPhysPage == 0
14511 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
14512 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
14513 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
14514 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
14515 unsigned off = pTb->aRanges[i].offOpcodes;
14516 /** @todo this isn't working when crossing pages! */
14517 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
14518 while (off < cbOpcodes)
14519 {
14520 uint32_t cbInstr = 1;
14521 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
14522 &pTb->pabOpcodes[off], cbOpcodes - off,
14523 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
14524 if (RT_SUCCESS(rc))
14525 {
14526 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14527 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14528 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14529 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14530 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
14531 GCPhysPc += cbInstr;
14532 off += cbInstr;
14533 }
14534 else
14535 {
14536 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
14537 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
14538 break;
14539 }
14540 }
14541 }
14542
14543 /*
14544 * Then the native code:
14545 */
14546 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
14547 while (offNative < cNative)
14548 {
14549 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
14550# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14551 uint32_t cbInstr = sizeof(paNative[0]);
14552 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
14553 if (RT_SUCCESS(rc))
14554 {
14555# if defined(RT_ARCH_AMD64)
14556 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
14557 {
14558 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
14559 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
14560 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
14561 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
14562 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
14563 uInfo & 0x8000 ? "recompiled" : "todo");
14564 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
14565 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
14566 else
14567 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
14568 }
14569 else
14570# endif
14571 {
14572# ifdef RT_ARCH_AMD64
14573 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14574 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14575 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14576 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14577# elif defined(RT_ARCH_ARM64)
14578 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
14579 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14580 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14581# else
14582# error "Port me"
14583# endif
14584 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14585 }
14586 }
14587 else
14588 {
14589# if defined(RT_ARCH_AMD64)
14590 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14591 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14592# else
14593 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14594# endif
14595 cbInstr = sizeof(paNative[0]);
14596 }
14597 offNative += cbInstr / sizeof(paNative[0]);
14598
14599# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14600 cs_insn *pInstr;
14601 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14602 (uintptr_t)pNativeCur, 1, &pInstr);
14603 if (cInstrs > 0)
14604 {
14605 Assert(cInstrs == 1);
14606# if defined(RT_ARCH_AMD64)
14607 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14608 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14609# else
14610 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14611 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14612# endif
14613 offNative += pInstr->size / sizeof(*pNativeCur);
14614 cs_free(pInstr, cInstrs);
14615 }
14616 else
14617 {
14618# if defined(RT_ARCH_AMD64)
14619 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14620 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14621# else
14622 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14623# endif
14624 offNative++;
14625 }
14626# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14627 }
14628 }
14629
14630#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14631 /* Cleanup. */
14632 cs_close(&hDisasm);
14633#endif
14634}
14635
14636
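/*
 * Illustrative usage sketch only (hypothetical caller, not part of this file): the execution loop
 * could hand a hot threaded TB to the recompiler and check the TB type afterwards to see whether
 * recompilation succeeded:
 *
 *     pTb = iemNativeRecompile(pVCpu, pTb);
 *     if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
 *         ...execute pTb->Native.paInstructions...
 *     else
 *         ...still threaded, keep using pTb->Thrd.paCalls...
 */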
14637/**
14638 * Recompiles the given threaded TB into a native one.
14639 *
14640 * In case of failure the translation block will be returned as-is.
14641 *
14642 * @returns pTb.
14643 * @param pVCpu The cross context virtual CPU structure of the calling
14644 * thread.
14645 * @param pTb The threaded translation block to recompile to native.
14646 */
14647DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
14648{
14649 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
14650
14651 /*
14652 * The first time thru, we allocate the recompiler state; the other times
14653 * we just need to reset it before using it again.
14654 */
14655 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
14656 if (RT_LIKELY(pReNative))
14657 iemNativeReInit(pReNative, pTb);
14658 else
14659 {
14660 pReNative = iemNativeInit(pVCpu, pTb);
14661 AssertReturn(pReNative, pTb);
14662 }
14663
14664#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14665 /*
14666 * First do liveness analysis. This is done backwards.
14667 */
14668 {
14669 uint32_t idxCall = pTb->Thrd.cCalls;
14670 if (idxCall <= pReNative->cLivenessEntriesAlloc)
14671 { /* likely */ }
14672 else
14673 {
14674 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
14675 while (idxCall > cAlloc)
14676 cAlloc *= 2;
14677 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
14678 AssertReturn(pvNew, pTb);
14679 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
14680 pReNative->cLivenessEntriesAlloc = cAlloc;
14681 }
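        /* Worked example of the growth rule above (illustrative only): with the allocation at the
           _4K floor and idxCall = 9000, cAlloc doubles 4096 -> 8192 -> 16384 before the realloc,
           i.e. the buffer grows geometrically and always ends up covering idxCall entries. */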
14682 AssertReturn(idxCall > 0, pTb);
14683 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
14684
14685 /* The initial (final) entry. */
14686 idxCall--;
14687 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
14688
14689 /* Loop backwards thru the calls and fill in the other entries. */
14690 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
14691 while (idxCall > 0)
14692 {
14693 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
14694 if (pfnLiveness)
14695 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
14696 else
14697 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
14698 pCallEntry--;
14699 idxCall--;
14700 }
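        /* Illustrative recap: the entry for the last call is initialised as 'unused' above, and
           each earlier entry [i - 1] is then derived from call [i]'s liveness function together
           with entry [i] - a classic backwards dataflow pass over the threaded call sequence. */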
14701
14702# ifdef VBOX_WITH_STATISTICS
14703 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
14704 to 'clobbered' rather than 'input'. */
14705 /** @todo */
14706# endif
14707 }
14708#endif
14709
14710 /*
14711 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
14712 * for aborting if an error happens.
14713 */
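    /* Conceptual sketch only - the real IEMNATIVE_TRY_SETJMP / IEMNATIVE_CATCH_LONGJMP_* macros are
       defined elsewhere and may map to C++ exceptions instead of setjmp/longjmp:

           jmp_buf JmpBuf;
           if ((rc = setjmp(JmpBuf)) == 0)
           {
               ...emit prolog, calls and epilog; an emitter that runs out of buffer
                  space or hits an inconsistency does longjmp(JmpBuf, VERR_xxx)...
           }
           else
               return pTb; // give up and keep the threaded TB
    */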
14714 uint32_t cCallsLeft = pTb->Thrd.cCalls;
14715#ifdef LOG_ENABLED
14716 uint32_t const cCallsOrg = cCallsLeft;
14717#endif
14718 uint32_t off = 0;
14719 int rc = VINF_SUCCESS;
14720 IEMNATIVE_TRY_SETJMP(pReNative, rc)
14721 {
14722 /*
14723 * Emit prolog code (fixed).
14724 */
14725 off = iemNativeEmitProlog(pReNative, off);
14726
14727 /*
14728 * Convert the calls to native code.
14729 */
14730#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14731 int32_t iGstInstr = -1;
14732#endif
14733#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
14734 uint32_t cThreadedCalls = 0;
14735 uint32_t cRecompiledCalls = 0;
14736#endif
14737#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
14738 uint32_t idxCurCall = 0;
14739#endif
14740 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
14741 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
14742 while (cCallsLeft-- > 0)
14743 {
14744 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
14745#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14746 pReNative->idxCurCall = idxCurCall;
14747#endif
14748
14749 /*
14750 * Debug info, assembly markup and statistics.
14751 */
14752#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
14753 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
14754 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
14755#endif
14756#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14757 iemNativeDbgInfoAddNativeOffset(pReNative, off);
14758 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
14759 {
14760 if (iGstInstr < (int32_t)pTb->cInstructions)
14761 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
14762 else
14763 Assert(iGstInstr == pTb->cInstructions);
14764 iGstInstr = pCallEntry->idxInstr;
14765 }
14766 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
14767#endif
14768#if defined(VBOX_STRICT)
14769 off = iemNativeEmitMarker(pReNative, off,
14770 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
14771#endif
14772#if defined(VBOX_STRICT)
14773 iemNativeRegAssertSanity(pReNative);
14774#endif
14775#ifdef VBOX_WITH_STATISTICS
14776 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
14777#endif
14778
14779 /*
14780 * Actual work.
14781 */
14782 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
14783 pfnRecom ? "(recompiled)" : "(todo)"));
14784 if (pfnRecom) /** @todo stats on this. */
14785 {
14786 off = pfnRecom(pReNative, off, pCallEntry);
14787 STAM_REL_STATS({cRecompiledCalls++;});
14788 }
14789 else
14790 {
14791 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
14792 STAM_REL_STATS({cThreadedCalls++;});
14793 }
14794 Assert(off <= pReNative->cInstrBufAlloc);
14795 Assert(pReNative->cCondDepth == 0);
14796
14797#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
14798 if (LogIs2Enabled())
14799 {
14800 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
14801# ifndef IEMLIVENESS_EXTENDED_LAYOUT
14802 static const char s_achState[] = "CUXI";
14803# else
14804 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
14805# endif
14806
14807 char szGpr[17];
14808 for (unsigned i = 0; i < 16; i++)
14809 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
14810 szGpr[16] = '\0';
14811
14812 char szSegBase[X86_SREG_COUNT + 1];
14813 char szSegLimit[X86_SREG_COUNT + 1];
14814 char szSegAttrib[X86_SREG_COUNT + 1];
14815 char szSegSel[X86_SREG_COUNT + 1];
14816 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
14817 {
14818 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
14819 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
14820 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
14821 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
14822 }
14823 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
14824 = szSegSel[X86_SREG_COUNT] = '\0';
14825
14826 char szEFlags[8];
14827 for (unsigned i = 0; i < 7; i++)
14828 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
14829 szEFlags[7] = '\0';
14830
14831 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
14832 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
14833 }
14834#endif
14835
14836 /*
14837 * Advance.
14838 */
14839 pCallEntry++;
14840#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
14841 idxCurCall++;
14842#endif
14843 }
14844
14845 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
14846 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
14847 if (!cThreadedCalls)
14848 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
14849
14850 /*
14851 * Emit the epilog code.
14852 */
14853 uint32_t idxReturnLabel;
14854 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
14855
14856 /*
14857 * Generate special jump labels.
14858 */
14859 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
14860 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
14861 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
14862 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
14863 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
14864 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
14865 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
14866 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
14867 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
14868 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
14869 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
14870 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
14871 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
14872 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
14873 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
14874 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
14875 }
14876 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
14877 {
14878 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
14879 return pTb;
14880 }
14881 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
14882 Assert(off <= pReNative->cInstrBufAlloc);
14883
14884 /*
14885 * Make sure all labels have been defined.
14886 */
14887 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
14888#ifdef VBOX_STRICT
14889 uint32_t const cLabels = pReNative->cLabels;
14890 for (uint32_t i = 0; i < cLabels; i++)
14891 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
14892#endif
14893
14894 /*
14895 * Allocate executable memory, copy over the code we've generated.
14896 */
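    /* Descriptive note (based on the calls below): the code was generated into the plain
       pReNative->pInstrBuf heap buffer; it is now copied into a block from the executable memory
       allocator, and iemExecMemAllocatorReadyForUse() further down presumably performs whatever
       the host requires before execution (e.g. instruction cache maintenance). */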
14897 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
14898 if (pTbAllocator->pDelayedFreeHead)
14899 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
14900
14901 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
14902 AssertReturn(paFinalInstrBuf, pTb);
14903 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
14904
14905 /*
14906 * Apply fixups.
14907 */
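    /* Worked example (illustrative, for the AMD64 Rel32 case below): a branch fixed up at
       instruction buffer offset 0x100 against a label defined at offset 0x120 with an addend of -4
       stores 0x120 - 0x100 + (-4) = 0x1c as the 32-bit displacement; the addend lets the emitter
       account for where the displacement field sits within the instruction.  The ARM64 cases patch
       the immediate field of the already emitted instruction word instead. */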
14908 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
14909 uint32_t const cFixups = pReNative->cFixups;
14910 for (uint32_t i = 0; i < cFixups; i++)
14911 {
14912 Assert(paFixups[i].off < off);
14913 Assert(paFixups[i].idxLabel < cLabels);
14914 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
14915 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
14916 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
14917 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
14918 switch (paFixups[i].enmType)
14919 {
14920#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
14921 case kIemNativeFixupType_Rel32:
14922 Assert(paFixups[i].off + 4 <= off);
14923 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14924 continue;
14925
14926#elif defined(RT_ARCH_ARM64)
14927 case kIemNativeFixupType_RelImm26At0:
14928 {
14929 Assert(paFixups[i].off < off);
14930 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14931 Assert(offDisp >= -262144 && offDisp < 262144);
14932 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
14933 continue;
14934 }
14935
14936 case kIemNativeFixupType_RelImm19At5:
14937 {
14938 Assert(paFixups[i].off < off);
14939 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14940 Assert(offDisp >= -262144 && offDisp < 262144);
14941 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
14942 continue;
14943 }
14944
14945 case kIemNativeFixupType_RelImm14At5:
14946 {
14947 Assert(paFixups[i].off < off);
14948 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14949 Assert(offDisp >= -8192 && offDisp < 8192);
14950 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
14951 continue;
14952 }
14953
14954#endif
14955 case kIemNativeFixupType_Invalid:
14956 case kIemNativeFixupType_End:
14957 break;
14958 }
14959 AssertFailed();
14960 }
14961
14962 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
14963 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
14964
14965 /*
14966 * Convert the translation block.
14967 */
14968 RTMemFree(pTb->Thrd.paCalls);
14969 pTb->Native.paInstructions = paFinalInstrBuf;
14970 pTb->Native.cInstructions = off;
14971 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
14972#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14973 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
14974 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
14975#endif
14976
14977 Assert(pTbAllocator->cThreadedTbs > 0);
14978 pTbAllocator->cThreadedTbs -= 1;
14979 pTbAllocator->cNativeTbs += 1;
14980 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
14981
14982#ifdef LOG_ENABLED
14983 /*
14984 * Disassemble to the log if enabled.
14985 */
14986 if (LogIs3Enabled())
14987 {
14988 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
14989 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
14990# ifdef DEBUG_bird
14991 RTLogFlush(NULL);
14992# endif
14993 }
14994#endif
14995 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
14996
14997 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
14998 return pTb;
14999}
15000