source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103666

Last change on this file was r103666, checked in by vboxsync, 9 months ago

VMM/IEM: Native translation of IEM_MC_MAYBE_RAISE_FPU_XCPT() body, bugref:10371 [fix]

1/* $Id: IEMAllN8veRecompiler.cpp 103666 2024-03-04 13:11:28Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
136#endif
137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
138static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
139static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
140#endif
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
142DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
143 IEMNATIVEGSTREG enmGstReg, uint32_t off);
144DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store internal data
152 * in the chunk.
153 *
154 * Using RTHeapSimple is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation restricts page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
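/* Illustrative arithmetic for the two defines above (not part of the allocator):
 * a 300 byte request is rounded up to RT_ALIGN_32(300, 128) = 384 bytes, i.e.
 * (300 + 127) >> 7 = 3 allocation units, so it occupies 3 bits in the chunk's
 * allocation bitmap. */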
167
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Init once for the critical section. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
234
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDA into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
269/** Pointer to per-chunk unwind info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
282 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap. So,
344 * for the sake of simplicity/laziness, they are allocated as one contiguous
345 * chunk. */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to a chunk). */
351 uint32_t cBitmapElementsPerChunk;
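    /** Illustrative sizing (assuming the default 64 MB chunk and 128 byte unit):
     * cUnitsPerChunk is 64M >> 7 = 524288 units per chunk, which takes
     * 524288 / 64 = 8192 uint64_t bitmap elements, i.e. 64 KB of bitmap per chunk. */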
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * requested memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 /** The size of the initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
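    /* Illustrative arithmetic for the tweak above (assuming the 32 byte block
     * header noted in iemExecMemAllocatorGrow): a 100 byte request becomes
     * RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes, so the user area plus the next
     * block header again add up to a whole number of 64 byte lines. */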
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside in 32-bit RVA distance to the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
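    /* Sketch of the darwin W^X lifecycle as implemented in this file: the chunk
     * is allocated without RTMEMPAGEALLOC_F_EXECUTABLE (see iemExecMemAllocatorGrow),
     * the pages are flipped to RW here so the recompiler can emit code into them,
     * and iemExecMemAllocatorReadyForUse() later flips them to RX and calls
     * sys_icache_invalidate() before the code is executed. */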
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for cReqUnits of consecutive clear bits
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
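    /* Worked example of the scan below (illustrative): with cReqUnits = 3 and a
     * bitmap word reading 1110001101b (bit 0 rightmost), ASMBitFirstClear()
     * returns bit 1; the inner loop stops at bit 2 (set) with idxAddBit = 1, so
     * we call ASMBitNextClear() from bit 1, land on bit 4, find bits 4..6 clear,
     * and mark them allocated with ASMBitSetRange(). */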
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516
517 for (unsigned iIteration = 0;; iIteration++)
518 {
519 /*
520 * Adjust the request size so it'll fit the allocator alignment/whatnot.
521 *
522 * For the RTHeapSimple allocator this means to follow the logic described
523 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
524 * existing chunks if we think we've got sufficient free memory around.
525 *
526 * While for the alternative one we just align it up to a whole unit size.
527 */
528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
529 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
530#else
531 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
532#endif
533 if (cbReq <= pExecMemAllocator->cbFree)
534 {
535 uint32_t const cChunks = pExecMemAllocator->cChunks;
536 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
537 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
544 {
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 }
549 }
550
551 /*
552 * Can we grow it with another chunk?
553 */
554 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
555 {
556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
557 AssertLogRelRCReturn(rc, NULL);
558
559 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
560 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
561 if (pvRet)
562 return pvRet;
563 AssertFailed();
564 }
565
566 /*
567 * Try prune native TBs once.
568 */
569 if (iIteration == 0)
570 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
571 else
572 {
573 /** @todo stats... */
574 return NULL;
575 }
576 }
577
578}
579
580
581/** This is a hook that we may need later for changing memory protection back
582 * to readonly+exec */
583static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
584{
585#ifdef RT_OS_DARWIN
586 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
587 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
588 AssertRC(rc); RT_NOREF(pVCpu);
589
590 /*
591 * Flush the instruction cache:
592 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
593 */
594 /* sys_dcache_flush(pv, cb); - not necessary */
595 sys_icache_invalidate(pv, cb);
596#else
597 RT_NOREF(pVCpu, pv, cb);
598#endif
599}
600
601
602/**
603 * Frees executable memory.
604 */
605void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
609 Assert(pv);
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612#else
613 Assert(!((uintptr_t)pv & 63));
614#endif
615
616 /* Align the size as we did when allocating the block. */
617#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
618 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#else
620 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
621#endif
622
623 /* Free it / assert sanity. */
624#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
627 bool fFound = false;
628 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
629 {
630 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
631 fFound = offChunk < cbChunk;
632 if (fFound)
633 {
634#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
635 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
636 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
637
638 /* Check that it's valid and free it. */
639 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
641 for (uint32_t i = 1; i < cReqUnits; i++)
642 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
643 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
644
645 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
646 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
647
648 /* Update the stats. */
649 pExecMemAllocator->cbAllocated -= cb;
650 pExecMemAllocator->cbFree += cb;
651 pExecMemAllocator->cAllocations -= 1;
652 return;
653#else
654 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
655 break;
656#endif
657 }
658 }
659# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
660 AssertFailed();
661# else
662 Assert(fFound);
663# endif
664#endif
665
666#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
667 /* Update stats while cb is freshly calculated.*/
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Free it. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674#endif
675}
676
677
678
679#ifdef IN_RING3
680# ifdef RT_OS_WINDOWS
681
682/**
683 * Initializes the unwind info structures for windows hosts.
684 */
685static int
686iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
687 void *pvChunk, uint32_t idxChunk)
688{
689 RT_NOREF(pVCpu);
690
691 /*
692 * The AMD64 unwind opcodes.
693 *
694 * This is a program that starts with RSP after a RET instruction that
695 * ends up in recompiled code, and the operations we describe here will
696 * restore all non-volatile registers and bring RSP back to where our
697 * RET address is. This means it's reverse order from what happens in
698 * the prologue.
699 *
700 * Note! Using a frame register approach here both because we have one
701 * and mainly because the UWOP_ALLOC_LARGE argument values
702 * would be a pain to write initializers for. On the positive
703 * side, we're impervious to changes in the stack variable
704 * area and can deal with dynamic stack allocations if necessary.
705 */
706 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
707 {
708 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 0x10 (0x60) */
709 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
710 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
711 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
712 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
713 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
714 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
715 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
716 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
717 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
718 };
719 union
720 {
721 IMAGE_UNWIND_INFO Info;
722 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
723 } s_UnwindInfo =
724 {
725 {
726 /* .Version = */ 1,
727 /* .Flags = */ 0,
728 /* .SizeOfProlog = */ 16, /* whatever */
729 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
730 /* .FrameRegister = */ X86_GREG_xBP,
731 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
732 }
733 };
734 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
735 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
736
737 /*
738 * Calc how much space we need and allocate it off the exec heap.
739 */
740 unsigned const cFunctionEntries = 1;
741 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
742 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
743# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
744 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
745 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
746 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
747# else
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
749 - pExecMemAllocator->cbHeapBlockHdr;
750 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
751 32 /*cbAlignment*/);
752# endif
753 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
754 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
755
756 /*
757 * Initialize the structures.
758 */
759 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
760
761 paFunctions[0].BeginAddress = 0;
762 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
763 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
764
765 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
766 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
767
768 /*
769 * Register it.
770 */
771 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
772 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
773
774 return VINF_SUCCESS;
775}
776
777
778# else /* !RT_OS_WINDOWS */
779
780/**
781 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
784{
785 if (iValue >= 64)
786 {
787 Assert(iValue < 0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
790 }
791 else if (iValue >= 0)
792 *Ptr.pb++ = (uint8_t)iValue;
793 else if (iValue > -64)
794 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
795 else
796 {
797 Assert(iValue > -0x2000);
798 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
799 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
800 }
801 return Ptr;
802}
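/* Illustrative encodings produced by the helper above (they match standard signed
 * LEB128 within the limited range): iemDwarfPutLeb128(Ptr, 1) emits 0x01,
 * iemDwarfPutLeb128(Ptr, -8) emits 0x78 (this is the data alignment factor used
 * below), and iemDwarfPutLeb128(Ptr, 100) emits 0xe4 0x00. */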
803
804
805/**
806 * Emits an ULEB128 encoded value (up to 64-bit wide).
807 */
808DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
809{
810 while (uValue >= 0x80)
811 {
812 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
813 uValue >>= 7;
814 }
815 *Ptr.pb++ = (uint8_t)uValue;
816 return Ptr;
817}
818
819
820/**
821 * Emits a CFA rule as register @a uReg + offset @a off.
822 */
823DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
824{
825 *Ptr.pb++ = DW_CFA_def_cfa;
826 Ptr = iemDwarfPutUleb128(Ptr, uReg);
827 Ptr = iemDwarfPutUleb128(Ptr, off);
828 return Ptr;
829}
830
831
832/**
833 * Emits a register (@a uReg) save location:
834 * CFA + @a off * data_alignment_factor
835 */
836DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
837{
838 if (uReg < 0x40)
839 *Ptr.pb++ = DW_CFA_offset | uReg;
840 else
841 {
842 *Ptr.pb++ = DW_CFA_offset_extended;
843 Ptr = iemDwarfPutUleb128(Ptr, uReg);
844 }
845 Ptr = iemDwarfPutUleb128(Ptr, off);
846 return Ptr;
847}
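/* Illustrative byte sequence (assuming the usual AMD64 DWARF numbering where RBP
 * is register 6): iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) emits 0x86 0x02,
 * i.e. DW_CFA_offset | 6 followed by ULEB128(2), telling the unwinder that RBP
 * was saved at CFA + 2 * data_alignment_factor = CFA - 16. */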
848
849
850# if 0 /* unused */
851/**
852 * Emits a register (@a uReg) save location, using signed offset:
853 * CFA + @a offSigned * data_alignment_factor
854 */
855DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
856{
857 *Ptr.pb++ = DW_CFA_offset_extended_sf;
858 Ptr = iemDwarfPutUleb128(Ptr, uReg);
859 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
860 return Ptr;
861}
862# endif
863
864
865/**
866 * Initializes the unwind info section for non-windows hosts.
867 */
868static int
869iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
870 void *pvChunk, uint32_t idxChunk)
871{
872 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
874
875 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
876
877 /*
878 * Generate the CIE first.
879 */
880# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
881 uint8_t const iDwarfVer = 3;
882# else
883 uint8_t const iDwarfVer = 4;
884# endif
885 RTPTRUNION const PtrCie = Ptr;
886 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
887 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
888 *Ptr.pb++ = iDwarfVer; /* DWARF version */
889 *Ptr.pb++ = 0; /* Augmentation. */
890 if (iDwarfVer >= 4)
891 {
892 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
893 *Ptr.pb++ = 0; /* Segment selector size. */
894 }
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
897# else
898 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
899# endif
900 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
903# elif defined(RT_ARCH_ARM64)
904 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
905# else
906# error "port me"
907# endif
908 /* Initial instructions: */
909# ifdef RT_ARCH_AMD64
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
918# elif defined(RT_ARCH_ARM64)
919# if 1
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
921# else
922 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
923# endif
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
936 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
937 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
938# else
939# error "port me"
940# endif
941 while ((Ptr.u - PtrCie.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the CIE size. */
944 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
945
946 /*
947 * Generate an FDE for the whole chunk area.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
951# endif
952 RTPTRUNION const PtrFde = Ptr;
953 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
954 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
955 Ptr.pu32++;
956 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
957 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
958# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
959 *Ptr.pb++ = DW_CFA_nop;
960# endif
961 while ((Ptr.u - PtrFde.u) & 3)
962 *Ptr.pb++ = DW_CFA_nop;
963 /* Finalize the FDE size. */
964 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
965
966 /* Terminator entry. */
967 *Ptr.pu32++ = 0;
968 *Ptr.pu32++ = 0; /* just to be sure... */
969 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
970
971 /*
972 * Register it.
973 */
974# ifdef IEMNATIVE_USE_LIBUNWIND
975 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
976# else
977 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
978 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
979# endif
980
981# ifdef IEMNATIVE_USE_GDB_JIT
982 /*
983 * Now for telling GDB about this (experimental).
984 *
985 * This seems to work best with ET_DYN.
986 */
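 /*
  * Recap of the GDB JIT interface used here: we build an in-memory ELF image
  * (GDBJITSYMFILE), link a GDBJITCODEENTRY describing it into the list anchored
  * by __jit_debug_descriptor, set enmAction to kGdbJitaction_Register and call
  * __jit_debug_register_code(), on which GDB has planted a breakpoint so it can
  * pick up the new entry.
  */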
987 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
988# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
989 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
990 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
991# else
992 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
993 - pExecMemAllocator->cbHeapBlockHdr;
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
995# endif
996 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
997 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
998
999 RT_ZERO(*pSymFile);
1000
1001 /*
1002 * The ELF header:
1003 */
1004 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1005 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1006 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1007 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1008 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1009 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1010 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1011 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1012# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1013 pSymFile->EHdr.e_type = ET_DYN;
1014# else
1015 pSymFile->EHdr.e_type = ET_REL;
1016# endif
1017# ifdef RT_ARCH_AMD64
1018 pSymFile->EHdr.e_machine = EM_AMD64;
1019# elif defined(RT_ARCH_ARM64)
1020 pSymFile->EHdr.e_machine = EM_AARCH64;
1021# else
1022# error "port me"
1023# endif
1024 pSymFile->EHdr.e_version = 1; /*?*/
1025 pSymFile->EHdr.e_entry = 0;
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1028# else
1029 pSymFile->EHdr.e_phoff = 0;
1030# endif
1031 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1032 pSymFile->EHdr.e_flags = 0;
1033 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1035 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1036 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1037# else
1038 pSymFile->EHdr.e_phentsize = 0;
1039 pSymFile->EHdr.e_phnum = 0;
1040# endif
1041 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1042 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1043 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1044
1045 uint32_t offStrTab = 0;
1046#define APPEND_STR(a_szStr) do { \
1047 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1048 offStrTab += sizeof(a_szStr); \
1049 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1050 } while (0)
1051#define APPEND_STR_FMT(a_szStr, ...) do { \
1052 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1053 offStrTab++; \
1054 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1055 } while (0)
1056
1057 /*
1058 * Section headers.
1059 */
1060 /* Section header #0: NULL */
1061 unsigned i = 0;
1062 APPEND_STR("");
1063 RT_ZERO(pSymFile->aShdrs[i]);
1064 i++;
1065
1066 /* Section header: .eh_frame */
1067 pSymFile->aShdrs[i].sh_name = offStrTab;
1068 APPEND_STR(".eh_frame");
1069 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1070 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1071# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1072 pSymFile->aShdrs[i].sh_offset
1073 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1074# else
1075 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1076 pSymFile->aShdrs[i].sh_offset = 0;
1077# endif
1078
1079 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1080 pSymFile->aShdrs[i].sh_link = 0;
1081 pSymFile->aShdrs[i].sh_info = 0;
1082 pSymFile->aShdrs[i].sh_addralign = 1;
1083 pSymFile->aShdrs[i].sh_entsize = 0;
1084 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1085 i++;
1086
1087 /* Section header: .shstrtab */
1088 unsigned const iShStrTab = i;
1089 pSymFile->EHdr.e_shstrndx = iShStrTab;
1090 pSymFile->aShdrs[i].sh_name = offStrTab;
1091 APPEND_STR(".shstrtab");
1092 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1093 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1095 pSymFile->aShdrs[i].sh_offset
1096 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1097# else
1098 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1099 pSymFile->aShdrs[i].sh_offset = 0;
1100# endif
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 i++;
1107
1108 /* Section header: .symtab */
1109 pSymFile->aShdrs[i].sh_name = offStrTab;
1110 APPEND_STR(".symtab");
1111 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1112 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1113 pSymFile->aShdrs[i].sh_offset
1114 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1115 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_link = iShStrTab;
1117 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1118 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1119 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1120 i++;
1121
1122# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1123 /* Section header: .dynsym */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".dynsym");
1126 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1128 pSymFile->aShdrs[i].sh_offset
1129 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1130 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_link = iShStrTab;
1132 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1133 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1134 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1135 i++;
1136# endif
1137
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 /* Section header: .dynamic */
1140 pSymFile->aShdrs[i].sh_name = offStrTab;
1141 APPEND_STR(".dynamic");
1142 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1143 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1144 pSymFile->aShdrs[i].sh_offset
1145 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1146 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1147 pSymFile->aShdrs[i].sh_link = iShStrTab;
1148 pSymFile->aShdrs[i].sh_info = 0;
1149 pSymFile->aShdrs[i].sh_addralign = 1;
1150 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1151 i++;
1152# endif
1153
1154 /* Section header: .text */
1155 unsigned const iShText = i;
1156 pSymFile->aShdrs[i].sh_name = offStrTab;
1157 APPEND_STR(".text");
1158 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1159 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1161 pSymFile->aShdrs[i].sh_offset
1162 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1163# else
1164 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1165 pSymFile->aShdrs[i].sh_offset = 0;
1166# endif
1167 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1168 pSymFile->aShdrs[i].sh_link = 0;
1169 pSymFile->aShdrs[i].sh_info = 0;
1170 pSymFile->aShdrs[i].sh_addralign = 1;
1171 pSymFile->aShdrs[i].sh_entsize = 0;
1172 i++;
1173
1174 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1175
1176# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1177 /*
1178 * The program headers:
1179 */
1180 /* Everything in a single LOAD segment: */
1181 i = 0;
1182 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1183 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = 0;
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1189 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1190 i++;
1191 /* The .dynamic segment. */
1192 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1193 pSymFile->aPhdrs[i].p_flags = PF_R;
1194 pSymFile->aPhdrs[i].p_offset
1195 = pSymFile->aPhdrs[i].p_vaddr
1196 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1197 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1198 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1199 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1200 i++;
1201
1202 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1203
1204 /*
1205 * The dynamic section:
1206 */
1207 i = 0;
1208 pSymFile->aDyn[i].d_tag = DT_SONAME;
1209 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1210 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1219 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1220 i++;
1221 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1222 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1223 i++;
1224 pSymFile->aDyn[i].d_tag = DT_NULL;
1225 i++;
1226 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1227# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1228
1229 /*
1230 * Symbol tables:
1231 */
1232 /** @todo gdb doesn't seem to really like this ... */
1233 i = 0;
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1241 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1242# endif
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = 0;
1246 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1247 pSymFile->aSymbols[i].st_value = 0;
1248 pSymFile->aSymbols[i].st_size = 0;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251 i++;
1252
1253 pSymFile->aSymbols[i].st_name = offStrTab;
1254 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1255# if 0
1256 pSymFile->aSymbols[i].st_shndx = iShText;
1257 pSymFile->aSymbols[i].st_value = 0;
1258# else
1259 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1260 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1261# endif
1262 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1263 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1264 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1265# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1266 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1267 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1268# endif
1269 i++;
1270
1271 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1272 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1273
1274 /*
1275 * The GDB JIT entry and informing GDB.
1276 */
1277 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1278# if 1
1279 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1280# else
1281 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1282# endif
1283
1284 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1285 RTCritSectEnter(&g_IemNativeGdbJitLock);
1286 pEhFrame->GdbJitEntry.pNext = NULL;
1287 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1288 if (__jit_debug_descriptor.pTail)
1289 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1290 else
1291 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1292 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1293 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1294
1295 /* Notify GDB: */
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1297 __jit_debug_register_code();
1298 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1299 RTCritSectLeave(&g_IemNativeGdbJitLock);
1300
1301# else /* !IEMNATIVE_USE_GDB_JIT */
1302 RT_NOREF(pVCpu);
1303# endif /* !IEMNATIVE_USE_GDB_JIT */
1304
1305 return VINF_SUCCESS;
1306}
1307
1308# endif /* !RT_OS_WINDOWS */
1309#endif /* IN_RING3 */
1310
1311
1312/**
1313 * Adds another chunk to the executable memory allocator.
1314 *
1315 * This is used by the init code for the initial allocation and later by the
1316 * regular allocator function when it's out of memory.
1317 */
1318static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1319{
1320 /* Check that we've room for growth. */
1321 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1322 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1323
1324 /* Allocate a chunk. */
1325#ifdef RT_OS_DARWIN
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1327#else
1328 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1329#endif
1330 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 int rc = VINF_SUCCESS;
1334#else
1335 /* Initialize the heap for the chunk. */
1336 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1337 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1338 AssertRC(rc);
1339 if (RT_SUCCESS(rc))
1340 {
1341 /*
1342 * We want the memory to be aligned on 64 byte, so the first time thru
1343 * here we do some exploratory allocations to see how we can achieve this.
1344 * On subsequent runs we only make an initial adjustment allocation, if
1345 * necessary.
1346 *
1347 * Since we own the heap implementation, we know that the internal block
1348 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1349 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1350 * to the size, align up by 64 bytes, and subtract 32 bytes.
1351 *
1352 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1353 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1354 * allocation to force subsequent allocations to return 64 byte aligned
1355 * user areas.
1356 */
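        /* Concrete numbers for the scheme above (illustrative): the 64 byte heap
         * anchor block puts the first block header at offset 64 and its user area
         * at 96; the 64 byte tweak allocation below then puts the next block
         * header at 160, so the following user area starts at 192 = 3 * 64 and
         * every subsequent size-adjusted allocation stays 64 byte aligned. */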
1357 if (!pExecMemAllocator->cbHeapBlockHdr)
1358 {
1359 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1360 pExecMemAllocator->cbHeapAlignTweak = 64;
1361 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1362 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1364
1365 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1372 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1373 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1374 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1375 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1376
1377 RTHeapSimpleFree(hHeap, pvTest2);
1378 RTHeapSimpleFree(hHeap, pvTest1);
1379 }
1380 else
1381 {
1382 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1383 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1384 }
1385 if (RT_SUCCESS(rc))
1386#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1387 {
1388 /*
1389 * Add the chunk.
1390 *
1391 * This must be done before the unwind init so windows can allocate
1392 * memory from the chunk when using the alternative sub-allocator.
1393 */
1394 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1395#ifdef IN_RING3
1396 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1397#endif
1398#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1400#else
1401 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1402 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1403 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1404 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1405#endif
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1411 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1412 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1413#else
1414 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1415 pExecMemAllocator->cbTotal += cbFree;
1416 pExecMemAllocator->cbFree += cbFree;
1417#endif
1418
1419#ifdef IN_RING3
1420 /*
1421 * Initialize the unwind information (this cannot really fail atm).
1422 * (This sets pvUnwindInfo.)
1423 */
1424 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1425 if (RT_SUCCESS(rc))
1426#endif
1427 {
1428 return VINF_SUCCESS;
1429 }
1430
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 /* Just in case the impossible happens, undo the above: */
1433 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1435 pExecMemAllocator->cChunks = idxChunk;
1436 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1437 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1438 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1439 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1440#endif
1441 }
1442#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 }
1444#endif
1445 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1446 RT_NOREF(pVCpu);
1447 return rc;
1448}
1449
1450
1451/**
1452 * Initializes the executable memory allocator for native recompilation on the
1453 * calling EMT.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu The cross context virtual CPU structure of the calling
1457 * thread.
1458 * @param cbMax The max size of the allocator.
1459 * @param cbInitial The initial allocator size.
1460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1461 * dependent).
1462 */
1463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1464{
1465 /*
1466 * Validate input.
1467 */
1468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1470 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1471 || cbChunk == 0
1472 || ( RT_IS_POWER_OF_TWO(cbChunk)
1473 && cbChunk >= _1M
1474 && cbChunk <= _256M
1475 && cbChunk <= cbMax),
1476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1477 VERR_OUT_OF_RANGE);
1478
1479 /*
1480 * Adjust/figure out the chunk size.
1481 */
1482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1483 {
1484 if (cbMax >= _256M)
1485 cbChunk = _64M;
1486 else
1487 {
1488 if (cbMax < _16M)
1489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1490 else
1491 cbChunk = (uint32_t)cbMax / 4;
1492 if (!RT_IS_POWER_OF_TWO(cbChunk))
1493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1494 }
1495 }
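    /* Illustrative outcomes of the selection above: cbMax = 512M picks the 64M
     * default chunk (8 chunks); cbMax = 10M falls into the small-size branch,
     * picks 4M chunks, and cbMax is then rounded up to 12M below (3 chunks). */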
1496
1497 if (cbChunk > cbMax)
1498 cbMax = cbChunk;
1499 else
1500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
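    /* As a worked example of the above: cbMax = 100 MB takes the cbMax / 4 branch,
       25 MB is rounded up to the next power of two (32 MB), cbMax is then rounded
       up to 128 MB, and cMaxChunks ends up as 4. */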
1503
1504 /*
1505 * Allocate and initialize the allocator instance.
1506 */
1507 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1508#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1509 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1510 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1511 cbNeeded += cbBitmap * cMaxChunks;
1512 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1513 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1514#endif
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
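    /* The instance is a single allocation laid out as:
           [IEMEXECMEMALLOCATOR header + aChunks[cMaxChunks]]
           [allocation bitmaps, cbBitmap bytes per chunk]        (alt sub-allocator only)
           [IEMEXECMEMCHUNKEHFRAME per chunk]                    (ring-3, non-Windows only)
       with the trailing parts cache-line aligned via the off* offsets above. */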
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1533 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1534 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1535 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1536#endif
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1539#endif
1540 for (uint32_t i = 0; i < cMaxChunks; i++)
1541 {
1542#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1543 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1544 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1545#else
1546 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1547#endif
1548 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1549#ifdef IN_RING0
1550 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1551#else
1552 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1553#endif
1554 }
1555 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1556
1557 /*
1558 * Do the initial allocations.
1559 */
1560 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1561 {
1562 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1563 AssertLogRelRCReturn(rc, rc);
1564 }
1565
1566 pExecMemAllocator->idxChunkHint = 0;
1567
1568 return VINF_SUCCESS;
1569}
1570
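/*
 * Minimal usage sketch (hypothetical sizes; the actual caller derives these from
 * the VM configuration elsewhere in IEM):
 *
 *      int rc = iemExecMemAllocatorInit(pVCpu, 64 * _1M, 16 * _1M, 0); // 0 = default chunk size
 *      AssertRCReturn(rc, rc);
 */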
1571
1572/*********************************************************************************************************************************
1573* Native Recompilation *
1574*********************************************************************************************************************************/
1575
1576
1577/**
1578 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1581{
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1584}
1585
1586
1587/**
1588 * Used by TB code when it wants to raise a \#GP(0).
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1591{
1592 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1593#ifndef _MSC_VER
1594 return VINF_IEM_RAISED_XCPT; /* not reached */
1595#endif
1596}
1597
1598
1599/**
1600 * Used by TB code when it wants to raise a \#NM.
1601 */
1602IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1603{
1604 iemRaiseDeviceNotAvailableJmp(pVCpu);
1605#ifndef _MSC_VER
1606 return VINF_IEM_RAISED_XCPT; /* not reached */
1607#endif
1608}
1609
1610
1611/**
1612 * Used by TB code when it wants to raise a \#UD.
1613 */
1614IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1615{
1616 iemRaiseUndefinedOpcodeJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise a \#MF.
1625 */
1626IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1627{
1628 iemRaiseMathFaultJmp(pVCpu);
1629#ifndef _MSC_VER
1630 return VINF_IEM_RAISED_XCPT; /* not reached */
1631#endif
1632}
1633
1634
1635/**
1636 * Used by TB code when detecting opcode changes.
1637 * @see iemThreadedFuncWorkerObsoleteTb
1638 */
1639IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1640{
1641 /* We set fSafeToFree to false because we're being called in the context
1642 of a TB callback function, which for native TBs means we cannot release
1643 the executable memory until we've returned all the way back to iemTbExec,
1644 as that return path goes via the native code generated for the TB. */
1645 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1646 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1647 return VINF_IEM_REEXEC_BREAK;
1648}
1649
1650
1651/**
1652 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1653 */
1654IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1655{
1656 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1657 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1658 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1659 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1660 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1661 return VINF_IEM_REEXEC_BREAK;
1662}
1663
1664
1665/**
1666 * Used by TB code when we missed a PC check after a branch.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1669{
1670 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1671 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1672 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1673 pVCpu->iem.s.pbInstrBuf));
1674 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1675 return VINF_IEM_REEXEC_BREAK;
1676}
1677
1678
1679
1680/*********************************************************************************************************************************
1681* Helpers: Segmented memory fetches and stores. *
1682*********************************************************************************************************************************/
1683
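/* Note: With IEMNATIVE_WITH_TLB_LOOKUP_FETCH/STORE/PUSH/POP/MAPPED defined, the
   recompiler emits the TLB lookup inline and these helpers are only reached on
   the slow path, so they go straight to the safe (TLB miss) workers; without
   those defines they simply wrap the regular Jmp workers. */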
1684/**
1685 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1686 */
1687IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1688{
1689#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1690 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1691#else
1692 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1693#endif
1694}
1695
1696
1697/**
1698 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1699 * to 16 bits.
1700 */
1701IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1702{
1703#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1704 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1705#else
1706 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1707#endif
1708}
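/* The cast chain above first sign-extends within the target width and then
   zero-extends into the 64-bit return register; e.g. an input byte of 0x80
   becomes 0xff80 as uint16_t and is returned as 0x000000000000ff80. */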
1709
1710
1711/**
1712 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1713 * to 32 bits.
1714 */
1715IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1716{
1717#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1718 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1719#else
1720 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1721#endif
1722}
1723
1724/**
1725 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1726 * to 64 bits.
1727 */
1728IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1729{
1730#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1731 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1732#else
1733 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1734#endif
1735}
1736
1737
1738/**
1739 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1753 * to 32 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
1763
1764
1765/**
1766 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1767 * to 64 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778
1779/**
1780 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
1790
1791
1792/**
1793 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1794 * to 64 bits.
1795 */
1796IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1797{
1798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1799 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1800#else
1801 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1802#endif
1803}
1804
1805
1806/**
1807 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1821 */
1822IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1823{
1824#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1825 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1826#else
1827 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1828#endif
1829}
1830
1831
1832/**
1833 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1834 */
1835IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1836{
1837#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1838 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1839#else
1840 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1841#endif
1842}
1843
1844
1845/**
1846 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1847 */
1848IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1849{
1850#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1851 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1852#else
1853 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1854#endif
1855}
1856
1857
1858/**
1859 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1860 */
1861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1862{
1863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1864 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1865#else
1866 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1867#endif
1868}
1869
1870
1871
1872/**
1873 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1874 */
1875IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1876{
1877#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1878 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1879#else
1880 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1881#endif
1882}
1883
1884
1885/**
1886 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1887 */
1888IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1889{
1890#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1891 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1892#else
1893 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1894#endif
1895}
1896
1897
1898/**
1899 * Used by TB code to store a 32-bit selector value onto a generic stack.
1900 *
1901 * Intel CPUs don't write a whole dword here, hence the special function.
1902 */
1903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1904{
1905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1906 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1907#else
1908 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1909#endif
1910}
1911
1912
1913/**
1914 * Used by TB code to push unsigned 64-bit value onto a generic stack.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1917{
1918#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1919 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1920#else
1921 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1922#endif
1923}
1924
1925
1926/**
1927 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1928 */
1929IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1930{
1931#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1932 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1933#else
1934 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1935#endif
1936}
1937
1938
1939/**
1940 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1941 */
1942IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1943{
1944#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1945 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1946#else
1947 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1948#endif
1949}
1950
1951
1952/**
1953 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1954 */
1955IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1956{
1957#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1958 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1959#else
1960 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1961#endif
1962}
1963
1964
1965
1966/*********************************************************************************************************************************
1967* Helpers: Flat memory fetches and stores. *
1968*********************************************************************************************************************************/
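/* Note: The flat helpers pass UINT8_MAX as the segment register index to the
   common iemMem*SafeJmp workers, which is the IEM convention for flat
   (unsegmented) addressing. */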
1969
1970/**
1971 * Used by TB code to load unsigned 8-bit data w/ flat address.
1972 * @note Zero extending the value to 64-bit to simplify assembly.
1973 */
1974IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1975{
1976#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1977 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1978#else
1979 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1980#endif
1981}
1982
1983
1984/**
1985 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1986 * to 16 bits.
1987 * @note Zero extending the value to 64-bit to simplify assembly.
1988 */
1989IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1990{
1991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1992 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1993#else
1994 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1995#endif
1996}
1997
1998
1999/**
2000 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2001 * to 32 bits.
2002 * @note Zero extending the value to 64-bit to simplify assembly.
2003 */
2004IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2005{
2006#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2007 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2008#else
2009 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2010#endif
2011}
2012
2013
2014/**
2015 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2016 * to 64 bits.
2017 */
2018IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2019{
2020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2021 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2022#else
2023 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2024#endif
2025}
2026
2027
2028/**
2029 * Used by TB code to load unsigned 16-bit data w/ flat address.
2030 * @note Zero extending the value to 64-bit to simplify assembly.
2031 */
2032IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2033{
2034#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2035 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2036#else
2037 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2038#endif
2039}
2040
2041
2042/**
2043 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2044 * to 32 bits.
2045 * @note Zero extending the value to 64-bit to simplify assembly.
2046 */
2047IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2048{
2049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2050 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2051#else
2052 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2053#endif
2054}
2055
2056
2057/**
2058 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2059 * to 64 bits.
2060 * @note Zero extending the value to 64-bit to simplify assembly.
2061 */
2062IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2063{
2064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2065 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2066#else
2067 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2068#endif
2069}
2070
2071
2072/**
2073 * Used by TB code to load unsigned 32-bit data w/ flat address.
2074 * @note Zero extending the value to 64-bit to simplify assembly.
2075 */
2076IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2077{
2078#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2079 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2080#else
2081 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2082#endif
2083}
2084
2085
2086/**
2087 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2088 * to 64 bits.
2089 * @note Zero extending the value to 64-bit to simplify assembly.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2092{
2093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2094 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2095#else
2096 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2097#endif
2098}
2099
2100
2101/**
2102 * Used by TB code to load unsigned 64-bit data w/ flat address.
2103 */
2104IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2105{
2106#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2107 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2108#else
2109 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2110#endif
2111}
2112
2113
2114/**
2115 * Used by TB code to store unsigned 8-bit data w/ flat address.
2116 */
2117IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2118{
2119#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2120 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2121#else
2122 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2123#endif
2124}
2125
2126
2127/**
2128 * Used by TB code to store unsigned 16-bit data w/ flat address.
2129 */
2130IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2131{
2132#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2133 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2134#else
2135 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2136#endif
2137}
2138
2139
2140/**
2141 * Used by TB code to store unsigned 32-bit data w/ flat address.
2142 */
2143IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2144{
2145#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2146 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2147#else
2148 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2149#endif
2150}
2151
2152
2153/**
2154 * Used by TB code to store unsigned 64-bit data w/ flat address.
2155 */
2156IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2157{
2158#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2159 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2160#else
2161 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2162#endif
2163}
2164
2165
2166
2167/**
2168 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2169 */
2170IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2171{
2172#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2173 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2174#else
2175 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2176#endif
2177}
2178
2179
2180/**
2181 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2182 */
2183IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2184{
2185#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2186 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2187#else
2188 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2189#endif
2190}
2191
2192
2193/**
2194 * Used by TB code to store a segment selector value onto a flat stack.
2195 *
2196 * Intel CPUs don't write a whole dword here, hence the special function.
2197 */
2198IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2199{
2200#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2201 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2202#else
2203 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2204#endif
2205}
2206
2207
2208/**
2209 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2210 */
2211IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2212{
2213#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2214 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2215#else
2216 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2217#endif
2218}
2219
2220
2221/**
2222 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2223 */
2224IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2225{
2226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2227 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2228#else
2229 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2230#endif
2231}
2232
2233
2234/**
2235 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2236 */
2237IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2238{
2239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2240 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2241#else
2242 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2243#endif
2244}
2245
2246
2247/**
2248 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2249 */
2250IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2251{
2252#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2253 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2254#else
2255 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2256#endif
2257}
2258
2259
2260
2261/*********************************************************************************************************************************
2262* Helpers: Segmented memory mapping. *
2263*********************************************************************************************************************************/
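/* Note: Each map helper returns a host pointer to the guest data and fills in
   *pbUnmapInfo; that byte must later be passed to the matching commit & unmap
   helper (see the "Commit, rollback & unmap" section below) to release the
   mapping and, for writable mappings, commit the data. */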
2264
2265/**
2266 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2267 * segmentation.
2268 */
2269IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2270 RTGCPTR GCPtrMem, uint8_t iSegReg))
2271{
2272#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2273 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2274#else
2275 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2276#endif
2277}
2278
2279
2280/**
2281 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2282 */
2283IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2284 RTGCPTR GCPtrMem, uint8_t iSegReg))
2285{
2286#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2287 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2288#else
2289 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#endif
2291}
2292
2293
2294/**
2295 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2296 */
2297IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2298 RTGCPTR GCPtrMem, uint8_t iSegReg))
2299{
2300#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2301 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2302#else
2303 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#endif
2305}
2306
2307
2308/**
2309 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2310 */
2311IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2312 RTGCPTR GCPtrMem, uint8_t iSegReg))
2313{
2314#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2315 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2316#else
2317 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2318#endif
2319}
2320
2321
2322/**
2323 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2324 * segmentation.
2325 */
2326IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2327 RTGCPTR GCPtrMem, uint8_t iSegReg))
2328{
2329#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2330 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2331#else
2332 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2333#endif
2334}
2335
2336
2337/**
2338 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2339 */
2340IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2341 RTGCPTR GCPtrMem, uint8_t iSegReg))
2342{
2343#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2344 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2345#else
2346 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#endif
2348}
2349
2350
2351/**
2352 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2353 */
2354IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2355 RTGCPTR GCPtrMem, uint8_t iSegReg))
2356{
2357#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2358 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2359#else
2360 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#endif
2362}
2363
2364
2365/**
2366 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2367 */
2368IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2369 RTGCPTR GCPtrMem, uint8_t iSegReg))
2370{
2371#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2372 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2373#else
2374 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2375#endif
2376}
2377
2378
2379/**
2380 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2381 * segmentation.
2382 */
2383IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2384 RTGCPTR GCPtrMem, uint8_t iSegReg))
2385{
2386#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2387 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2388#else
2389 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2390#endif
2391}
2392
2393
2394/**
2395 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2396 */
2397IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2398 RTGCPTR GCPtrMem, uint8_t iSegReg))
2399{
2400#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2401 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2402#else
2403 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#endif
2405}
2406
2407
2408/**
2409 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2410 */
2411IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2412 RTGCPTR GCPtrMem, uint8_t iSegReg))
2413{
2414#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2415 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2416#else
2417 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#endif
2419}
2420
2421
2422/**
2423 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2424 */
2425IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2426 RTGCPTR GCPtrMem, uint8_t iSegReg))
2427{
2428#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2429 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2430#else
2431 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2432#endif
2433}
2434
2435
2436/**
2437 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2438 * segmentation.
2439 */
2440IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2441 RTGCPTR GCPtrMem, uint8_t iSegReg))
2442{
2443#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2444 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2445#else
2446 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2447#endif
2448}
2449
2450
2451/**
2452 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2453 */
2454IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2455 RTGCPTR GCPtrMem, uint8_t iSegReg))
2456{
2457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2458 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2459#else
2460 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#endif
2462}
2463
2464
2465/**
2466 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2467 */
2468IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2469 RTGCPTR GCPtrMem, uint8_t iSegReg))
2470{
2471#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2472 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2473#else
2474 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#endif
2476}
2477
2478
2479/**
2480 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2481 */
2482IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2483 RTGCPTR GCPtrMem, uint8_t iSegReg))
2484{
2485#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2486 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2487#else
2488 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#endif
2490}
2491
2492
2493/**
2494 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2495 */
2496IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2497 RTGCPTR GCPtrMem, uint8_t iSegReg))
2498{
2499#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2500 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2501#else
2502 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#endif
2504}
2505
2506
2507/**
2508 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2509 */
2510IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2511 RTGCPTR GCPtrMem, uint8_t iSegReg))
2512{
2513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2514 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2515#else
2516 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2517#endif
2518}
2519
2520
2521/**
2522 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2523 * segmentation.
2524 */
2525IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2526 RTGCPTR GCPtrMem, uint8_t iSegReg))
2527{
2528#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2529 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2530#else
2531 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2532#endif
2533}
2534
2535
2536/**
2537 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2538 */
2539IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2540 RTGCPTR GCPtrMem, uint8_t iSegReg))
2541{
2542#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2543 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2544#else
2545 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#endif
2547}
2548
2549
2550/**
2551 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2552 */
2553IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2554 RTGCPTR GCPtrMem, uint8_t iSegReg))
2555{
2556#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2557 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2558#else
2559 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#endif
2561}
2562
2563
2564/**
2565 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2566 */
2567IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2568 RTGCPTR GCPtrMem, uint8_t iSegReg))
2569{
2570#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2571 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2572#else
2573 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2574#endif
2575}
2576
2577
2578/*********************************************************************************************************************************
2579* Helpers: Flat memory mapping. *
2580*********************************************************************************************************************************/
2581
2582/**
2583 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2584 * address.
2585 */
2586IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2587{
2588#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2589 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2590#else
2591 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2592#endif
2593}
2594
2595
2596/**
2597 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2598 */
2599IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2600{
2601#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2602 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2603#else
2604 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2605#endif
2606}
2607
2608
2609/**
2610 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2611 */
2612IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2613{
2614#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2615 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2616#else
2617 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2618#endif
2619}
2620
2621
2622/**
2623 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2624 */
2625IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2626{
2627#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2628 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2629#else
2630 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2631#endif
2632}
2633
2634
2635/**
2636 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2637 * address.
2638 */
2639IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2640{
2641#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2642 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2643#else
2644 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2645#endif
2646}
2647
2648
2649/**
2650 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2651 */
2652IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2653{
2654#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2655 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2656#else
2657 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2658#endif
2659}
2660
2661
2662/**
2663 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2664 */
2665IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2666{
2667#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2668 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2669#else
2670 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2671#endif
2672}
2673
2674
2675/**
2676 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2677 */
2678IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2679{
2680#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2681 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2682#else
2683 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2684#endif
2685}
2686
2687
2688/**
2689 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2690 * address.
2691 */
2692IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2693{
2694#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2695 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2696#else
2697 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2698#endif
2699}
2700
2701
2702/**
2703 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2704 */
2705IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2706{
2707#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2708 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2709#else
2710 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2711#endif
2712}
2713
2714
2715/**
2716 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2717 */
2718IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2719{
2720#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2721 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2722#else
2723 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2724#endif
2725}
2726
2727
2728/**
2729 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2730 */
2731IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2732{
2733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2734 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2735#else
2736 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2737#endif
2738}
2739
2740
2741/**
2742 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2743 * address.
2744 */
2745IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2746{
2747#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2748 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2749#else
2750 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2751#endif
2752}
2753
2754
2755/**
2756 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2757 */
2758IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2759{
2760#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2761 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2762#else
2763 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2764#endif
2765}
2766
2767
2768/**
2769 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2770 */
2771IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2772{
2773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2774 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2775#else
2776 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2777#endif
2778}
2779
2780
2781/**
2782 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2783 */
2784IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2785{
2786#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2787 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2788#else
2789 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2790#endif
2791}
2792
2793
2794/**
2795 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2796 */
2797IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2798{
2799#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2800 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2801#else
2802 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2803#endif
2804}
2805
2806
2807/**
2808 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2809 */
2810IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2811{
2812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2813 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2814#else
2815 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2816#endif
2817}
2818
2819
2820/**
2821 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2822 * address.
2823 */
2824IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2825{
2826#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2827 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2828#else
2829 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2830#endif
2831}
2832
2833
2834/**
2835 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2836 */
2837IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2838{
2839#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2840 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2841#else
2842 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2843#endif
2844}
2845
2846
2847/**
2848 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2849 */
2850IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2851{
2852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2853 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2854#else
2855 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2856#endif
2857}
2858
2859
2860/**
2861 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2862 */
2863IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2864{
2865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2866 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2867#else
2868 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2869#endif
2870}
2871
2872
2873/*********************************************************************************************************************************
2874* Helpers: Commit, rollback & unmap *
2875*********************************************************************************************************************************/
2876
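/*
 * Rough sketch of how the map and unmap helpers pair up (hand-written for
 * illustration; the actual calls are emitted by the recompiler):
 *
 *      uint8_t   bUnmapInfo;
 *      uint32_t *pu32 = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
 *      *pu32 += 1;
 *      iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);
 */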
2877/**
2878 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2879 */
2880IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2881{
2882 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2883}
2884
2885
2886/**
2887 * Used by TB code to commit and unmap a read-write memory mapping.
2888 */
2889IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2890{
2891 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2892}
2893
2894
2895/**
2896 * Used by TB code to commit and unmap a write-only memory mapping.
2897 */
2898IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2899{
2900 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2901}
2902
2903
2904/**
2905 * Used by TB code to commit and unmap a read-only memory mapping.
2906 */
2907IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2908{
2909 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2910}
2911
2912
2913/**
2914 * Reinitializes the native recompiler state.
2915 *
2916 * Called before starting a new recompile job.
2917 */
2918static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2919{
2920 pReNative->cLabels = 0;
2921 pReNative->bmLabelTypes = 0;
2922 pReNative->cFixups = 0;
2923#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2924 pReNative->pDbgInfo->cEntries = 0;
2925#endif
2926 pReNative->pTbOrg = pTb;
2927 pReNative->cCondDepth = 0;
2928 pReNative->uCondSeqNo = 0;
2929 pReNative->uCheckIrqSeqNo = 0;
2930 pReNative->uTlbSeqNo = 0;
2931
2932#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2933 pReNative->Core.offPc = 0;
2934 pReNative->Core.cInstrPcUpdateSkipped = 0;
2935#endif
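    /* Fixed host registers (and, when fewer than 32 GPRs are available, the
       non-existent ones) start out marked as allocated: */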
2936 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2937#if IEMNATIVE_HST_GREG_COUNT < 32
2938 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2939#endif
2940 ;
2941 pReNative->Core.bmHstRegsWithGstShadow = 0;
2942 pReNative->Core.bmGstRegShadows = 0;
2943 pReNative->Core.bmVars = 0;
2944 pReNative->Core.bmStack = 0;
2945 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2946 pReNative->Core.u64ArgVars = UINT64_MAX;
2947
2948 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 12);
2949 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2950 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2951 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2952 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2953 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2954 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2955 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2956 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2957 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2958 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2959 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2960 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2961
2962 /* Full host register reinit: */
2963 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2964 {
2965 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2966 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2967 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2968 }
2969
2970 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2971 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2972#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2973 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2974#endif
2975#ifdef IEMNATIVE_REG_FIXED_TMP0
2976 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2977#endif
2978#ifdef IEMNATIVE_REG_FIXED_TMP1
2979 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2980#endif
2981#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2982 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2983#endif
2984 );
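    /* Mark the remaining fixed registers as reserved: */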
2985 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2986 {
2987 fRegs &= ~RT_BIT_32(idxReg);
2988 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2989 }
2990
2991 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2992#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2993 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2994#endif
2995#ifdef IEMNATIVE_REG_FIXED_TMP0
2996 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2997#endif
2998#ifdef IEMNATIVE_REG_FIXED_TMP1
2999 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3000#endif
3001#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3002 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3003#endif
3004 return pReNative;
3005}
3006
3007
3008/**
3009 * Allocates and initializes the native recompiler state.
3010 *
3011 * This is called the first time an EMT wants to recompile something.
3012 *
3013 * @returns Pointer to the new recompiler state.
3014 * @param pVCpu The cross context virtual CPU structure of the calling
3015 * thread.
3016 * @param pTb The TB that's about to be recompiled.
3017 * @thread EMT(pVCpu)
3018 */
3019static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3020{
3021 VMCPU_ASSERT_EMT(pVCpu);
3022
3023 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3024 AssertReturn(pReNative, NULL);
3025
3026 /*
3027 * Try allocate all the buffers and stuff we need.
3028 */
3029 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3030 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3031 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3032#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3033 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3034#endif
3035 if (RT_LIKELY( pReNative->pInstrBuf
3036 && pReNative->paLabels
3037 && pReNative->paFixups)
3038#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3039 && pReNative->pDbgInfo
3040#endif
3041 )
3042 {
3043 /*
3044 * Set the buffer & array sizes on success.
3045 */
3046 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3047 pReNative->cLabelsAlloc = _8K;
3048 pReNative->cFixupsAlloc = _16K;
3049#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3050 pReNative->cDbgInfoAlloc = _16K;
3051#endif
3052
3053 /* Other constant stuff: */
3054 pReNative->pVCpu = pVCpu;
3055
3056 /*
3057 * Done, just need to save it and reinit it.
3058 */
3059 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3060 return iemNativeReInit(pReNative, pTb);
3061 }
3062
3063 /*
3064 * Failed. Cleanup and return.
3065 */
3066 AssertFailed();
3067 RTMemFree(pReNative->pInstrBuf);
3068 RTMemFree(pReNative->paLabels);
3069 RTMemFree(pReNative->paFixups);
3070#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3071 RTMemFree(pReNative->pDbgInfo);
3072#endif
3073 RTMemFree(pReNative);
3074 return NULL;
3075}
3076
3077
3078/**
3079 * Creates a label
3080 *
3081 * If the label does not yet have a defined position,
3082 * call iemNativeLabelDefine() later to set it.
3083 *
3084 * @returns Label ID. Throws VBox status code on failure, so no need to check
3085 * the return value.
3086 * @param pReNative The native recompile state.
3087 * @param enmType The label type.
3088 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3089 * label is not yet defined (default).
3090 * @param uData Data associated with the label. Only applicable to
3091 * certain types of labels. Default is zero.
3092 */
3093DECL_HIDDEN_THROW(uint32_t)
3094iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3095 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3096{
3097 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3098
3099 /*
3100 * Locate existing label definition.
3101 *
3102 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3103 * and uData is zero.
3104 */
3105 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3106 uint32_t const cLabels = pReNative->cLabels;
3107 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3108#ifndef VBOX_STRICT
3109 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3110 && offWhere == UINT32_MAX
3111 && uData == 0
3112#endif
3113 )
3114 {
3115#ifndef VBOX_STRICT
3116 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3117 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3118 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3119 if (idxLabel < pReNative->cLabels)
3120 return idxLabel;
3121#else
3122 for (uint32_t i = 0; i < cLabels; i++)
3123 if ( paLabels[i].enmType == enmType
3124 && paLabels[i].uData == uData)
3125 {
3126 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3127 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3128 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3129 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3130 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3131 return i;
3132 }
3133 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3134 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3135#endif
3136 }
3137
3138 /*
3139 * Make sure we've got room for another label.
3140 */
3141 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3142 { /* likely */ }
3143 else
3144 {
3145 uint32_t cNew = pReNative->cLabelsAlloc;
3146 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3147 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3148 cNew *= 2;
3149        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3150 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3151 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3152 pReNative->paLabels = paLabels;
3153 pReNative->cLabelsAlloc = cNew;
3154 }
3155
3156 /*
3157 * Define a new label.
3158 */
3159 paLabels[cLabels].off = offWhere;
3160 paLabels[cLabels].enmType = enmType;
3161 paLabels[cLabels].uData = uData;
3162 pReNative->cLabels = cLabels + 1;
3163
3164 Assert((unsigned)enmType < 64);
3165 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3166
3167 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3168 {
3169 Assert(uData == 0);
3170 pReNative->aidxUniqueLabels[enmType] = cLabels;
3171 }
3172
3173 if (offWhere != UINT32_MAX)
3174 {
3175#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3176 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3177 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3178#endif
3179 }
3180 return cLabels;
3181}
3182
3183
3184/**
3185 * Defines the location of an existing label.
3186 *
3187 * @param pReNative The native recompile state.
3188 * @param idxLabel The label to define.
3189 * @param offWhere The position.
3190 */
3191DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3192{
3193 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3194 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3195 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3196 pLabel->off = offWhere;
3197#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3198 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3199 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3200#endif
3201}
3202
3203
3204/**
3205 * Looks up a label.
3206 *
3207 * @returns Label ID if found, UINT32_MAX if not.
3208 */
3209static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3210 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3211{
3212 Assert((unsigned)enmType < 64);
3213 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3214 {
3215 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3216 return pReNative->aidxUniqueLabels[enmType];
3217
3218 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3219 uint32_t const cLabels = pReNative->cLabels;
3220 for (uint32_t i = 0; i < cLabels; i++)
3221 if ( paLabels[i].enmType == enmType
3222 && paLabels[i].uData == uData
3223 && ( paLabels[i].off == offWhere
3224 || offWhere == UINT32_MAX
3225 || paLabels[i].off == UINT32_MAX))
3226 return i;
3227 }
3228 return UINT32_MAX;
3229}
3230
3231
3232/**
3233 * Adds a fixup.
3234 *
3235 * @throws VBox status code (int) on failure.
3236 * @param pReNative The native recompile state.
3237 * @param offWhere The instruction offset of the fixup location.
3238 * @param idxLabel The target label ID for the fixup.
3239 * @param enmType The fixup type.
3240 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3241 */
3242DECL_HIDDEN_THROW(void)
3243iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3244 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3245{
3246 Assert(idxLabel <= UINT16_MAX);
3247 Assert((unsigned)enmType <= UINT8_MAX);
3248
3249 /*
3250 * Make sure we've room.
3251 */
3252 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3253 uint32_t const cFixups = pReNative->cFixups;
3254 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3255 { /* likely */ }
3256 else
3257 {
3258 uint32_t cNew = pReNative->cFixupsAlloc;
3259 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3260 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3261 cNew *= 2;
3262 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3263 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3264 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3265 pReNative->paFixups = paFixups;
3266 pReNative->cFixupsAlloc = cNew;
3267 }
3268
3269 /*
3270 * Add the fixup.
3271 */
3272 paFixups[cFixups].off = offWhere;
3273 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3274 paFixups[cFixups].enmType = enmType;
3275 paFixups[cFixups].offAddend = offAddend;
3276 pReNative->cFixups = cFixups + 1;
3277}
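/*
 * A minimal usage sketch (illustrative only, not part of the recompiler flow):
 * this is roughly how the label and fixup helpers above cooperate when a
 * forward branch is emitted.  enmLabelType, uData, enmFixupType and
 * offBranchInstr are placeholders; real callers pass the concrete types and
 * offsets appropriate to the code being generated.
 *
 *      // Create the label without a position (offWhere = UINT32_MAX).
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, uData);
 *
 *      // Emit the branch and record a fixup at its offset so the final
 *      // displacement can be patched once the label position is known.
 *      iemNativeAddFixup(pReNative, offBranchInstr, idxLabel, enmFixupType, 0);
 *
 *      // Later, when the target code is reached, pin the label to the
 *      // current instruction buffer offset.
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */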
3278
3279
3280/**
3281 * Slow code path for iemNativeInstrBufEnsure.
3282 */
3283DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3284{
3285 /* Double the buffer size till we meet the request. */
3286 uint32_t cNew = pReNative->cInstrBufAlloc;
3287 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3288 do
3289 cNew *= 2;
3290 while (cNew < off + cInstrReq);
3291
3292 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3293#ifdef RT_ARCH_ARM64
3294 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3295#else
3296 uint32_t const cbMaxInstrBuf = _2M;
3297#endif
3298 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3299
3300 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3301 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3302
3303#ifdef VBOX_STRICT
3304 pReNative->offInstrBufChecked = off + cInstrReq;
3305#endif
3306 pReNative->cInstrBufAlloc = cNew;
3307 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3308}
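/*
 * A minimal usage sketch (illustrative only): emitters do not call the slow
 * path above directly but go through the inline iemNativeInstrBufEnsure()
 * wrapper, which only drops into iemNativeInstrBufEnsureSlow() when the buffer
 * is too small.  Assuming an AMD64 host, where IEMNATIVEINSTR is a byte, an
 * emitter fragment would look roughly like this:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
 *      pCodeBuf[off++] = 0x90;     // NOP, just as a placeholder instruction.
 *      // 'off' is then returned to the caller as the new code buffer position.
 */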
3309
3310#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3311
3312/**
3313 * Grows the static debug info array used during recompilation.
3314 *
3315 * @returns Pointer to the new debug info block; throws VBox status code on
3316 * failure, so no need to check the return value.
3317 */
3318DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3319{
3320 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3321 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3322 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3323 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3324 pReNative->pDbgInfo = pDbgInfo;
3325 pReNative->cDbgInfoAlloc = cNew;
3326 return pDbgInfo;
3327}
3328
3329
3330/**
3331 * Adds a new, uninitialized debug info entry, returning the pointer to it.
3332 */
3333DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3334{
3335 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3336 { /* likely */ }
3337 else
3338 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3339 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3340}
3341
3342
3343/**
3344 * Debug Info: Adds a native offset record, if necessary.
3345 */
3346static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3347{
3348 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3349
3350 /*
3351 * Search backwards to see if we've got a similar record already.
3352 */
3353 uint32_t idx = pDbgInfo->cEntries;
3354 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3355 while (idx-- > idxStop)
3356 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3357 {
3358 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3359 return;
3360 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3361 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3362 break;
3363 }
3364
3365 /*
3366 * Add it.
3367 */
3368 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3369 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3370 pEntry->NativeOffset.offNative = off;
3371}
3372
3373
3374/**
3375 * Debug Info: Record info about a label.
3376 */
3377static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3378{
3379 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3380 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3381 pEntry->Label.uUnused = 0;
3382 pEntry->Label.enmLabel = (uint8_t)enmType;
3383 pEntry->Label.uData = uData;
3384}
3385
3386
3387/**
3388 * Debug Info: Record info about a threaded call.
3389 */
3390static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3391{
3392 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3393 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3394 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3395 pEntry->ThreadedCall.uUnused = 0;
3396 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3397}
3398
3399
3400/**
3401 * Debug Info: Record info about a new guest instruction.
3402 */
3403static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3404{
3405 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3406 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3407 pEntry->GuestInstruction.uUnused = 0;
3408 pEntry->GuestInstruction.fExec = fExec;
3409}
3410
3411
3412/**
3413 * Debug Info: Record info about guest register shadowing.
3414 */
3415static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3416 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3417{
3418 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3419 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3420 pEntry->GuestRegShadowing.uUnused = 0;
3421 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3422 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3423 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3424}
3425
3426
3427# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3428/**
3429 * Debug Info: Record info about delayed RIP updates.
3430 */
3431static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3432{
3433 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3434 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3435 pEntry->DelayedPcUpdate.offPc = offPc;
3436 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3437}
3438# endif
3439
3440#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3441
3442
3443/*********************************************************************************************************************************
3444* Register Allocator *
3445*********************************************************************************************************************************/
3446
3447/**
3448 * Register parameter indexes (indexed by argument number).
3449 */
3450DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3451{
3452 IEMNATIVE_CALL_ARG0_GREG,
3453 IEMNATIVE_CALL_ARG1_GREG,
3454 IEMNATIVE_CALL_ARG2_GREG,
3455 IEMNATIVE_CALL_ARG3_GREG,
3456#if defined(IEMNATIVE_CALL_ARG4_GREG)
3457 IEMNATIVE_CALL_ARG4_GREG,
3458# if defined(IEMNATIVE_CALL_ARG5_GREG)
3459 IEMNATIVE_CALL_ARG5_GREG,
3460# if defined(IEMNATIVE_CALL_ARG6_GREG)
3461 IEMNATIVE_CALL_ARG6_GREG,
3462# if defined(IEMNATIVE_CALL_ARG7_GREG)
3463 IEMNATIVE_CALL_ARG7_GREG,
3464# endif
3465# endif
3466# endif
3467#endif
3468};
3469
3470/**
3471 * Call register masks indexed by argument count.
3472 */
3473DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3474{
3475 0,
3476 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3477 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3478 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3479 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3480 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3481#if defined(IEMNATIVE_CALL_ARG4_GREG)
3482 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3483 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3484# if defined(IEMNATIVE_CALL_ARG5_GREG)
3485 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3486 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3487# if defined(IEMNATIVE_CALL_ARG6_GREG)
3488 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3489 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3490 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3491# if defined(IEMNATIVE_CALL_ARG7_GREG)
3492 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3493 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3494 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3495# endif
3496# endif
3497# endif
3498#endif
3499};
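/*
 * Worked example, read directly off the tables above: for a helper call taking
 * two register arguments, g_aidxIemNativeCallRegs[0] and [1] give the
 * individual host registers, while g_afIemNativeCallRegs[2] gives the combined
 * mask, i.e.:
 *
 *      RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
 *
 * which is convenient when reserving or flushing everything an upcoming call
 * will use.
 */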
3500
3501#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3502/**
3503 * BP offset of the stack argument slots.
3504 *
3505 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3506 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3507 */
3508DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3509{
3510 IEMNATIVE_FP_OFF_STACK_ARG0,
3511# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3512 IEMNATIVE_FP_OFF_STACK_ARG1,
3513# endif
3514# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3515 IEMNATIVE_FP_OFF_STACK_ARG2,
3516# endif
3517# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3518 IEMNATIVE_FP_OFF_STACK_ARG3,
3519# endif
3520};
3521AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3522#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
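/*
 * A minimal usage sketch (illustrative only): on hosts that define
 * IEMNATIVE_FP_OFF_STACK_ARG0, call arguments beyond
 * IEMNATIVE_CALL_ARG_GREG_COUNT go onto the stack frame.  Storing such an
 * argument from a host register (iArg and idxRegSrc being placeholders) would
 * look roughly like this:
 *
 *      off = iemNativeEmitStoreGprByBp(pReNative, off,
 *                                      g_aoffIemNativeCallStackArgBpDisp[iArg - IEMNATIVE_CALL_ARG_GREG_COUNT],
 *                                      idxRegSrc);
 */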
3523
3524/**
3525 * Info about shadowed guest register values.
3526 * @see IEMNATIVEGSTREG
3527 */
3528static struct
3529{
3530 /** Offset in VMCPU. */
3531 uint32_t off;
3532 /** The field size. */
3533 uint8_t cb;
3534 /** Name (for logging). */
3535 const char *pszName;
3536} const g_aGstShadowInfo[] =
3537{
3538#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3539 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3540 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3541 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3542 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3543 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3544 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3545 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3546 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3547 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3548 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3549 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3550 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3551 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3552 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3553 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3554 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3555 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3556 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3557 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3558 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3559 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3560 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3561 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3562 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3563 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3564 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3565 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3566 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3567 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3568 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3569 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3570 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3571 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3572 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3573 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3574 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3575 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3576 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3577 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3578 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3579 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3580 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3581 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3582 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3583 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3584 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3585 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3586#undef CPUMCTX_OFF_AND_SIZE
3587};
3588AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3589
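/*
 * A minimal usage sketch (illustrative only): g_aGstShadowInfo is what turns an
 * IEMNATIVEGSTREG value into a VMCPU field offset when loading or spilling a
 * shadow copy.  Loading a 64-bit guest GPR into a host register could look
 * roughly like this (the concrete emitter helper depends on the field size in
 * g_aGstShadowInfo[enmGstReg].cb):
 *
 *      Assert(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t));
 *      off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg,
 *                                            g_aGstShadowInfo[enmGstReg].off);
 */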
3590
3591/** Host CPU general purpose register names. */
3592DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3593{
3594#ifdef RT_ARCH_AMD64
3595 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3596#elif RT_ARCH_ARM64
3597 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3598 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3599#else
3600# error "port me"
3601#endif
3602};
3603
3604
3605DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3606 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3607{
3608 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3609
3610 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3611 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3612 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3613 return (uint8_t)idxReg;
3614}
3615
3616
3617#if 0 /* unused */
3618/**
3619 * Tries to locate a suitable register in the given register mask.
3620 *
3621 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3622 * failed.
3623 *
3624 * @returns Host register number on success, returns UINT8_MAX on failure.
3625 */
3626static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3627{
3628 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3629 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3630 if (fRegs)
3631 {
3632 /** @todo pick better here: */
3633 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3634
3635 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3636 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3637 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3638 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3639
3640 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3641 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3642 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3643 return idxReg;
3644 }
3645 return UINT8_MAX;
3646}
3647#endif /* unused */
3648
3649
3650/**
3651 * Locate a register, possibly freeing one up.
3652 *
3653 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3654 * failed.
3655 *
3656 * @returns Host register number on success. Returns UINT8_MAX if no registers
3657 *          are found; the caller is supposed to deal with this and raise an
3658 *          allocation type specific status code (if desired).
3659 *
3660 * @throws  VBox status code if we run into trouble spilling a variable or
3661 * recording debug info. Does NOT throw anything if we're out of
3662 * registers, though.
3663 */
3664static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3665 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3666{
3667 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3668 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3669 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3670
3671 /*
3672 * Try a freed register that's shadowing a guest register.
3673 */
3674 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3675 if (fRegs)
3676 {
3677 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3678
3679#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3680 /*
3681     * When we have liveness information, we use it to kick out all shadowed
3682     * guest registers that will not be needed any more in this TB.  If we're
3683 * lucky, this may prevent us from ending up here again.
3684 *
3685 * Note! We must consider the previous entry here so we don't free
3686 * anything that the current threaded function requires (current
3687 * entry is produced by the next threaded function).
3688 */
3689 uint32_t const idxCurCall = pReNative->idxCurCall;
3690 if (idxCurCall > 0)
3691 {
3692 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3693
3694# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3695 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3696 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3697 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
3698#else
3699 /* Construct a mask of the registers not in the read or write state.
3700        Note! We could skip writes, if they aren't from us, as this is just
3701 a hack to prevent trashing registers that have just been written
3702 or will be written when we retire the current instruction. */
3703 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3704 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3705 & IEMLIVENESSBIT_MASK;
3706#endif
3707 /* Merge EFLAGS. */
3708 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3709 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3710 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3711 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3712 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3713
3714 /* If it matches any shadowed registers. */
3715 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3716 {
3717 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3718 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3719 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3720
3721 /* See if we've got any unshadowed registers we can return now. */
3722 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3723 if (fUnshadowedRegs)
3724 {
3725 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3726 return (fPreferVolatile
3727 ? ASMBitFirstSetU32(fUnshadowedRegs)
3728 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3729 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3730 - 1;
3731 }
3732 }
3733 }
3734#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3735
3736 unsigned const idxReg = (fPreferVolatile
3737 ? ASMBitFirstSetU32(fRegs)
3738 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3739 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3740 - 1;
3741
3742 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3743 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3744 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3745 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3746
3747 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3748 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3749 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3750 return idxReg;
3751 }
3752
3753 /*
3754 * Try free up a variable that's in a register.
3755 *
3756 * We do two rounds here, first evacuating variables we don't need to be
3757 * saved on the stack, then in the second round move things to the stack.
3758 */
3759 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3760 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3761 {
3762 uint32_t fVars = pReNative->Core.bmVars;
3763 while (fVars)
3764 {
3765 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3766 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3767 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3768 && (RT_BIT_32(idxReg) & fRegMask)
3769 && ( iLoop == 0
3770 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3771 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3772 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3773 {
3774 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3775 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3776 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3777 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3778 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3779 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3780
3781 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3782 {
3783 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3784 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3785 }
3786
3787 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3788 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3789
3790 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3791 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3792 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3793 return idxReg;
3794 }
3795 fVars &= ~RT_BIT_32(idxVar);
3796 }
3797 }
3798
3799 return UINT8_MAX;
3800}
3801
3802
3803/**
3804 * Reassigns a variable to a different register specified by the caller.
3805 *
3806 * @returns The new code buffer position.
3807 * @param pReNative The native recompile state.
3808 * @param off The current code buffer position.
3809 * @param idxVar The variable index.
3810 * @param idxRegOld The old host register number.
3811 * @param idxRegNew The new host register number.
3812 * @param pszCaller The caller for logging.
3813 */
3814static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3815 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3816{
3817 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3818 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3819 RT_NOREF(pszCaller);
3820
3821 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3822
3823 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3824 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3825 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3826 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3827
3828 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3829 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3830 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3831 if (fGstRegShadows)
3832 {
3833 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3834 | RT_BIT_32(idxRegNew);
3835 while (fGstRegShadows)
3836 {
3837 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3838 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3839
3840 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3841 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3842 }
3843 }
3844
3845 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3846 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3847 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3848 return off;
3849}
3850
3851
3852/**
3853 * Moves a variable to a different register or spills it onto the stack.
3854 *
3855 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3856 * kinds can easily be recreated if needed later.
3857 *
3858 * @returns The new code buffer position.
3859 * @param pReNative The native recompile state.
3860 * @param off The current code buffer position.
3861 * @param idxVar The variable index.
3862 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3863 * call-volatile registers.
3864 */
3865static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3866 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3867{
3868 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3869 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3870 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3871 Assert(!pVar->fRegAcquired);
3872
3873 uint8_t const idxRegOld = pVar->idxReg;
3874 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3875 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3876 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3877 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3878 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3879 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3880 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3881 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3882
3883
3884 /** @todo Add statistics on this.*/
3885 /** @todo Implement basic variable liveness analysis (python) so variables
3886     * can be freed immediately once no longer used.  Without this we risk
3887     * trashing registers and stack space on dead variables.
3888 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3889
3890 /*
3891 * First try move it to a different register, as that's cheaper.
3892 */
3893 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3894 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3895 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3896 if (fRegs)
3897 {
3898 /* Avoid using shadow registers, if possible. */
3899 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3900 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3901 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3902 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3903 }
3904
3905 /*
3906 * Otherwise we must spill the register onto the stack.
3907 */
3908 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3909 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3910 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3911 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3912
3913 pVar->idxReg = UINT8_MAX;
3914 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3915 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3916 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3917 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3918 return off;
3919}
3920
3921
3922/**
3923 * Allocates a temporary host general purpose register.
3924 *
3925 * This may emit code to save register content onto the stack in order to free
3926 * up a register.
3927 *
3928 * @returns The host register number; throws VBox status code on failure,
3929 * so no need to check the return value.
3930 * @param pReNative The native recompile state.
3931 * @param poff Pointer to the variable with the code buffer position.
3932 * This will be update if we need to move a variable from
3933 *                      This will be updated if we need to move a variable from
3934 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3935 * registers (@c true, default) or the other way around
3936 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3937 */
3938DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3939{
3940 /*
3941 * Try find a completely unused register, preferably a call-volatile one.
3942 */
3943 uint8_t idxReg;
3944 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3945 & ~pReNative->Core.bmHstRegsWithGstShadow
3946 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3947 if (fRegs)
3948 {
3949 if (fPreferVolatile)
3950 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3951 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3952 else
3953 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3954 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3955 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3956 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3957 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3958 }
3959 else
3960 {
3961 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3962 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3963 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3964 }
3965 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3966}
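/*
 * A minimal usage sketch (illustrative only): the typical pattern is to
 * allocate a scratch register, emit the code that needs it, and release it
 * again via the matching iemNativeRegFreeTmp() helper so it can be reused
 * (uSomeValue is a placeholder):
 *
 *      uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp, uSomeValue);
 *      // ... more code using idxRegTmp ...
 *      iemNativeRegFreeTmp(pReNative, idxRegTmp);
 */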
3967
3968
3969/**
3970 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
3971 * registers.
3972 *
3973 * @returns The host register number; throws VBox status code on failure,
3974 * so no need to check the return value.
3975 * @param pReNative The native recompile state.
3976 * @param poff Pointer to the variable with the code buffer position.
3977 *                      This will be updated if we need to move a variable from
3978 * register to stack in order to satisfy the request.
3979 * @param fRegMask Mask of acceptable registers.
3980 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3981 * registers (@c true, default) or the other way around
3982 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3983 */
3984DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3985 bool fPreferVolatile /*= true*/)
3986{
3987 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3988 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3989
3990 /*
3991 * Try find a completely unused register, preferably a call-volatile one.
3992 */
3993 uint8_t idxReg;
3994 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3995 & ~pReNative->Core.bmHstRegsWithGstShadow
3996 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3997 & fRegMask;
3998 if (fRegs)
3999 {
4000 if (fPreferVolatile)
4001 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4002 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4003 else
4004 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4005 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4006 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4007 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4008 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4009 }
4010 else
4011 {
4012 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4013 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4014 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4015 }
4016 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4017}
4018
4019
4020/**
4021 * Allocates a temporary register for loading an immediate value into.
4022 *
4023 * This will emit code to load the immediate, unless there happens to be an
4024 * unused register with the value already loaded.
4025 *
4026 * The caller will not modify the returned register; it must be considered
4027 * read-only. Free using iemNativeRegFreeTmpImm.
4028 *
4029 * @returns The host register number; throws VBox status code on failure, so no
4030 * need to check the return value.
4031 * @param pReNative The native recompile state.
4032 * @param poff Pointer to the variable with the code buffer position.
4033 * @param uImm The immediate value that the register must hold upon
4034 * return.
4035 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4036 * registers (@c true, default) or the other way around
4037 * (@c false).
4038 *
4039 * @note Reusing immediate values has not been implemented yet.
4040 */
4041DECL_HIDDEN_THROW(uint8_t)
4042iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4043{
4044 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4045 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4046 return idxReg;
4047}
4048
4049#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4050
4051# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4052/**
4053 * Helper for iemNativeLivenessGetStateByGstReg.
4054 *
4055 * @returns IEMLIVENESS_STATE_XXX
4056 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4057 * ORed together.
4058 */
4059DECL_FORCE_INLINE(uint32_t)
4060iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4061{
4062 /* INPUT trumps anything else. */
4063 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4064 return IEMLIVENESS_STATE_INPUT;
4065
4066 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4067 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4068 {
4069 /* If not all sub-fields are clobbered they must be considered INPUT. */
4070 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4071 return IEMLIVENESS_STATE_INPUT;
4072 return IEMLIVENESS_STATE_CLOBBERED;
4073 }
4074
4075 /* XCPT_OR_CALL trumps UNUSED. */
4076 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4077 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4078
4079 return IEMLIVENESS_STATE_UNUSED;
4080}
4081# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4082
4083
4084DECL_FORCE_INLINE(uint32_t)
4085iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4086{
4087# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4088 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4089 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4090# else
4091 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4092 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4093 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4094 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
4095# endif
4096}
4097
4098
4099DECL_FORCE_INLINE(uint32_t)
4100iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4101{
4102 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4103 if (enmGstReg == kIemNativeGstReg_EFlags)
4104 {
4105 /* Merge the eflags states to one. */
4106# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4107 uRet = RT_BIT_32(uRet);
4108 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4109 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4110 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4111 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4112 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4113 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4114 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4115# else
4116 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4117 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4118 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4119 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4120 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4121 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4122 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4123# endif
4124 }
4125 return uRet;
4126}
4127
4128
4129# ifdef VBOX_STRICT
4130/** For assertions only; the user checks that idxCurCall isn't zero. */
4131DECL_FORCE_INLINE(uint32_t)
4132iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4133{
4134 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4135}
4136# endif /* VBOX_STRICT */
4137
4138#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4139
4140/**
4141 * Marks host register @a idxHstReg as containing a shadow copy of guest
4142 * register @a enmGstReg.
4143 *
4144 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4145 * host register before calling.
4146 */
4147DECL_FORCE_INLINE(void)
4148iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4149{
4150 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4151 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4152 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4153
4154 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4155 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4156 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4157 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4158#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4159 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4160 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4161#else
4162 RT_NOREF(off);
4163#endif
4164}
4165
4166
4167/**
4168 * Clear any guest register shadow claims from @a idxHstReg.
4169 *
4170 * The register does not need to be shadowing any guest registers.
4171 */
4172DECL_FORCE_INLINE(void)
4173iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4174{
4175 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4176 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4177 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4178 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4179 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4180
4181#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4182 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4183 if (fGstRegs)
4184 {
4185 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4186 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4187 while (fGstRegs)
4188 {
4189 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4190 fGstRegs &= ~RT_BIT_64(iGstReg);
4191 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4192 }
4193 }
4194#else
4195 RT_NOREF(off);
4196#endif
4197
4198 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4199 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4200 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4201}
4202
4203
4204/**
4205 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4206 * and global overview flags.
4207 */
4208DECL_FORCE_INLINE(void)
4209iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4210{
4211 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4212 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4213 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4214 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4215 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4216 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4217 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4218
4219#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4220 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4221 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4222#else
4223 RT_NOREF(off);
4224#endif
4225
4226 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4227 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4228 if (!fGstRegShadowsNew)
4229 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4230 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4231}
4232
4233
4234#if 0 /* unused */
4235/**
4236 * Clear any guest register shadow claim for @a enmGstReg.
4237 */
4238DECL_FORCE_INLINE(void)
4239iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4240{
4241 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4242 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4243 {
4244 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4245 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4246 }
4247}
4248#endif
4249
4250
4251/**
4252 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4253 * as the new shadow of it.
4254 *
4255 * Unlike the other guest reg shadow helpers, this does the logging for you.
4256 * However, the liveness state is not asserted here; the caller must do
4257 * that.
4258 */
4259DECL_FORCE_INLINE(void)
4260iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4261 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4262{
4263 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4264 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4265 {
4266 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4267 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4268 if (idxHstRegOld == idxHstRegNew)
4269 return;
4270 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4271 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4272 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4273 }
4274 else
4275 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4276 g_aGstShadowInfo[enmGstReg].pszName));
4277 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4278}
4279
4280
4281/**
4282 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4283 * to @a idxRegTo.
4284 */
4285DECL_FORCE_INLINE(void)
4286iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4287 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4288{
4289 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4290 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4291 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4292 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4293 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4294 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4295 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4296 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4297 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4298
4299 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4300 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4301 if (!fGstRegShadowsFrom)
4302 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4303 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4304 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4305 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4306#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4307 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4308 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4309#else
4310 RT_NOREF(off);
4311#endif
4312}
4313
4314
4315/**
4316 * Allocates a temporary host general purpose register for keeping a guest
4317 * register value.
4318 *
4319 * Since we may already have a register holding the guest register value,
4320 * code will be emitted to do the loading if that's not the case. Code may also
4321 * be emitted if we have to free up a register to satisfy the request.
4322 *
4323 * @returns The host register number; throws VBox status code on failure, so no
4324 * need to check the return value.
4325 * @param pReNative The native recompile state.
4326 * @param poff Pointer to the variable with the code buffer
4327 *                          position. This will be updated if we need to move a
4328 * variable from register to stack in order to satisfy
4329 * the request.
4330 * @param   enmGstReg       The guest register that is to be updated.
4331 * @param enmIntendedUse How the caller will be using the host register.
4332 * @param   fNoVolatileRegs Set if no volatile registers are allowed, clear if any
4333 * register is okay (default). The ASSUMPTION here is
4334 * that the caller has already flushed all volatile
4335 * registers, so this is only applied if we allocate a
4336 * new register.
4337 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4338 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4339 */
4340DECL_HIDDEN_THROW(uint8_t)
4341iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4342 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4343 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4344{
4345 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4346#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4347 AssertMsg( fSkipLivenessAssert
4348 || pReNative->idxCurCall == 0
4349 || enmGstReg == kIemNativeGstReg_Pc
4350 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4351 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4352 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4353 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4354 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4355 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4356#endif
4357 RT_NOREF(fSkipLivenessAssert);
4358#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4359 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4360#endif
4361 uint32_t const fRegMask = !fNoVolatileRegs
4362 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4363 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4364
4365 /*
4366 * First check if the guest register value is already in a host register.
4367 */
4368 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4369 {
4370 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4371 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4372 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4373 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4374
4375 /* It's not supposed to be allocated... */
4376 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4377 {
4378 /*
4379 * If the register will trash the guest shadow copy, try to find a
4380 * completely unused register we can use instead. If that fails,
4381 * we need to disassociate the host reg from the guest reg.
4382 */
4383 /** @todo would be nice to know if preserving the register is in any way helpful. */
4384 /* If the purpose is calculations, try to duplicate the register value as
4385 we'll be clobbering the shadow. */
4386 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4387 && ( ~pReNative->Core.bmHstRegs
4388 & ~pReNative->Core.bmHstRegsWithGstShadow
4389 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4390 {
4391 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4392
4393 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4394
4395 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4396 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4397 g_apszIemNativeHstRegNames[idxRegNew]));
4398 idxReg = idxRegNew;
4399 }
4400 /* If the current register matches the restrictions, go ahead and allocate
4401 it for the caller. */
4402 else if (fRegMask & RT_BIT_32(idxReg))
4403 {
4404 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4405 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4406 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4407 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4408 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4409 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4410 else
4411 {
4412 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4413 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4414 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4415 }
4416 }
4417 /* Otherwise, allocate a register that satisfies the caller and transfer
4418 the shadowing if compatible with the intended use. (This basically
4419 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4420 else
4421 {
4422 Assert(fNoVolatileRegs);
4423 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4424 !fNoVolatileRegs
4425 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4426 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4427 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4428 {
4429 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4430 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4431 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4432 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4433 }
4434 else
4435 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4436 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4437 g_apszIemNativeHstRegNames[idxRegNew]));
4438 idxReg = idxRegNew;
4439 }
4440 }
4441 else
4442 {
4443 /*
4444 * Oops. Shadowed guest register already allocated!
4445 *
4446 * Allocate a new register, copy the value and, if updating, the
4447 * guest shadow copy assignment to the new register.
4448 */
4449 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4450 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4451 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4452 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4453
4454 /** @todo share register for readonly access. */
4455 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4456 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4457
4458 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4459 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4460
4461 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4462 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4463 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4464 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4465 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4466 else
4467 {
4468 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4469 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4470 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4471 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4472 }
4473 idxReg = idxRegNew;
4474 }
4475 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4476
4477#ifdef VBOX_STRICT
4478 /* Strict builds: Check that the value is correct. */
4479 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4480#endif
4481
4482 return idxReg;
4483 }
4484
4485 /*
4486 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4487 */
4488 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4489
4490 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4491 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4492
4493 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4494 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4495 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4496 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4497
4498 return idxRegNew;
4499}
4500
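/*
 * Usage sketch (hypothetical, not part of the recompiler proper): the typical
 * fetch-modify-store pattern built on this allocator, modelled on the delayed
 * RIP writeback code further down in this file.  Deliberately kept under
 * '#if 0'; only helpers that actually appear in this file are used.
 */
#if 0
static uint32_t iemNativeSketchAdvanceRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t cbToAdd)
{
    /* Grab a host register shadowing the guest RIP; load code is only emitted if needed. */
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    /* Modify it and store the result back into the guest context. */
    off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, cbToAdd);
    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));

    /* Free the temporary but keep the shadow association so later code can reuse it. */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif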
4501
4502/**
4503 * Allocates a temporary host general purpose register that already holds the
4504 * given guest register value.
4505 *
4506 * The use case for this function is places where the shadowing state cannot be
4507 * modified due to branching and such. This will fail if we don't have a
4508 * current shadow copy handy or if it's incompatible. The only code that will
4509 * be emitted here is value checking code in strict builds.
4510 *
4511 * The intended use can only be readonly!
4512 *
4513 * @returns The host register number, UINT8_MAX if not present.
4514 * @param pReNative The native recompile state.
4515 * @param poff Pointer to the instruction buffer offset.
4516 * Will be updated in strict builds if a register is
4517 * found.
4518 * @param enmGstReg The guest register that is to be read.
4519 * @note In strict builds, this may throw instruction buffer growth failures.
4520 * Non-strict builds will not throw anything.
4521 * @sa iemNativeRegAllocTmpForGuestReg
4522 */
4523DECL_HIDDEN_THROW(uint8_t)
4524iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4525{
4526 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4527#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4528 AssertMsg( pReNative->idxCurCall == 0
4529 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4530 || enmGstReg == kIemNativeGstReg_Pc,
4531 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4532#endif
4533
4534 /*
4535 * First check if the guest register value is already in a host register.
4536 */
4537 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4538 {
4539 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4540 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4541 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4542 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4543
4544 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4545 {
4546 /*
4547 * We only do readonly use here, so easy compared to the other
4548 * variant of this code.
4549 */
4550 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4551 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4552 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4553 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4554 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4555
4556#ifdef VBOX_STRICT
4557 /* Strict builds: Check that the value is correct. */
4558 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4559#else
4560 RT_NOREF(poff);
4561#endif
4562 return idxReg;
4563 }
4564 }
4565
4566 return UINT8_MAX;
4567}
4568
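/*
 * Usage sketch (hypothetical): this is for spots where the shadowing state must
 * not change, so the caller falls back to another strategy when no shadow copy
 * is at hand.  The CS limit register is just an illustrative choice.
 */
#if 0
static uint32_t iemNativeSketchUseCsLimitIfShadowed(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
                                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS));
    if (idxRegCsLim != UINT8_MAX)
    {
        /* ... read-only use of idxRegCsLim goes here ... */
        iemNativeRegFreeTmp(pReNative, idxRegCsLim);
    }
    /* else: no shadow copy available; the caller would take a branch-safe fallback path. */
    return off;
}
#endif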
4569
4570/**
4571 * Allocates argument registers for a function call.
4572 *
4573 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4574 * need to check the return value.
4575 * @param pReNative The native recompile state.
4576 * @param off The current code buffer offset.
4577 * @param cArgs The number of arguments the function call takes.
4578 */
4579DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4580{
4581 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4582 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4583 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4584 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4585
4586 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4587 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4588 else if (cArgs == 0)
4589 return off;
4590
4591 /*
4592 * Do we get lucky and find all registers free and not shadowing anything?
4593 */
4594 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4595 for (uint32_t i = 0; i < cArgs; i++)
4596 {
4597 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4598 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4599 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4600 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4601 }
4602 /*
4603 * Okay, not lucky so we have to free up the registers.
4604 */
4605 else
4606 for (uint32_t i = 0; i < cArgs; i++)
4607 {
4608 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4609 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4610 {
4611 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4612 {
4613 case kIemNativeWhat_Var:
4614 {
4615 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4616 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4617 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4618 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4619 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4620
4621 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4622 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4623 else
4624 {
4625 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4626 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4627 }
4628 break;
4629 }
4630
4631 case kIemNativeWhat_Tmp:
4632 case kIemNativeWhat_Arg:
4633 case kIemNativeWhat_rc:
4634 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4635 default:
4636 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4637 }
4638
4639 }
4640 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4641 {
4642 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4643 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4644 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4645 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4646 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4647 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4648 }
4649 else
4650 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4651 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4652 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4653 }
4654 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4655 return off;
4656}
4657
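/*
 * Usage sketch (hypothetical): reserving the argument registers up front and
 * then loading them by hand before emitting a call.  pfnHelper/uParam0 are
 * made-up placeholders; the real call emitters further down use
 * iemNativeRegMoveAndFreeAndFlushAtCall() instead.
 */
#if 0
static uint32_t iemNativeSketchCallWithArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnHelper, uint64_t uParam0)
{
    off = iemNativeRegAllocArgs(pReNative, off, 2);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, uParam0);
    return iemNativeEmitCallImm(pReNative, off, pfnHelper);
}
#endif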
4658
4659DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4660
4661
4662#if 0
4663/**
4664 * Frees a register assignment of any type.
4665 *
4666 * @param pReNative The native recompile state.
4667 * @param idxHstReg The register to free.
4668 *
4669 * @note Does not update variables.
4670 */
4671DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4672{
4673 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4674 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4675 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4676 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4677 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4678 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4679 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4680 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4681 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4682 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4683 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4684 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4685 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4686 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4687
4688 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4689 /* no flushing, right:
4690 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4691 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4692 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4693 */
4694}
4695#endif
4696
4697
4698/**
4699 * Frees a temporary register.
4700 *
4701 * Any shadow copies of guest registers assigned to the host register will not
4702 * be flushed by this operation.
4703 */
4704DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4705{
4706 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4707 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4708 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4709 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4710 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4711}
4712
4713
4714/**
4715 * Frees a temporary immediate register.
4716 *
4717 * It is assumed that the caller has not modified the register, so it still holds
4718 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4719 */
4720DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4721{
4722 iemNativeRegFreeTmp(pReNative, idxHstReg);
4723}
4724
4725
4726/**
4727 * Frees a register assigned to a variable.
4728 *
4729 * The register will be disassociated from the variable.
4730 */
4731DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4732{
4733 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4734 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4735 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4736 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4737 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4738
4739 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4740 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4741 if (!fFlushShadows)
4742 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4743 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4744 else
4745 {
4746 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4747 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4748 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4749 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4750 uint64_t fGstRegShadows = fGstRegShadowsOld;
4751 while (fGstRegShadows)
4752 {
4753 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4754 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4755
4756 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4757 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4758 }
4759 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4760 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4761 }
4762}
4763
4764
4765/**
4766 * Called right before emitting a call instruction to move anything important
4767 * out of call-volatile registers, free and flush the call-volatile registers,
4768 * optionally freeing argument variables.
4769 *
4770 * @returns New code buffer offset, UINT32_MAX on failure.
4771 * @param pReNative The native recompile state.
4772 * @param off The code buffer offset.
4773 * @param cArgs The number of arguments the function call takes.
4774 * It is presumed that the host register part of these has
4775 * been allocated as such already and won't need moving,
4776 * just freeing.
4777 * @param fKeepVars Mask of variables that should keep their register
4778 * assignments. Caller must take care to handle these.
4779 */
4780DECL_HIDDEN_THROW(uint32_t)
4781iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4782{
4783 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4784
4785 /* fKeepVars will reduce this mask. */
4786 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4787
4788 /*
4789 * Move anything important out of volatile registers.
4790 */
4791 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4792 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4793 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4794#ifdef IEMNATIVE_REG_FIXED_TMP0
4795 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4796#endif
4797#ifdef IEMNATIVE_REG_FIXED_TMP1
4798 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4799#endif
4800#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4801 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4802#endif
4803 & ~g_afIemNativeCallRegs[cArgs];
4804
4805 fRegsToMove &= pReNative->Core.bmHstRegs;
4806 if (!fRegsToMove)
4807 { /* likely */ }
4808 else
4809 {
4810 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4811 while (fRegsToMove != 0)
4812 {
4813 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4814 fRegsToMove &= ~RT_BIT_32(idxReg);
4815
4816 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4817 {
4818 case kIemNativeWhat_Var:
4819 {
4820 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4821 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4822 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4823 Assert(pVar->idxReg == idxReg);
4824 if (!(RT_BIT_32(idxVar) & fKeepVars))
4825 {
4826 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4827 idxVar, pVar->enmKind, pVar->idxReg));
4828 if (pVar->enmKind != kIemNativeVarKind_Stack)
4829 pVar->idxReg = UINT8_MAX;
4830 else
4831 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4832 }
4833 else
4834 fRegsToFree &= ~RT_BIT_32(idxReg);
4835 continue;
4836 }
4837
4838 case kIemNativeWhat_Arg:
4839 AssertMsgFailed(("What?!?: %u\n", idxReg));
4840 continue;
4841
4842 case kIemNativeWhat_rc:
4843 case kIemNativeWhat_Tmp:
4844 AssertMsgFailed(("Missing free: %u\n", idxReg));
4845 continue;
4846
4847 case kIemNativeWhat_FixedTmp:
4848 case kIemNativeWhat_pVCpuFixed:
4849 case kIemNativeWhat_pCtxFixed:
4850 case kIemNativeWhat_PcShadow:
4851 case kIemNativeWhat_FixedReserved:
4852 case kIemNativeWhat_Invalid:
4853 case kIemNativeWhat_End:
4854 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4855 }
4856 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4857 }
4858 }
4859
4860 /*
4861 * Do the actual freeing.
4862 */
4863 if (pReNative->Core.bmHstRegs & fRegsToFree)
4864 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4865 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4866 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4867
4868 /* If there are guest register shadows in any call-volatile register, we
4869 have to clear the corresponding guest register masks for each register. */
4870 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4871 if (fHstRegsWithGstShadow)
4872 {
4873 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4874 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4875 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4876 do
4877 {
4878 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4879 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4880
4881 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4882 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4883 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4884 } while (fHstRegsWithGstShadow != 0);
4885 }
4886
4887 return off;
4888}
4889
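/*
 * Usage sketch (hypothetical, condensed from iemNativeEmitCImplCall and
 * iemNativeEmitThreadedCall below): the usual sequence around a helper call -
 * flush pending guest writes and shadow copies, free the call-volatile
 * registers, load the arguments, call, and check the status code.
 * pfnHelper/uParam0/idxInstr are placeholders.
 */
#if 0
static uint32_t iemNativeSketchEmitHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
                                              uintptr_t pfnHelper, uint64_t uParam0)
{
    off = iemNativeRegFlushPendingWrites(pReNative, off);
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2);

    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, uParam0);
    off = iemNativeEmitCallImm(pReNative, off, pfnHelper);

    return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
}
#endif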
4890
4891/**
4892 * Flushes a set of guest register shadow copies.
4893 *
4894 * This is usually done after calling a threaded function or a C-implementation
4895 * of an instruction.
4896 *
4897 * @param pReNative The native recompile state.
4898 * @param fGstRegs Set of guest registers to flush.
4899 */
4900DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4901{
4902 /*
4903 * Reduce the mask by what's currently shadowed
4904 */
4905 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4906 fGstRegs &= bmGstRegShadowsOld;
4907 if (fGstRegs)
4908 {
4909 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4910 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4911 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4912 if (bmGstRegShadowsNew)
4913 {
4914 /*
4915 * Partial.
4916 */
4917 do
4918 {
4919 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4920 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4921 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4922 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4923 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4924
4925 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4926 fGstRegs &= ~fInThisHstReg;
4927 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4928 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4929 if (!fGstRegShadowsNew)
4930 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4931 } while (fGstRegs != 0);
4932 }
4933 else
4934 {
4935 /*
4936 * Clear all.
4937 */
4938 do
4939 {
4940 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4941 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4942 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4943 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4944 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4945
4946 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4947 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4948 } while (fGstRegs != 0);
4949 pReNative->Core.bmHstRegsWithGstShadow = 0;
4950 }
4951 }
4952}
4953
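/*
 * Usage sketch (hypothetical): after emitting something that rewrites RIP and
 * EFLAGS in CPUMCTX behind the allocator's back, the now stale shadow copies
 * are dropped so the next access reloads them.
 */
#if 0
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
#endif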
4954
4955/**
4956 * Flushes guest register shadow copies held by a set of host registers.
4957 *
4958 * This is used with the TLB lookup code for ensuring that we don't carry on
4959 * with any guest shadows in volatile registers, as these will get corrupted by
4960 * a TLB miss.
4961 *
4962 * @param pReNative The native recompile state.
4963 * @param fHstRegs Set of host registers to flush guest shadows for.
4964 */
4965DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4966{
4967 /*
4968 * Reduce the mask by what's currently shadowed.
4969 */
4970 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4971 fHstRegs &= bmHstRegsWithGstShadowOld;
4972 if (fHstRegs)
4973 {
4974 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4975 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4976 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4977 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4978 if (bmHstRegsWithGstShadowNew)
4979 {
4980 /*
4981 * Partial (likely).
4982 */
4983 uint64_t fGstShadows = 0;
4984 do
4985 {
4986 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4987 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4988 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4989 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4990
4991 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4992 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4993 fHstRegs &= ~RT_BIT_32(idxHstReg);
4994 } while (fHstRegs != 0);
4995 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4996 }
4997 else
4998 {
4999 /*
5000 * Clear all.
5001 */
5002 do
5003 {
5004 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5005 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5006 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5007 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5008
5009 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5010 fHstRegs &= ~RT_BIT_32(idxHstReg);
5011 } while (fHstRegs != 0);
5012 pReNative->Core.bmGstRegShadows = 0;
5013 }
5014 }
5015}
5016
5017
5018/**
5019 * Restores guest shadow copies in volatile registers.
5020 *
5021 * This is used after calling a helper function (think TLB miss) to restore the
5022 * register state of volatile registers.
5023 *
5024 * @param pReNative The native recompile state.
5025 * @param off The code buffer offset.
5026 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5027 * be active (allocated) w/o asserting. Hack.
5028 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5029 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5030 */
5031DECL_HIDDEN_THROW(uint32_t)
5032iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5033{
5034 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5035 if (fHstRegs)
5036 {
5037 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5038 do
5039 {
5040 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5041
5042 /* It's not fatal if a register is active holding a variable that is
5043 shadowing a guest register, ASSUMING all pending guest register
5044 writes were flushed prior to the helper call. However, we'll be
5045 emitting duplicate restores, so it wastes code space. */
5046 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5047 RT_NOREF(fHstRegsActiveShadows);
5048
5049 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5050 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5051 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5052 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5053
5054 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5055 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5056
5057 fHstRegs &= ~RT_BIT_32(idxHstReg);
5058 } while (fHstRegs != 0);
5059 }
5060 return off;
5061}
5062
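/*
 * Usage sketch (hypothetical, modelled on the TLB-miss handling this is meant
 * for): the helper call clobbers the volatile host registers, so the guest
 * values are reloaded into whatever volatile registers still claim to shadow
 * them.  pfnTlbMissHelper is a made-up name.
 */
#if 0
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnTlbMissHelper);
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif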
5063
5064#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5065# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
5066static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5067{
5068 /* Compare the shadow with the context value, they should match. */
5069 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
5070 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
5071 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
5072 return off;
5073}
5074# endif
5075
5076/**
5077 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5078 */
5079static uint32_t
5080iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5081{
5082 if (pReNative->Core.offPc)
5083 {
5084# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5085 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5086 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5087# endif
5088
5089# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5090 /* Allocate a temporary PC register. */
5091 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5092
5093 /* Perform the addition and store the result. */
5094 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5095 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5096
5097 /* Free but don't flush the PC register. */
5098 iemNativeRegFreeTmp(pReNative, idxPcReg);
5099# else
5100 /* Compare the shadow with the context value, they should match. */
5101 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5102 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5103# endif
5104
5105 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5106 pReNative->Core.offPc = 0;
5107 pReNative->Core.cInstrPcUpdateSkipped = 0;
5108 }
5109# if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
5110 else
5111 {
5112 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5113 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
5114 }
5115# endif
5116
5117 return off;
5118}
5119#endif
5120
5121
5122/**
5123 * Flushes delayed write of a specific guest register.
5124 *
5125 * This must be called prior to calling CImpl functions and any helpers that use
5126 * the guest state (like raising exceptions) and such.
5127 *
5128 * This optimization has not yet been implemented. The first target would be
5129 * RIP updates, since these are the most common ones.
5130 */
5131DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5132 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
5133{
5134#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5135 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
5136#endif
5137 RT_NOREF(pReNative, enmClass, idxReg);
5138 return off;
5139}
5140
5141
5142/**
5143 * Flushes any delayed guest register writes.
5144 *
5145 * This must be called prior to calling CImpl functions and any helpers that use
5146 * the guest state (like raising exceptions) and such.
5147 *
5148 * Currently only delayed RIP updates are covered (when IEMNATIVE_WITH_DELAYED_PC_UPDATING
5149 * is defined), as these are the most common writes.
5150 */
5151DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/)
5152{
5153#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5154 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5155 off = iemNativeEmitPcWriteback(pReNative, off);
5156#else
5157 RT_NOREF(pReNative, fGstShwExcept);
5158#endif
5159
5160 return off;
5161}
5162
5163
5164#ifdef VBOX_STRICT
5165/**
5166 * Does internal register allocator sanity checks.
5167 */
5168static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5169{
5170 /*
5171 * Iterate host registers building a guest shadowing set.
5172 */
5173 uint64_t bmGstRegShadows = 0;
5174 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5175 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5176 while (bmHstRegsWithGstShadow)
5177 {
5178 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5179 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5180 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5181
5182 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5183 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5184 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5185 bmGstRegShadows |= fThisGstRegShadows;
5186 while (fThisGstRegShadows)
5187 {
5188 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5189 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5190 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5191 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5192 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5193 }
5194 }
5195 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5196 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5197 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5198
5199 /*
5200 * Now the other way around, checking the guest to host index array.
5201 */
5202 bmHstRegsWithGstShadow = 0;
5203 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5204 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5205 while (bmGstRegShadows)
5206 {
5207 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5208 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5209 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5210
5211 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5212 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5213 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5214 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5215 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5216 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5217 }
5218 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5219 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5220 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5221}
5222#endif
5223
5224
5225/*********************************************************************************************************************************
5226* Code Emitters (larger snippets) *
5227*********************************************************************************************************************************/
5228
5229/**
5230 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5231 * extending to 64-bit width.
5232 *
5233 * @returns New code buffer offset on success, UINT32_MAX on failure.
5234 * @param pReNative The native recompile state.
5235 * @param off The current code buffer position.
5236 * @param idxHstReg The host register to load the guest register value into.
5237 * @param enmGstReg The guest register to load.
5238 *
5239 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5240 * that is something the caller needs to do if applicable.
5241 */
5242DECL_HIDDEN_THROW(uint32_t)
5243iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5244{
5245 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
5246 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5247
5248 switch (g_aGstShadowInfo[enmGstReg].cb)
5249 {
5250 case sizeof(uint64_t):
5251 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5252 case sizeof(uint32_t):
5253 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5254 case sizeof(uint16_t):
5255 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5256#if 0 /* not present in the table. */
5257 case sizeof(uint8_t):
5258 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5259#endif
5260 default:
5261 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5262 }
5263}
5264
5265
5266#ifdef VBOX_STRICT
5267/**
5268 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5269 *
5270 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5271 * Trashes EFLAGS on AMD64.
5272 */
5273static uint32_t
5274iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5275{
5276# ifdef RT_ARCH_AMD64
5277 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5278
5279 /* rol reg64, 32 */
5280 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5281 pbCodeBuf[off++] = 0xc1;
5282 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5283 pbCodeBuf[off++] = 32;
5284
5285 /* test reg32, ffffffffh */
5286 if (idxReg >= 8)
5287 pbCodeBuf[off++] = X86_OP_REX_B;
5288 pbCodeBuf[off++] = 0xf7;
5289 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5290 pbCodeBuf[off++] = 0xff;
5291 pbCodeBuf[off++] = 0xff;
5292 pbCodeBuf[off++] = 0xff;
5293 pbCodeBuf[off++] = 0xff;
5294
5295 /* je/jz +1 */
5296 pbCodeBuf[off++] = 0x74;
5297 pbCodeBuf[off++] = 0x01;
5298
5299 /* int3 */
5300 pbCodeBuf[off++] = 0xcc;
5301
5302 /* rol reg64, 32 */
5303 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5304 pbCodeBuf[off++] = 0xc1;
5305 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5306 pbCodeBuf[off++] = 32;
5307
5308# elif defined(RT_ARCH_ARM64)
5309 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5310 /* lsr tmp0, reg64, #32 */
5311 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5312 /* cbz tmp0, +1 */
5313 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5314 /* brk #0x1100 */
5315 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5316
5317# else
5318# error "Port me!"
5319# endif
5320 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5321 return off;
5322}
5323#endif /* VBOX_STRICT */
5324
5325
5326#ifdef VBOX_STRICT
5327/**
5328 * Emits code that checks that the content of register @a idxReg is the same
5329 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5330 * instruction if that's not the case.
5331 *
5332 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5333 * Trashes EFLAGS on AMD64.
5334 */
5335static uint32_t
5336iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5337{
5338# ifdef RT_ARCH_AMD64
5339 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5340
5341 /* cmp reg, [mem] */
5342 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5343 {
5344 if (idxReg >= 8)
5345 pbCodeBuf[off++] = X86_OP_REX_R;
5346 pbCodeBuf[off++] = 0x38;
5347 }
5348 else
5349 {
5350 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5351 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5352 else
5353 {
5354 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5355 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5356 else
5357 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5358 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5359 if (idxReg >= 8)
5360 pbCodeBuf[off++] = X86_OP_REX_R;
5361 }
5362 pbCodeBuf[off++] = 0x39;
5363 }
5364 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5365
5366 /* je/jz +1 */
5367 pbCodeBuf[off++] = 0x74;
5368 pbCodeBuf[off++] = 0x01;
5369
5370 /* int3 */
5371 pbCodeBuf[off++] = 0xcc;
5372
5373 /* For values smaller than the register size, we must check that the rest
5374 of the register is all zeros. */
5375 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5376 {
5377 /* test reg64, imm32 */
5378 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5379 pbCodeBuf[off++] = 0xf7;
5380 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5381 pbCodeBuf[off++] = 0;
5382 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5383 pbCodeBuf[off++] = 0xff;
5384 pbCodeBuf[off++] = 0xff;
5385
5386 /* je/jz +1 */
5387 pbCodeBuf[off++] = 0x74;
5388 pbCodeBuf[off++] = 0x01;
5389
5390 /* int3 */
5391 pbCodeBuf[off++] = 0xcc;
5392 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5393 }
5394 else
5395 {
5396 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5397 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5398 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5399 }
5400
5401# elif defined(RT_ARCH_ARM64)
5402 /* mov TMP0, [gstreg] */
5403 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5404
5405 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5406 /* sub tmp0, tmp0, idxReg */
5407 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5408 /* cbz tmp0, +1 */
5409 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5410 /* brk #0x1000+enmGstReg */
5411 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5412 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5413
5414# else
5415# error "Port me!"
5416# endif
5417 return off;
5418}
5419#endif /* VBOX_STRICT */
5420
5421
5422#ifdef VBOX_STRICT
5423/**
5424 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
5425 * important bits.
5426 *
5427 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5428 * Trashes EFLAGS on AMD64.
5429 */
5430static uint32_t
5431iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5432{
5433 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5434 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5435 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5436 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5437
5438# ifdef RT_ARCH_AMD64
5439 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5440
5441 /* je/jz +1 */
5442 pbCodeBuf[off++] = 0x74;
5443 pbCodeBuf[off++] = 0x01;
5444
5445 /* int3 */
5446 pbCodeBuf[off++] = 0xcc;
5447
5448# elif defined(RT_ARCH_ARM64)
5449 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5450
5451 /* b.eq +1 */
5452 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5453 /* brk #0x2000 */
5454 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5455
5456# else
5457# error "Port me!"
5458# endif
5459 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5460
5461 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5462 return off;
5463}
5464#endif /* VBOX_STRICT */
5465
5466
5467/**
5468 * Emits code for checking the return code of a call and rcPassUp, returning
5469 * from the code if either is non-zero.
5470 */
5471DECL_HIDDEN_THROW(uint32_t)
5472iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5473{
5474#ifdef RT_ARCH_AMD64
5475 /*
5476 * AMD64: eax = call status code.
5477 */
5478
5479 /* edx = rcPassUp */
5480 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5481# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5482 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5483# endif
5484
5485 /* edx = eax | rcPassUp */
5486 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5487 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5488 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5489 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5490
5491 /* Jump to non-zero status return path. */
5492 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5493
5494 /* done. */
5495
5496#elif defined(RT_ARCH_ARM64)
5497 /*
5498 * ARM64: w0 = call status code.
5499 */
5500# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5501 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5502# endif
5503 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5504
5505 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5506
5507 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5508
5509 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5510 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5511 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5512
5513#else
5514# error "port me"
5515#endif
5516 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5517 RT_NOREF_PV(idxInstr);
5518 return off;
5519}
5520
5521
5522/**
5523 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5524 * raising a \#GP(0) if it isn't.
5525 *
5526 * @returns New code buffer offset, UINT32_MAX on failure.
5527 * @param pReNative The native recompile state.
5528 * @param off The code buffer offset.
5529 * @param idxAddrReg The host register with the address to check.
5530 * @param idxInstr The current instruction.
5531 */
5532DECL_HIDDEN_THROW(uint32_t)
5533iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5534{
5535 /*
5536 * Make sure we don't have any outstanding guest register writes as we may
5537 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5538 */
5539 off = iemNativeRegFlushPendingWrites(pReNative, off);
5540
5541#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5542 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5543#else
5544 RT_NOREF(idxInstr);
5545#endif
5546
5547#ifdef RT_ARCH_AMD64
5548 /*
5549 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5550 * return raisexcpt();
5551 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
5552 */
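 /*
 * Worked example of the check above: for the canonical address
 * 0xffff800000000000 the high dword is 0xffff8000; adding 0x8000 wraps the
 * 32-bit value around to 0 and 0 >> 16 == 0, so no exception.  For the
 * non-canonical 0x0000800000000000 the high dword is 0x00008000; adding
 * 0x8000 gives 0x00010000 and 0x00010000 >> 16 == 1, so we raise #GP(0).
 */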
5553 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5554
5555 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5556 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5557 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5558 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5559 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5560
5561 iemNativeRegFreeTmp(pReNative, iTmpReg);
5562
5563#elif defined(RT_ARCH_ARM64)
5564 /*
5565 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5566 * return raisexcpt();
5567 * ----
5568 * mov x1, 0x800000000000
5569 * add x1, x0, x1
5570 * cmp xzr, x1, lsr 48
5571 * b.ne .Lraisexcpt
5572 */
5573 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5574
5575 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5576 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5577 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5578 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5579
5580 iemNativeRegFreeTmp(pReNative, iTmpReg);
5581
5582#else
5583# error "Port me"
5584#endif
5585 return off;
5586}
5587
5588
5589/**
5590 * Emits code to check that the content of @a idxAddrReg is within the limit
5591 * of CS, raising a \#GP(0) if it isn't.
5592 *
5593 * @returns New code buffer offset; throws VBox status code on error.
5594 * @param pReNative The native recompile state.
5595 * @param off The code buffer offset.
5596 * @param idxAddrReg The host register (32-bit) with the address to
5597 * check.
5598 * @param idxInstr The current instruction.
5599 */
5600DECL_HIDDEN_THROW(uint32_t)
5601iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5602 uint8_t idxAddrReg, uint8_t idxInstr)
5603{
5604 /*
5605 * Make sure we don't have any outstanding guest register writes as we may
5606 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5607 */
5608 off = iemNativeRegFlushPendingWrites(pReNative, off);
5609
5610#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5611 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5612#else
5613 RT_NOREF(idxInstr);
5614#endif
5615
5616 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5617 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
5618 kIemNativeGstRegUse_ReadOnly);
5619
5620 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
5621 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5622
5623 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
5624 return off;
5625}
5626
5627
5628/**
5629 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5630 *
5631 * @returns The flush mask.
5632 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5633 * @param fGstShwFlush The starting flush mask.
5634 */
5635DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5636{
5637 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5638 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5639 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5640 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5641 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5642 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5643 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5644 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5645 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5646 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5647 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5648 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5649 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5650 return fGstShwFlush;
5651}
5652
5653
5654/**
5655 * Emits a call to a CImpl function or something similar.
5656 */
5657DECL_HIDDEN_THROW(uint32_t)
5658iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5659 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5660{
5661 /* Writeback everything. */
5662 off = iemNativeRegFlushPendingWrites(pReNative, off);
5663
5664 /*
5665 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5666 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5667 */
5668 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5669 fGstShwFlush
5670 | RT_BIT_64(kIemNativeGstReg_Pc)
5671 | RT_BIT_64(kIemNativeGstReg_EFlags));
5672 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5673
5674 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5675
5676 /*
5677 * Load the parameters.
5678 */
5679#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
5680 /* Special-case the hidden VBOXSTRICTRC pointer. */
5681 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5682 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5683 if (cAddParams > 0)
5684 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5685 if (cAddParams > 1)
5686 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5687 if (cAddParams > 2)
5688 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5689 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5690
5691#else
5692 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5693 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5694 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5695 if (cAddParams > 0)
5696 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5697 if (cAddParams > 1)
5698 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5699 if (cAddParams > 2)
5700# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5701 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5702# else
5703 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5704# endif
5705#endif
5706
5707 /*
5708 * Make the call.
5709 */
5710 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5711
5712#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5713 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5714#endif
5715
5716 /*
5717 * Check the status code.
5718 */
5719 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5720}
5721
5722
5723/**
5724 * Emits a call to a threaded worker function.
5725 */
5726DECL_HIDDEN_THROW(uint32_t)
5727iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5728{
5729 /* We don't know what the threaded function is doing so we must flush all pending writes. */
5730 off = iemNativeRegFlushPendingWrites(pReNative, off);
5731
5732 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5733 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5734
5735#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5736 /* The threaded function may throw / long jmp, so set current instruction
5737 number if we're counting. */
5738 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5739#endif
5740
5741 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
5742
5743#ifdef RT_ARCH_AMD64
5744 /* Load the parameters and emit the call. */
5745# ifdef RT_OS_WINDOWS
5746# ifndef VBOXSTRICTRC_STRICT_ENABLED
5747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5748 if (cParams > 0)
5749 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5750 if (cParams > 1)
5751 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5752 if (cParams > 2)
5753 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5754# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5755 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5756 if (cParams > 0)
5757 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5758 if (cParams > 1)
5759 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5760 if (cParams > 2)
5761 {
5762 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5763 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5764 }
5765 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5766# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5767# else
5768 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5769 if (cParams > 0)
5770 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5771 if (cParams > 1)
5772 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5773 if (cParams > 2)
5774 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5775# endif
5776
5777 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5778
5779# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5780 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5781# endif
5782
5783#elif RT_ARCH_ARM64
5784 /*
5785 * ARM64:
5786 */
5787 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5788 if (cParams > 0)
5789 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5790 if (cParams > 1)
5791 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5792 if (cParams > 2)
5793 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5794
5795 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5796
5797#else
5798# error "port me"
5799#endif
5800
5801 /*
5802 * Check the status code.
5803 */
5804 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5805
5806 return off;
5807}
5808
5809#ifdef VBOX_WITH_STATISTICS
5810/**
5811 * Emits code to update the thread call statistics.
5812 */
5813DECL_INLINE_THROW(uint32_t)
5814iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5815{
5816 /*
5817 * Update threaded function stats.
5818 */
5819 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
5820 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
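    /* The ARM64 emitter needs two temporary registers for the load/increment/store
       sequence, while the AMD64 one is passed UINT8_MAX as no temporaries are needed
       there (the counter can be incremented directly in memory). */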
5821# if defined(RT_ARCH_ARM64)
5822 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
5823 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
5824 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
5825 iemNativeRegFreeTmp(pReNative, idxTmp1);
5826 iemNativeRegFreeTmp(pReNative, idxTmp2);
5827# else
5828 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
5829# endif
5830 return off;
5831}
5832#endif /* VBOX_WITH_STATISTICS */
5833
5834
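/*
 * Note: the label-tail emitters below all follow the same pattern: if earlier code
 * requested the label (iemNativeLabelFind() returns something other than UINT32_MAX),
 * it is defined here, the corresponding helper is called (or the status code is loaded
 * into the return register) and the code jumps back to the common return sequence;
 * otherwise nothing is emitted.
 */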
5835/**
5836 * Emits the code at the CheckBranchMiss label.
5837 */
5838static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5839{
5840 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5841 if (idxLabel != UINT32_MAX)
5842 {
5843 iemNativeLabelDefine(pReNative, idxLabel, off);
5844
5845 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5846 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5847 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5848
5849 /* jump back to the return sequence. */
5850 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5851 }
5852 return off;
5853}
5854
5855
5856/**
5857 * Emits the code at the NeedCsLimChecking label.
5858 */
5859static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5860{
5861 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5862 if (idxLabel != UINT32_MAX)
5863 {
5864 iemNativeLabelDefine(pReNative, idxLabel, off);
5865
5866 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5867 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5868 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5869
5870 /* jump back to the return sequence. */
5871 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5872 }
5873 return off;
5874}
5875
5876
5877/**
5878 * Emits the code at the ObsoleteTb label.
5879 */
5880static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5881{
5882 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5883 if (idxLabel != UINT32_MAX)
5884 {
5885 iemNativeLabelDefine(pReNative, idxLabel, off);
5886
5887 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5888 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5889 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5890
5891 /* jump back to the return sequence. */
5892 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5893 }
5894 return off;
5895}
5896
5897
5898/**
5899 * Emits the code at the RaiseGP0 label.
5900 */
5901static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5902{
5903 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5904 if (idxLabel != UINT32_MAX)
5905 {
5906 iemNativeLabelDefine(pReNative, idxLabel, off);
5907
5908 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5909 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5910 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5911
5912 /* jump back to the return sequence. */
5913 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5914 }
5915 return off;
5916}
5917
5918
5919/**
5920 * Emits the code at the RaiseNm label.
5921 */
5922static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5923{
5924 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
5925 if (idxLabel != UINT32_MAX)
5926 {
5927 iemNativeLabelDefine(pReNative, idxLabel, off);
5928
5929 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
5930 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5931 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
5932
5933 /* jump back to the return sequence. */
5934 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5935 }
5936 return off;
5937}
5938
5939
5940/**
5941 * Emits the code at the RaiseUd label.
5942 */
5943static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5944{
5945 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
5946 if (idxLabel != UINT32_MAX)
5947 {
5948 iemNativeLabelDefine(pReNative, idxLabel, off);
5949
5950 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
5951 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5952 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
5953
5954 /* jump back to the return sequence. */
5955 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5956 }
5957 return off;
5958}
5959
5960
5961/**
5962 * Emits the code at the RaiseMf label.
5963 */
5964static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5965{
5966 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
5967 if (idxLabel != UINT32_MAX)
5968 {
5969 iemNativeLabelDefine(pReNative, idxLabel, off);
5970
5971 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
5972 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5973 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
5974
5975 /* jump back to the return sequence. */
5976 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5977 }
5978 return off;
5979}
5980
5981
5982/**
5983 * Emits the code at the ReturnWithFlags label (returns
5984 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5985 */
5986static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5987{
5988 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5989 if (idxLabel != UINT32_MAX)
5990 {
5991 iemNativeLabelDefine(pReNative, idxLabel, off);
5992
5993 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5994
5995 /* jump back to the return sequence. */
5996 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5997 }
5998 return off;
5999}
6000
6001
6002/**
6003 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6004 */
6005static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6006{
6007 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6008 if (idxLabel != UINT32_MAX)
6009 {
6010 iemNativeLabelDefine(pReNative, idxLabel, off);
6011
6012 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6013
6014 /* jump back to the return sequence. */
6015 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6016 }
6017 return off;
6018}
6019
6020
6021/**
6022 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6023 */
6024static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6025{
6026 /*
6027 * Generate the rc + rcPassUp fiddling code if needed.
6028 */
6029 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6030 if (idxLabel != UINT32_MAX)
6031 {
6032 iemNativeLabelDefine(pReNative, idxLabel, off);
6033
6034        /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
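        /* On arrival the host return register (eax/x0) holds the status code and, with
           IEMNATIVE_WITH_INSTRUCTION_COUNTING, cl/x2 already holds the instruction number;
           the code below just shuffles these into the calling convention's argument registers. */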
6035#ifdef RT_ARCH_AMD64
6036# ifdef RT_OS_WINDOWS
6037# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6038 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6039# endif
6040 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6041 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6042# else
6043 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6044 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6045# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6046 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6047# endif
6048# endif
6049# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6050 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6051# endif
6052
6053#else
6054 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6055 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6056 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6057#endif
6058
6059 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6060 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6061 }
6062 return off;
6063}
6064
6065
6066/**
6067 * Emits a standard epilog.
6068 */
6069static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6070{
6071 *pidxReturnLabel = UINT32_MAX;
6072
6073 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6074 off = iemNativeRegFlushPendingWrites(pReNative, off);
6075
6076 /*
6077 * Successful return, so clear the return register (eax, w0).
6078 */
6079 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
6080
6081 /*
6082 * Define label for common return point.
6083 */
6084 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6085 *pidxReturnLabel = idxReturn;
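    /* All the ReturnXxx/RaiseXxx tail code emitted by the functions above jumps back to this label. */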
6086
6087 /*
6088 * Restore registers and return.
6089 */
6090#ifdef RT_ARCH_AMD64
6091 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6092
6093    /* Reposition rsp at the r15 restore point. */
6094 pbCodeBuf[off++] = X86_OP_REX_W;
6095 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6096 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6097 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6098
6099 /* Pop non-volatile registers and return */
6100 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6101 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6102 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6103 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6104 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6105 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6106 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6107 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6108# ifdef RT_OS_WINDOWS
6109 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6110 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6111# endif
6112 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6113 pbCodeBuf[off++] = 0xc9; /* leave */
6114 pbCodeBuf[off++] = 0xc3; /* ret */
6115 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6116
6117#elif RT_ARCH_ARM64
6118 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6119
6120    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
6121 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6122 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6123 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6124 IEMNATIVE_FRAME_VAR_SIZE / 8);
6125 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6126 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6127 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6128 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6129 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6130 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6131 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6132 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6133 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6134 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6135 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6136 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6137
6138 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6139 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6140 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6141 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6142
6143 /* retab / ret */
6144# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6145 if (1)
6146 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6147 else
6148# endif
6149 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6150
6151#else
6152# error "port me"
6153#endif
6154 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6155
6156 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6157}
6158
6159
6160/**
6161 * Emits a standard prolog.
6162 */
6163static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6164{
6165#ifdef RT_ARCH_AMD64
6166 /*
6167 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6168 * reserving 64 bytes for stack variables plus 4 non-register argument
6169     * slots.  Fixed register assignment: xBX = pVCpu;
6170 *
6171 * Since we always do the same register spilling, we can use the same
6172 * unwind description for all the code.
6173 */
6174 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6175 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6176 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6177 pbCodeBuf[off++] = 0x8b;
6178 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6179 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6180 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6181# ifdef RT_OS_WINDOWS
6182 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6183 pbCodeBuf[off++] = 0x8b;
6184 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6185 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6186 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6187# else
6188 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6189 pbCodeBuf[off++] = 0x8b;
6190 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6191# endif
6192 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6193 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6194 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6195 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6196 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6197 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6198 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6199 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6200
6201# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6202 /* Save the frame pointer. */
6203 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6204# endif
6205
6206 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6207 X86_GREG_xSP,
6208 IEMNATIVE_FRAME_ALIGN_SIZE
6209 + IEMNATIVE_FRAME_VAR_SIZE
6210 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6211 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6212 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6213 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6214 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6215
6216#elif RT_ARCH_ARM64
6217 /*
6218 * We set up a stack frame exactly like on x86, only we have to push the
6219     * return address ourselves here.  We save all non-volatile registers.
6220 */
6221 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6222
6223# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs.  Investigate further, as we have been
6224                      *        unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.  It's
6225                      *        definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether
6226                      *        it's in any way conditional, so just emit this instruction now and hope for the best... */
6227 /* pacibsp */
6228 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6229# endif
6230
6231    /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
6232 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6233 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6234 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6235 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6236 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6237 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6238 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6239 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6240 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6241 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6242 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6243 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6244 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6245 /* Save the BP and LR (ret address) registers at the top of the frame. */
6246 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6247 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6248 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6249 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6250 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6251 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6252
6253 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6254 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6255
6256 /* mov r28, r0 */
6257 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6258 /* mov r27, r1 */
6259 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6260
6261# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6262 /* Save the frame pointer. */
6263 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6264 ARMV8_A64_REG_X2);
6265# endif
6266
6267#else
6268# error "port me"
6269#endif
6270 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6271 return off;
6272}
6273
6274
6275
6276
6277/*********************************************************************************************************************************
6278* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
6279*********************************************************************************************************************************/
6280
6281#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
6282 { \
6283 Assert(pReNative->Core.bmVars == 0); \
6284 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
6285 Assert(pReNative->Core.bmStack == 0); \
6286 pReNative->fMc = (a_fMcFlags); \
6287 pReNative->fCImpl = (a_fCImplFlags); \
6288 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
6289
6290/** We have to get to the end in recompilation mode, as otherwise we won't
6291 * generate code for all the IEM_MC_IF_XXX branches. */
6292#define IEM_MC_END() \
6293 iemNativeVarFreeAll(pReNative); \
6294 } return off
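/* Note: IEM_MC_BEGIN opens a scope (unbalanced '{') which IEM_MC_END closes again
   before returning off, so the two macros must always be used as a pair. */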
6295
6296
6297
6298/*********************************************************************************************************************************
6299* Native Emitter Support. *
6300*********************************************************************************************************************************/
6301
6302
6303#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
6304
6305#define IEM_MC_NATIVE_ELSE() } else {
6306
6307#define IEM_MC_NATIVE_ENDIF() } ((void)0)
6308
6309
6310#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
6311 off = a_fnEmitter(pReNative, off)
6312
6313#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
6314 off = a_fnEmitter(pReNative, off, (a0))
6315
6316#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
6317 off = a_fnEmitter(pReNative, off, (a0), (a1))
6318
6319#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
6320 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
6321
6322#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
6323 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
6324
6325#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
6326 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
6327
6328#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
6329 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
6330
6331#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
6332 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
6333
6334#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
6335 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
6336
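/* Rough usage sketch; the emitter worker name and variable indexes are illustrative only
 * (not taken from real instruction code), and the arch mask constants are assumed to come
 * from iprt/cdefs.h:
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmitSomethingWorker, idxVarDst, idxVarSrc);
 *      IEM_MC_NATIVE_ELSE()
 *          ... generic IEM_MC_XXX statements ...
 *      IEM_MC_NATIVE_ENDIF();
 */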
6337
6338
6339/*********************************************************************************************************************************
6340* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
6341*********************************************************************************************************************************/
6342
6343#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
6344 pReNative->fMc = 0; \
6345 pReNative->fCImpl = (a_fFlags); \
6346 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
6347
6348
6349#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6350 pReNative->fMc = 0; \
6351 pReNative->fCImpl = (a_fFlags); \
6352 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
6353
6354DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6355 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6356 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
6357{
6358 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
6359}
6360
6361
6362#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6363 pReNative->fMc = 0; \
6364 pReNative->fCImpl = (a_fFlags); \
6365 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6366 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
6367
6368DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6369 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6370 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
6371{
6372 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
6373}
6374
6375
6376#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6377 pReNative->fMc = 0; \
6378 pReNative->fCImpl = (a_fFlags); \
6379 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6380 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
6381
6382DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6383 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6384 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
6385 uint64_t uArg2)
6386{
6387 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
6388}
6389
6390
6391
6392/*********************************************************************************************************************************
6393* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
6394*********************************************************************************************************************************/
6395
6396/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
6397 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
6398DECL_INLINE_THROW(uint32_t)
6399iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6400{
6401 /*
6402     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
6403     * return with a special status code and make the execution loop deal with
6404     * this.  If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
6405     * exception and won't continue execution.  While CPUMCTX_DBG_DBGF_MASK
6406     * could continue w/o interruption, it will probably drop into the
6407     * debugger, so it is not worth the effort of trying to service it here and
6408     * we just lump it in with the handling of the others.
6409     *
6410     * To simplify the code and the register state management even more (wrt
6411     * the immediate in the AND operation), we always update the flags and skip
6412     * the extra check and its associated conditional jump.
6413 */
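    /* The emitted native sequence is roughly:
     *      eflags  = cpum.GstCtx.eflags;
     *      if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
     *          goto ReturnWithFlags;
     *      eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
     *      cpum.GstCtx.eflags = eflags;
     */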
6414 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
6415 <= UINT32_MAX);
6416#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6417 AssertMsg( pReNative->idxCurCall == 0
6418 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
6419 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
6420#endif
6421
6422 /*
6423     * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
6424     * any pending register writes must be flushed.
6425 */
6426 off = iemNativeRegFlushPendingWrites(pReNative, off);
6427
6428 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6429 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
6430 true /*fSkipLivenessAssert*/);
6431 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
6432 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
6433 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
6434 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
6435 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6436
6437 /* Free but don't flush the EFLAGS register. */
6438 iemNativeRegFreeTmp(pReNative, idxEflReg);
6439
6440 return off;
6441}
6442
6443
6444/** A no-op when a_rcNormal is VINF_SUCCESS; otherwise emits code that stores the
6445 * instruction number (when counting), flushes pending writes and jumps to ReturnBreak. */
6445template<int const a_rcNormal>
6446DECL_FORCE_INLINE(uint32_t)
6447iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6448{
6449 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
6450 if (a_rcNormal != VINF_SUCCESS)
6451 {
6452#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6453 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6454#else
6455 RT_NOREF_PV(idxInstr);
6456#endif
6457
6458 /* As this code returns from the TB any pending register writes must be flushed. */
6459 off = iemNativeRegFlushPendingWrites(pReNative, off);
6460
6461 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
6462 }
6463 return off;
6464}
6465
6466
6467#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
6468 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6469 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6470
6471#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6472 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6473 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6474 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6475
6476/** Same as iemRegAddToRip64AndFinishingNoFlags. */
6477DECL_INLINE_THROW(uint32_t)
6478iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6479{
6480#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6481# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6482 if (!pReNative->Core.offPc)
6483 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6484# endif
6485
6486 /* Allocate a temporary PC register. */
6487 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6488
6489 /* Perform the addition and store the result. */
6490 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
6491 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6492
6493 /* Free but don't flush the PC register. */
6494 iemNativeRegFreeTmp(pReNative, idxPcReg);
6495#endif
6496
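    /* With delayed PC updating the advance is only accumulated in Core.offPc here; the
       actual cpum.GstCtx.rip store is emitted later, unless we are inside a conditional
       where it must be written back straight away. */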
6497#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6498 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6499
6500 pReNative->Core.offPc += cbInstr;
6501# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6502 off = iemNativePcAdjustCheck(pReNative, off);
6503# endif
6504 if (pReNative->cCondDepth)
6505 off = iemNativeEmitPcWriteback(pReNative, off);
6506 else
6507 pReNative->Core.cInstrPcUpdateSkipped++;
6508#endif
6509
6510 return off;
6511}
6512
6513
6514#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
6515 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6516 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6517
6518#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6519 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6520 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6521 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6522
6523/** Same as iemRegAddToEip32AndFinishingNoFlags. */
6524DECL_INLINE_THROW(uint32_t)
6525iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6526{
6527#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6528# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6529 if (!pReNative->Core.offPc)
6530 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6531# endif
6532
6533 /* Allocate a temporary PC register. */
6534 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6535
6536 /* Perform the addition and store the result. */
6537 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6538 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6539
6540 /* Free but don't flush the PC register. */
6541 iemNativeRegFreeTmp(pReNative, idxPcReg);
6542#endif
6543
6544#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6545 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6546
6547 pReNative->Core.offPc += cbInstr;
6548# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6549 off = iemNativePcAdjustCheck(pReNative, off);
6550# endif
6551 if (pReNative->cCondDepth)
6552 off = iemNativeEmitPcWriteback(pReNative, off);
6553 else
6554 pReNative->Core.cInstrPcUpdateSkipped++;
6555#endif
6556
6557 return off;
6558}
6559
6560
6561#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
6562 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6563 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6564
6565#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6566 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6567 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6568 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6569
6570/** Same as iemRegAddToIp16AndFinishingNoFlags. */
6571DECL_INLINE_THROW(uint32_t)
6572iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6573{
6574#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6575# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6576 if (!pReNative->Core.offPc)
6577 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6578# endif
6579
6580 /* Allocate a temporary PC register. */
6581 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6582
6583 /* Perform the addition and store the result. */
6584 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6585 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6586 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6587
6588 /* Free but don't flush the PC register. */
6589 iemNativeRegFreeTmp(pReNative, idxPcReg);
6590#endif
6591
6592#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6593 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6594
6595 pReNative->Core.offPc += cbInstr;
6596# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6597 off = iemNativePcAdjustCheck(pReNative, off);
6598# endif
6599 if (pReNative->cCondDepth)
6600 off = iemNativeEmitPcWriteback(pReNative, off);
6601 else
6602 pReNative->Core.cInstrPcUpdateSkipped++;
6603#endif
6604
6605 return off;
6606}
6607
6608
6609
6610/*********************************************************************************************************************************
6611* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
6612*********************************************************************************************************************************/
6613
6614#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6615 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6616 (a_enmEffOpSize), pCallEntry->idxInstr); \
6617 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6618
6619#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6620 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6621 (a_enmEffOpSize), pCallEntry->idxInstr); \
6622 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6623 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6624
6625#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
6626 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6627 IEMMODE_16BIT, pCallEntry->idxInstr); \
6628 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6629
6630#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6631 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6632 IEMMODE_16BIT, pCallEntry->idxInstr); \
6633 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6634 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6635
6636#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
6637 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6638 IEMMODE_64BIT, pCallEntry->idxInstr); \
6639 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6640
6641#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6642 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6643 IEMMODE_64BIT, pCallEntry->idxInstr); \
6644 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6645 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6646
6647/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
6648 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
6649 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
6650DECL_INLINE_THROW(uint32_t)
6651iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6652 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6653{
6654 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
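    /* In 64-bit code a relative jump has either a 64-bit or a 16-bit effective operand
       size; a 32-bit operand size is promoted to 64-bit, hence only the two cases above. */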
6655
6656 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6657 off = iemNativeRegFlushPendingWrites(pReNative, off);
6658
6659#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6660 Assert(pReNative->Core.offPc == 0);
6661
6662 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6663#endif
6664
6665 /* Allocate a temporary PC register. */
6666 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6667
6668 /* Perform the addition. */
6669 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
6670
6671 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
6672 {
6673 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6674 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6675 }
6676 else
6677 {
6678 /* Just truncate the result to 16-bit IP. */
6679 Assert(enmEffOpSize == IEMMODE_16BIT);
6680 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6681 }
6682 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6683
6684 /* Free but don't flush the PC register. */
6685 iemNativeRegFreeTmp(pReNative, idxPcReg);
6686
6687 return off;
6688}
6689
6690
6691#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6692 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6693 (a_enmEffOpSize), pCallEntry->idxInstr); \
6694 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6695
6696#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6697 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6698 (a_enmEffOpSize), pCallEntry->idxInstr); \
6699 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6700 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6701
6702#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
6703 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6704 IEMMODE_16BIT, pCallEntry->idxInstr); \
6705 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6706
6707#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6708 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6709 IEMMODE_16BIT, pCallEntry->idxInstr); \
6710 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6711 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6712
6713#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
6714 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6715 IEMMODE_32BIT, pCallEntry->idxInstr); \
6716 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6717
6718#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6719 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6720 IEMMODE_32BIT, pCallEntry->idxInstr); \
6721 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6722 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6723
6724/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
6725 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
6726 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
6727DECL_INLINE_THROW(uint32_t)
6728iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6729 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6730{
6731 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
6732
6733 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6734 off = iemNativeRegFlushPendingWrites(pReNative, off);
6735
6736#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6737 Assert(pReNative->Core.offPc == 0);
6738
6739 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6740#endif
6741
6742 /* Allocate a temporary PC register. */
6743 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6744
6745 /* Perform the addition. */
6746 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6747
6748 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6749 if (enmEffOpSize == IEMMODE_16BIT)
6750 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6751
6752    /* Perform limit checking, potentially raising #GP(0) and exiting the TB. */
6753/** @todo we can skip this in 32-bit FLAT mode. */
6754 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6755
6756 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6757
6758 /* Free but don't flush the PC register. */
6759 iemNativeRegFreeTmp(pReNative, idxPcReg);
6760
6761 return off;
6762}
6763
6764
6765#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
6766 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6767 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6768
6769#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
6770 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6771 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6772 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6773
6774#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
6775 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6776 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6777
6778#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6779 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6780 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6781 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6782
6783#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
6784 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6785 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6786
6787#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6788 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6789 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6790 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6791
6792/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6793DECL_INLINE_THROW(uint32_t)
6794iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6795 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6796{
6797 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6798 off = iemNativeRegFlushPendingWrites(pReNative, off);
6799
6800#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6801 Assert(pReNative->Core.offPc == 0);
6802
6803 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6804#endif
6805
6806 /* Allocate a temporary PC register. */
6807 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6808
6809 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6810 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6811 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6812 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6813 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6814
6815 /* Free but don't flush the PC register. */
6816 iemNativeRegFreeTmp(pReNative, idxPcReg);
6817
6818 return off;
6819}
6820
6821
6822
6823/*********************************************************************************************************************************
6824* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                  *
6825*********************************************************************************************************************************/
6826
6827/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6828#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6829 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6830
6831/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6832#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6833 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6834
6835/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6836#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6837 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6838
6839/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6840 * clears flags. */
6841#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6842 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6843 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6844
6845/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6846 * clears flags. */
6847#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6848 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6849 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6850
6851/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6852 * clears flags. */
6853#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6854 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6855 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6856
6857#undef IEM_MC_SET_RIP_U16_AND_FINISH
6858
6859
6860/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6861#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6862 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6863
6864/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6865#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6866 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6867
6868/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6869 * clears flags. */
6870#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6871 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6872 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6873
6874/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6875 * and clears flags. */
6876#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6877 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6878 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6879
6880#undef IEM_MC_SET_RIP_U32_AND_FINISH
6881
6882
6883/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6884#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6885 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6886
6887/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6888 * and clears flags. */
6889#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6890 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6891 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6892
6893#undef IEM_MC_SET_RIP_U64_AND_FINISH
6894
6895
6896/** Same as iemRegRipJumpU16AndFinishNoFlags,
6897 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6898DECL_INLINE_THROW(uint32_t)
6899iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6900 uint8_t idxInstr, uint8_t cbVar)
6901{
6902 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6903 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
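    /* idxVarPc is a variable index (not an immediate) holding the new IP/EIP/RIP value;
       cbVar is its width and determines whether the canonical check below is needed. */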
6904
6905 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6906 off = iemNativeRegFlushPendingWrites(pReNative, off);
6907
6908#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6909 Assert(pReNative->Core.offPc == 0);
6910
6911 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6912#endif
6913
6914 /* Get a register with the new PC loaded from idxVarPc.
6915       Note! This ASSUMES that the high bits of the GPR are zeroed. */
6916 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6917
6918 /* Check limit (may #GP(0) + exit TB). */
6919 if (!f64Bit)
6920/** @todo we can skip this test in FLAT 32-bit mode. */
6921 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6922 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6923 else if (cbVar > sizeof(uint32_t))
6924 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6925
6926 /* Store the result. */
6927 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6928
6929 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6930    /** @todo implicitly free the variable? */
6931
6932 return off;
6933}
6934
6935
6936
6937/*********************************************************************************************************************************
6938* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
6939*********************************************************************************************************************************/
6940
6941#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
6942 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
6943
6944/**
6945 * Emits code to check if a \#NM exception should be raised.
6946 *
6947 * @returns New code buffer offset, UINT32_MAX on failure.
6948 * @param pReNative The native recompile state.
6949 * @param off The code buffer offset.
6950 * @param idxInstr The current instruction.
6951 */
6952DECL_INLINE_THROW(uint32_t)
6953iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6954{
6955 /*
6956 * Make sure we don't have any outstanding guest register writes as we may
6957 * raise an #NM and all guest registers must be up to date in CPUMCTX.
6958 *
6959 * @todo r=aeichner Can we postpone this to the RaiseNm path?
6960 */
6961 off = iemNativeRegFlushPendingWrites(pReNative, off);
6962
6963#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6964 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6965#else
6966 RT_NOREF(idxInstr);
6967#endif
6968
6969 /* Allocate a temporary CR0 register. */
6970 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
6971 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
6972
6973 /*
6974 * if (cr0 & (X86_CR0_EM | X86_CR0_TS) != 0)
6975 * return raisexcpt();
6976 */
6977 /* Test and jump. */
6978 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
6979
6980 /* Free but don't flush the CR0 register. */
6981 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
6982
6983 return off;
6984}
6985
6986
6987#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
6988 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
6989
6990/**
6991 * Emits code to check if a \#MF exception should be raised.
6992 *
6993 * @returns New code buffer offset, UINT32_MAX on failure.
6994 * @param pReNative The native recompile state.
6995 * @param off The code buffer offset.
6996 * @param idxInstr The current instruction.
6997 */
6998DECL_INLINE_THROW(uint32_t)
6999iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7000{
7001 /*
7002 * Make sure we don't have any outstanding guest register writes as we may
7003 * raise an #MF and all guest registers must be up to date in CPUMCTX.
7004 *
7005 * @todo r=aeichner Can we postpone this to the RaiseMf path?
7006 */
7007 off = iemNativeRegFlushPendingWrites(pReNative, off);
7008
7009#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7010 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7011#else
7012 RT_NOREF(idxInstr);
7013#endif
7014
7015 /* Allocate a temporary FSW register. */
7016 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
7017 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
7018
7019 /*
7020 * if (FSW & X86_FSW_ES != 0)
7021 * return raisexcpt();
7022 */
7023 /* Test and jump. */
7024 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
7025
7026 /* Free but don't flush the FSW register. */
7027 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
7028
7029 return off;
7030}
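
/* Rough usage sketch (hedged, not lifted verbatim from the instruction tables): in
   the x87 instruction bodies the two checks above typically appear back to back,
   along the lines of

       IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE();
       IEM_MC_MAYBE_RAISE_FPU_XCPT();

   so when such a block is recompiled, the emitters above flush pending guest
   register writes and emit the CR0.EM/CR0.TS and FSW.ES tests with jumps to the
   shared RaiseNm/RaiseMf labels. */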
7031
7032
7033#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
7034 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
7035
7036/**
7037 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
7038 *
7039 * @returns New code buffer offset, UINT32_MAX on failure.
7040 * @param pReNative The native recompile state.
7041 * @param off The code buffer offset.
7042 * @param idxInstr The current instruction.
7043 */
7044DECL_INLINE_THROW(uint32_t)
7045iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7046{
7047 /*
7048 * Make sure we don't have any outstanding guest register writes as we may
7049 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
7050 *
7051 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
7052 */
7053 off = iemNativeRegFlushPendingWrites(pReNative, off);
7054
7055#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7056 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7057#else
7058 RT_NOREF(idxInstr);
7059#endif
7060
7061 /* Allocate a temporary CR0 and CR4 register. */
7062 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
7063 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
7064 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
7065 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
7066
7067 /** @todo r=aeichner Optimize this more later to have fewer compares and branches
7068 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h, but check that it has some
7069 * actual performance benefit first). */
7070 /*
7071 * if (cr0 & X86_CR0_EM)
7072 * return raisexcpt();
7073 */
7074 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM, idxLabelRaiseUd);
7075 /*
7076 * if (!(cr4 & X86_CR4_OSFXSR))
7077 * return raisexcpt();
7078 */
7079 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR, idxLabelRaiseUd);
7080 /*
7081 * if (cr0 & X86_CR0_TS)
7082 * return raisexcpt();
7083 */
7084 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_TS, idxLabelRaiseNm);
7085
7086 /* Free but don't flush the CR0 and CR4 register. */
7087 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
7088 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
7089
7090 return off;
7091}
7092
7093
7094#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
7095 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
7096
7097/**
7098 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
7099 *
7100 * @returns New code buffer offset, UINT32_MAX on failure.
7101 * @param pReNative The native recompile state.
7102 * @param off The code buffer offset.
7103 * @param idxInstr The current instruction.
7104 */
7105DECL_INLINE_THROW(uint32_t)
7106iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7107{
7108 /*
7109 * Make sure we don't have any outstanding guest register writes as we may
7110 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
7111 *
7112 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
7113 */
7114 off = iemNativeRegFlushPendingWrites(pReNative, off);
7115
7116#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7117 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7118#else
7119 RT_NOREF(idxInstr);
7120#endif
7121
7122 /* Allocate a temporary CR0, CR4 and XCR0 register. */
7123 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
7124 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
7125 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
7126 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
7127 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
7128
7129#if 1
7130 off = iemNativeEmitBrk(pReNative, off, 0x4223); /** @todo Test this when AVX support actually becomes available. */
7131#endif
7132
7133 /** @todo r=aeichner Optimize this more later to have fewer compares and branches
7134 * (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h, but check that it has some
7135 * actual performance benefit first). */
7136 /*
7137 * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
7138 * return raisexcpt();
7139 */
7140 const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
7141 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxXcr0Reg);
7142 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
7143 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7144
7145 /*
7146 * if (!(cr4 & X86_CR4_OSXSAVE))
7147 * return raisexcpt();
7148 */
7149 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE, idxLabelRaiseUd);
7150 /*
7151 * if (cr0 & X86_CR0_TS)
7152 * return raisexcpt();
7153 */
7154 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_TS, idxLabelRaiseNm);
7155
7156 /* Free but don't flush the CR0, CR4 and XCR0 register. */
7157 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
7158 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
7159 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
7160
7161 return off;
7162}
7163
7164
7165
7166/*********************************************************************************************************************************
7167* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
7168*********************************************************************************************************************************/
7169
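/* Quick orientation sketch (hedged): an MC block uses these macros roughly as

       IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
           ... recompiled if-block statements ...
       } IEM_MC_ELSE() {
           ... recompiled else-block statements ...
       } IEM_MC_ENDIF();

   Each IEM_MC_IF_XXX below pushes an entry onto the condition stack
   (iemNativeCondPushIf), emits the test with a jump to the else label, and
   snapshots the register/variable state (iemNativeCondStartIfBlock);
   IEM_MC_ELSE and IEM_MC_ENDIF then restore and merge that state so both
   paths leave the register allocator in the same shape. */
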
7170/**
7171 * Pushes an IEM_MC_IF_XXX onto the condition stack.
7172 *
7173 * @returns Pointer to the condition stack entry on success, NULL on failure
7174 * (too many nestings)
7175 */
7176DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
7177{
7178#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7179 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
7180#endif
7181
7182 uint32_t const idxStack = pReNative->cCondDepth;
7183 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
7184
7185 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
7186 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
7187
7188 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
7189 pEntry->fInElse = false;
7190 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
7191 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
7192
7193 return pEntry;
7194}
7195
7196
7197/**
7198 * Start of the if-block, snapshotting the register and variable state.
7199 */
7200DECL_INLINE_THROW(void)
7201iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
7202{
7203 Assert(offIfBlock != UINT32_MAX);
7204 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7205 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7206 Assert(!pEntry->fInElse);
7207
7208 /* Define the start of the IF block if requested or for disassembly purposes. */
7209 if (idxLabelIf != UINT32_MAX)
7210 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
7211#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7212 else
7213 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
7214#else
7215 RT_NOREF(offIfBlock);
7216#endif
7217
7218#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7219 Assert(pReNative->Core.offPc == 0);
7220#endif
7221
7222 /* Copy the initial state so we can restore it in the 'else' block. */
7223 pEntry->InitialState = pReNative->Core;
7224}
7225
7226
7227#define IEM_MC_ELSE() } while (0); \
7228 off = iemNativeEmitElse(pReNative, off); \
7229 do {
7230
7231/** Emits code related to IEM_MC_ELSE. */
7232DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7233{
7234 /* Check sanity and get the conditional stack entry. */
7235 Assert(off != UINT32_MAX);
7236 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7237 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7238 Assert(!pEntry->fInElse);
7239
7240 /* Jump to the endif */
7241 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
7242
7243 /* Define the else label and enter the else part of the condition. */
7244 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
7245 pEntry->fInElse = true;
7246
7247#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7248 Assert(pReNative->Core.offPc == 0);
7249#endif
7250
7251 /* Snapshot the core state so we can do a merge at the endif and restore
7252 the snapshot we took at the start of the if-block. */
7253 pEntry->IfFinalState = pReNative->Core;
7254 pReNative->Core = pEntry->InitialState;
7255
7256 return off;
7257}
7258
7259
7260#define IEM_MC_ENDIF() } while (0); \
7261 off = iemNativeEmitEndIf(pReNative, off)
7262
7263/** Emits code related to IEM_MC_ENDIF. */
7264DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7265{
7266 /* Check sanity and get the conditional stack entry. */
7267 Assert(off != UINT32_MAX);
7268 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7269 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7270
7271#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7272 Assert(pReNative->Core.offPc == 0);
7273#endif
7274
7275 /*
7276 * Now we have to find the common ground between the current core state and
7277 * the one at the end of the other code path. Use the smallest common
7278 * denominator and just drop anything that isn't the same in both states.
7279 */
7280 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
7281 * which is why we're doing this at the end of the else-block.
7282 * But we'd need more info about future for that to be worth the effort. */
7283 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
7284 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
7285 {
7286 /* shadow guest stuff first. */
7287 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
7288 if (fGstRegs)
7289 {
7290 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
7291 do
7292 {
7293 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
7294 fGstRegs &= ~RT_BIT_64(idxGstReg);
7295
7296 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
7297 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
7298 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
7299 {
7300 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
7301 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
7302 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
7303 }
7304 } while (fGstRegs);
7305 }
7306 else
7307 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
7308
7309 /* Check variables next. For now we must require them to be identical
7310 or stuff we can recreate. */
7311 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
7312 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
7313 if (fVars)
7314 {
7315 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
7316 do
7317 {
7318 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
7319 fVars &= ~RT_BIT_32(idxVar);
7320
7321 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
7322 {
7323 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
7324 continue;
7325 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
7326 {
7327 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7328 if (idxHstReg != UINT8_MAX)
7329 {
7330 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7331 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7332 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
7333 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7334 }
7335 continue;
7336 }
7337 }
7338 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
7339 continue;
7340
7341 /* Irreconcilable, so drop it. */
7342 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7343 if (idxHstReg != UINT8_MAX)
7344 {
7345 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7346 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7347 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
7348 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7349 }
7350 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7351 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7352 } while (fVars);
7353 }
7354
7355 /* Finally, check that the host register allocations matches. */
7356 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
7357 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
7358 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
7359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
7360 }
7361
7362 /*
7363 * Define the endif label and maybe the else one if we're still in the 'if' part.
7364 */
7365 if (!pEntry->fInElse)
7366 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
7367 else
7368 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
7369 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
7370
7371 /* Pop the conditional stack.*/
7372 pReNative->cCondDepth -= 1;
7373
7374 return off;
7375}
7376
7377
7378#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
7379 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
7380 do {
7381
7382/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
7383DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
7384{
7385 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7386
7387 /* Get the eflags. */
7388 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7389 kIemNativeGstRegUse_ReadOnly);
7390
7391 /* Test and jump. */
7392 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
7393
7394 /* Free but don't flush the EFlags register. */
7395 iemNativeRegFreeTmp(pReNative, idxEflReg);
7396
7397 /* Make a copy of the core state now as we start the if-block. */
7398 iemNativeCondStartIfBlock(pReNative, off);
7399
7400 return off;
7401}
7402
7403
7404#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
7405 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
7406 do {
7407
7408/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
7409DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
7410{
7411 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7412
7413 /* Get the eflags. */
7414 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7415 kIemNativeGstRegUse_ReadOnly);
7416
7417 /* Test and jump. */
7418 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
7419
7420 /* Free but don't flush the EFlags register. */
7421 iemNativeRegFreeTmp(pReNative, idxEflReg);
7422
7423 /* Make a copy of the core state now as we start the if-block. */
7424 iemNativeCondStartIfBlock(pReNative, off);
7425
7426 return off;
7427}
7428
7429
7430#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
7431 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
7432 do {
7433
7434/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
7435DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7436{
7437 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7438
7439 /* Get the eflags. */
7440 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7441 kIemNativeGstRegUse_ReadOnly);
7442
7443 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7444 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7445
7446 /* Test and jump. */
7447 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7448
7449 /* Free but don't flush the EFlags register. */
7450 iemNativeRegFreeTmp(pReNative, idxEflReg);
7451
7452 /* Make a copy of the core state now as we start the if-block. */
7453 iemNativeCondStartIfBlock(pReNative, off);
7454
7455 return off;
7456}
7457
7458
7459#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
7460 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
7461 do {
7462
7463/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
7464DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7465{
7466 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7467
7468 /* Get the eflags. */
7469 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7470 kIemNativeGstRegUse_ReadOnly);
7471
7472 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7473 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7474
7475 /* Test and jump. */
7476 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7477
7478 /* Free but don't flush the EFlags register. */
7479 iemNativeRegFreeTmp(pReNative, idxEflReg);
7480
7481 /* Make a copy of the core state now as we start the if-block. */
7482 iemNativeCondStartIfBlock(pReNative, off);
7483
7484 return off;
7485}
7486
7487
7488#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
7489 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
7490 do {
7491
7492#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
7493 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
7494 do {
7495
7496/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
7497DECL_INLINE_THROW(uint32_t)
7498iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7499 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7500{
7501 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7502
7503 /* Get the eflags. */
7504 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7505 kIemNativeGstRegUse_ReadOnly);
7506
7507 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7508 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7509
7510 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7511 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7512 Assert(iBitNo1 != iBitNo2);
7513
7514#ifdef RT_ARCH_AMD64
7515 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
7516
7517 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7518 if (iBitNo1 > iBitNo2)
7519 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7520 else
7521 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7522 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7523
7524#elif defined(RT_ARCH_ARM64)
7525 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7526 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7527
7528 /* and tmpreg, eflreg, #1<<iBitNo1 */
7529 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7530
7531 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7532 if (iBitNo1 > iBitNo2)
7533 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7534 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7535 else
7536 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7537 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7538
7539 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7540
7541#else
7542# error "Port me"
7543#endif
7544
7545 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7546 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7547 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7548
7549 /* Free but don't flush the EFlags and tmp registers. */
7550 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7551 iemNativeRegFreeTmp(pReNative, idxEflReg);
7552
7553 /* Make a copy of the core state now as we start the if-block. */
7554 iemNativeCondStartIfBlock(pReNative, off);
7555
7556 return off;
7557}
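
/* Worked example (informal): for something like IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF,
   X86_EFL_OF) the code above isolates the first flag, shifts it onto the second
   flag's bit position and XORs the result with EFLAGS, so bit iBitNo2 of the
   temporary ends up as fBit1 ^ fBit2; the final test then jumps to the else label
   when the two flags differ (EQ variant) or when they match (NE variant). */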
7558
7559
7560#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
7561 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
7562 do {
7563
7564#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
7565 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
7566 do {
7567
7568/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
7569 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
7570DECL_INLINE_THROW(uint32_t)
7571iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
7572 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7573{
7574 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7575
7576 /* We need an if-block label for the non-inverted variant. */
7577 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
7578 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
7579
7580 /* Get the eflags. */
7581 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7582 kIemNativeGstRegUse_ReadOnly);
7583
7584 /* Translate the flag masks to bit numbers. */
7585 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7586 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7587
7588 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7589 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7590 Assert(iBitNo1 != iBitNo);
7591
7592 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7593 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7594 Assert(iBitNo2 != iBitNo);
7595 Assert(iBitNo2 != iBitNo1);
7596
7597#ifdef RT_ARCH_AMD64
7598 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
7599#elif defined(RT_ARCH_ARM64)
7600 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7601#endif
7602
7603 /* Check for the lone bit first. */
7604 if (!fInverted)
7605 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7606 else
7607 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
7608
7609 /* Then extract and compare the other two bits. */
7610#ifdef RT_ARCH_AMD64
7611 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7612 if (iBitNo1 > iBitNo2)
7613 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7614 else
7615 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7616 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7617
7618#elif defined(RT_ARCH_ARM64)
7619 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7620
7621 /* and tmpreg, eflreg, #1<<iBitNo1 */
7622 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7623
7624 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7625 if (iBitNo1 > iBitNo2)
7626 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7627 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7628 else
7629 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7630 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7631
7632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7633
7634#else
7635# error "Port me"
7636#endif
7637
7638 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7639 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7640 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7641
7642 /* Free but don't flush the EFlags and tmp registers. */
7643 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7644 iemNativeRegFreeTmp(pReNative, idxEflReg);
7645
7646 /* Make a copy of the core state now as we start the if-block. */
7647 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
7648
7649 return off;
7650}
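
/* Informal note: this is the three-flag variant of the helper above, e.g.
   IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) style
   conditions, where the lone bit is tested first and the remaining two bits are
   compared with the same shift+XOR trick. */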
7651
7652
7653#define IEM_MC_IF_CX_IS_NZ() \
7654 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
7655 do {
7656
7657/** Emits code for IEM_MC_IF_CX_IS_NZ. */
7658DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7659{
7660 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7661
7662 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7663 kIemNativeGstRegUse_ReadOnly);
7664 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
7665 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7666
7667 iemNativeCondStartIfBlock(pReNative, off);
7668 return off;
7669}
7670
7671
7672#define IEM_MC_IF_ECX_IS_NZ() \
7673 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
7674 do {
7675
7676#define IEM_MC_IF_RCX_IS_NZ() \
7677 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
7678 do {
7679
7680/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
7681DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7682{
7683 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7684
7685 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7686 kIemNativeGstRegUse_ReadOnly);
7687 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
7688 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7689
7690 iemNativeCondStartIfBlock(pReNative, off);
7691 return off;
7692}
7693
7694
7695#define IEM_MC_IF_CX_IS_NOT_ONE() \
7696 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
7697 do {
7698
7699/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
7700DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7701{
7702 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7703
7704 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7705 kIemNativeGstRegUse_ReadOnly);
7706#ifdef RT_ARCH_AMD64
7707 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7708#else
7709 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7710 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7711 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7712#endif
7713 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7714
7715 iemNativeCondStartIfBlock(pReNative, off);
7716 return off;
7717}
7718
7719
7720#define IEM_MC_IF_ECX_IS_NOT_ONE() \
7721 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
7722 do {
7723
7724#define IEM_MC_IF_RCX_IS_NOT_ONE() \
7725 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
7726 do {
7727
7728/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
7729DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7730{
7731 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7732
7733 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7734 kIemNativeGstRegUse_ReadOnly);
7735 if (f64Bit)
7736 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7737 else
7738 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7739 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7740
7741 iemNativeCondStartIfBlock(pReNative, off);
7742 return off;
7743}
7744
7745
7746#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7747 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
7748 do {
7749
7750#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7751 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
7752 do {
7753
7754/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
7755 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7756DECL_INLINE_THROW(uint32_t)
7757iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
7758{
7759 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7760
7761 /* We have to load both RCX and EFLAGS before we can start branching,
7762 otherwise we'll end up in the else-block with an inconsistent
7763 register allocator state.
7764 Doing EFLAGS first as it's more likely to be loaded, right? */
7765 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7766 kIemNativeGstRegUse_ReadOnly);
7767 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7768 kIemNativeGstRegUse_ReadOnly);
7769
7770 /** @todo we could reduce this to a single branch instruction by spending a
7771 * temporary register and some setnz stuff. Not sure if loops are
7772 * worth it. */
7773 /* Check CX. */
7774#ifdef RT_ARCH_AMD64
7775 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7776#else
7777 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7778 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7779 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7780#endif
7781
7782 /* Check the EFlags bit. */
7783 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7784 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7785 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7786 !fCheckIfSet /*fJmpIfSet*/);
7787
7788 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7789 iemNativeRegFreeTmp(pReNative, idxEflReg);
7790
7791 iemNativeCondStartIfBlock(pReNative, off);
7792 return off;
7793}
7794
7795
7796#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7797 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
7798 do {
7799
7800#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7801 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
7802 do {
7803
7804#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7805 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
7806 do {
7807
7808#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7809 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
7810 do {
7811
7812/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
7813 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
7814 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
7815 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7816DECL_INLINE_THROW(uint32_t)
7817iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7818 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
7819{
7820 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7821
7822 /* We have to load both RCX and EFLAGS before we can start branching,
7823 otherwise we'll end up in the else-block with an inconsistent
7824 register allocator state.
7825 Doing EFLAGS first as it's more likely to be loaded, right? */
7826 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7827 kIemNativeGstRegUse_ReadOnly);
7828 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7829 kIemNativeGstRegUse_ReadOnly);
7830
7831 /** @todo we could reduce this to a single branch instruction by spending a
7832 * temporary register and some setnz stuff. Not sure if loops are
7833 * worth it. */
7834 /* Check RCX/ECX. */
7835 if (f64Bit)
7836 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7837 else
7838 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7839
7840 /* Check the EFlags bit. */
7841 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7842 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7843 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7844 !fCheckIfSet /*fJmpIfSet*/);
7845
7846 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7847 iemNativeRegFreeTmp(pReNative, idxEflReg);
7848
7849 iemNativeCondStartIfBlock(pReNative, off);
7850 return off;
7851}
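
/* Informal note: the 'is not one' checks in the last few emitters exist because
   the MC blocks test the counter before it gets decremented, so an old value of
   one corresponds to a new value of zero; combined with the EFLAGS bit test this
   presumably covers the LOOPE/LOOPNE style conditions. */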
7852
7853
7854
7855/*********************************************************************************************************************************
7856* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7857*********************************************************************************************************************************/
7858/** Number of hidden arguments for CIMPL calls.
7859 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
7860#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7861# define IEM_CIMPL_HIDDEN_ARGS 3
7862#else
7863# define IEM_CIMPL_HIDDEN_ARGS 2
7864#endif
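
/* Rough illustration of where the numbers come from (hedged, not the actual
   declarations): a C-implementation helper is invoked along the lines of

       VBOXSTRICTRC iemCImpl_Something(PVMCPUCC pVCpu, uint8_t cbInstr, <args>...);

   i.e. pVCpu and cbInstr are the two hidden arguments, while on Windows/AMD64
   with VBOXSTRICTRC_STRICT_ENABLED the strict status code is returned through a
   hidden buffer pointer as well, giving three hidden arguments. */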
7865
7866#define IEM_MC_NOREF(a_Name) \
7867 RT_NOREF_PV(a_Name)
7868
7869#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
7870 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
7871
7872#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
7873 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
7874
7875#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
7876 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
7877
7878#define IEM_MC_LOCAL(a_Type, a_Name) \
7879 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
7880
7881#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
7882 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
7883
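/* Rough usage sketch (hedged): a generated MC block declares its arguments and
   locals with the macros above, e.g. something along the lines of

       IEM_MC_ARG(uint16_t *, pu16Dst, 0);
       IEM_MC_ARG(uint16_t,   u16Src,  1);
       IEM_MC_ARG(uint32_t *, pEFlags, 2);
       IEM_MC_LOCAL(uint16_t, u16Tmp);

   Each macro only allocates a recompiler variable index; the host register or
   stack slot backing it is assigned lazily by the helpers below. */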
7884
7885/**
7886 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
7887 */
7888DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
7889{
7890 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
7891 return IEM_CIMPL_HIDDEN_ARGS;
7892 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
7893 return 1;
7894 return 0;
7895}
7896
7897
7898/**
7899 * Internal work that allocates a variable with kind set to
7900 * kIemNativeVarKind_Invalid and no current stack allocation.
7901 *
7902 * The kind will either be set by the caller or later when the variable is first
7903 * assigned a value.
7904 *
7905 * @returns Unpacked index.
7906 * @internal
7907 */
7908static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7909{
7910 Assert(cbType > 0 && cbType <= 64);
7911 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7912 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7913 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7914 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7915 pReNative->Core.aVars[idxVar].cbVar = cbType;
7916 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7917 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7918 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7919 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7920 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7921 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7922 pReNative->Core.aVars[idxVar].u.uValue = 0;
7923 return idxVar;
7924}
7925
7926
7927/**
7928 * Internal work that allocates an argument variable w/o setting enmKind.
7929 *
7930 * @returns Unpacked index.
7931 * @internal
7932 */
7933static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7934{
7935 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7936 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7937 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7938
7939 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7940 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7941 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7942 return idxVar;
7943}
7944
7945
7946/**
7947 * Gets the stack slot for a stack variable, allocating one if necessary.
7948 *
7949 * Calling this function implies that the stack slot will contain a valid
7950 * variable value. The caller deals with any register currently assigned to the
7951 * variable, typically by spilling it into the stack slot.
7952 *
7953 * @returns The stack slot number.
7954 * @param pReNative The recompiler state.
7955 * @param idxVar The variable.
7956 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7957 */
7958DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7959{
7960 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7961 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7962 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7963
7964 /* Already got a slot? */
7965 uint8_t const idxStackSlot = pVar->idxStackSlot;
7966 if (idxStackSlot != UINT8_MAX)
7967 {
7968 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7969 return idxStackSlot;
7970 }
7971
7972 /*
7973 * A single slot is easy to allocate.
7974 * Allocate them from the top end, closest to BP, to reduce the displacement.
7975 */
7976 if (pVar->cbVar <= sizeof(uint64_t))
7977 {
7978 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7979 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7980 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7981 pVar->idxStackSlot = (uint8_t)iSlot;
7982 Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7983 return (uint8_t)iSlot;
7984 }
7985
7986 /*
7987 * We need more than one stack slot.
7988 *
7989 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7990 */
7991 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7992 Assert(pVar->cbVar <= 64);
7993 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7994 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
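    /* Worked example (informal): for cbVar == 32 bytes,
           fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 3
           fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1            = RT_BIT_32(4) - 1 = 0xf
       i.e. four consecutive 8-byte slots are needed, starting at a slot index
       that is a multiple of four. */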
7995 uint32_t bmStack = ~pReNative->Core.bmStack;
7996 while (bmStack != UINT32_MAX)
7997 {
7998/** @todo allocate from the top to reduce BP displacement. */
7999 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
8000 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8001 if (!(iSlot & fBitAlignMask))
8002 {
8003 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
8004 {
8005 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
8006 pVar->idxStackSlot = (uint8_t)iSlot;
8007 Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8008 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
8009 return (uint8_t)iSlot;
8010 }
8011 }
8012 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
8013 }
8014 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8015}
8016
8017
8018/**
8019 * Changes the variable to a stack variable.
8020 *
8021 * Currently this is only possible to do the first time the variable is used;
8022 * switching later can be implemented but hasn't been done.
8023 *
8024 * @param pReNative The recompiler state.
8025 * @param idxVar The variable.
8026 * @throws VERR_IEM_VAR_IPE_2
8027 */
8028static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8029{
8030 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8031 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8032 if (pVar->enmKind != kIemNativeVarKind_Stack)
8033 {
8034 /* We could in theory transition from immediate to stack as well, but it
8035 would involve the caller doing the work of storing the value on the stack. So,
8036 till that's required we only allow transition from invalid. */
8037 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8038 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8039 pVar->enmKind = kIemNativeVarKind_Stack;
8040
8041 /* Note! We don't allocate a stack slot here, that's only done when a
8042 slot is actually needed to hold a variable value. */
8043 }
8044}
8045
8046
8047/**
8048 * Sets it to a variable with a constant value.
8049 *
8050 * This does not require stack storage as we know the value and can always
8051 * reload it, unless of course it's referenced.
8052 *
8053 * @param pReNative The recompiler state.
8054 * @param idxVar The variable.
8055 * @param uValue The immediate value.
8056 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8057 */
8058static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
8059{
8060 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8061 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8062 if (pVar->enmKind != kIemNativeVarKind_Immediate)
8063 {
8064 /* Only simple transitions for now. */
8065 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8066 pVar->enmKind = kIemNativeVarKind_Immediate;
8067 }
8068 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8069
8070 pVar->u.uValue = uValue;
8071 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
8072 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
8073 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
8074}
8075
8076
8077/**
8078 * Sets the variable to a reference (pointer) to @a idxOtherVar.
8079 *
8080 * This does not require stack storage as we know the value and can always
8081 * reload it. Loading is postponed till needed.
8082 *
8083 * @param pReNative The recompiler state.
8084 * @param idxVar The variable. Unpacked.
8085 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
8086 *
8087 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8088 * @internal
8089 */
8090static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
8091{
8092 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
8093 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
8094
8095 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
8096 {
8097 /* Only simple transitions for now. */
8098 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
8099 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8100 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
8101 }
8102 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8103
8104 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
8105
8106 /* Update the other variable, ensure it's a stack variable. */
8107 /** @todo handle variables with const values... that'll go boom now. */
8108 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
8109 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8110}
8111
8112
8113/**
8114 * Sets the variable to a reference (pointer) to a guest register reference.
8115 *
8116 * This does not require stack storage as we know the value and can always
8117 * reload it. Loading is postponed till needed.
8118 *
8119 * @param pReNative The recompiler state.
8120 * @param idxVar The variable.
8121 * @param enmRegClass The class guest registers to reference.
8122 * @param idxReg The register within @a enmRegClass to reference.
8123 *
8124 * @throws VERR_IEM_VAR_IPE_2
8125 */
8126static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8127 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
8128{
8129 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8130 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8131
8132 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
8133 {
8134 /* Only simple transitions for now. */
8135 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8136 pVar->enmKind = kIemNativeVarKind_GstRegRef;
8137 }
8138 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8139
8140 pVar->u.GstRegRef.enmClass = enmRegClass;
8141 pVar->u.GstRegRef.idx = idxReg;
8142}
8143
8144
8145DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
8146{
8147 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8148}
8149
8150
8151DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
8152{
8153 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8154
8155 /* Since we're using a generic uint64_t value type, we must truncate it if
8156 the variable is smaller, otherwise we may end up with too large a value when
8157 scaling up an imm8 w/ sign-extension.
8158
8159 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
8160 in the bios, bx=1) when running on arm, because clang expects 16-bit
8161 register parameters to have bits 16 and up set to zero. Instead of
8162 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
8163 CF value in the result. */
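    /* Worked illustration (informal): for cbType == sizeof(uint16_t), an imm8 of -1
       that was sign-extended to UINT64_C(0xffffffffffffffff) is masked down to
       UINT64_C(0xffff) by the switch below before it ever reaches a host register. */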
8164 switch (cbType)
8165 {
8166 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8167 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8168 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8169 }
8170 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8171 return idxVar;
8172}
8173
8174
8175DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
8176{
8177 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
8178 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
8179 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
8180 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
8181 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
8182 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
8183
8184 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
8185 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
8186 return idxArgVar;
8187}
8188
8189
8190DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
8191{
8192 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8193 /* Don't set to stack now, leave that to the first use as for instance
8194 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
8195 return idxVar;
8196}
8197
8198
8199DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
8200{
8201 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8202
8203 /* Since we're using a generic uint64_t value type, we must truncate it if
8204 the variable is smaller, otherwise we may end up with too large a value when
8205 scaling up an imm8 w/ sign-extension. */
8206 switch (cbType)
8207 {
8208 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8209 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8210 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8211 }
8212 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8213 return idxVar;
8214}
8215
8216
8217/**
8218 * Makes sure variable @a idxVar has a register assigned to it and that it stays
8219 * fixed till we call iemNativeVarRegisterRelease.
8220 *
8221 * @returns The host register number.
8222 * @param pReNative The recompiler state.
8223 * @param idxVar The variable.
8224 * @param poff Pointer to the instruction buffer offset.
8225 * In case a register needs to be freed up or the value
8226 * loaded off the stack.
8227 * @param fInitialized Set if the variable must already have been initialized.
8228 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8229 * the case.
8230 * @param idxRegPref Preferred register number or UINT8_MAX.
8231 */
8232DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8233 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8234{
8235 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8236 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8237 Assert(pVar->cbVar <= 8);
8238 Assert(!pVar->fRegAcquired);
8239
8240 uint8_t idxReg = pVar->idxReg;
8241 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8242 {
8243 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8244 && pVar->enmKind < kIemNativeVarKind_End);
8245 pVar->fRegAcquired = true;
8246 return idxReg;
8247 }
8248
8249 /*
8250 * If the kind of variable has not yet been set, default to 'stack'.
8251 */
8252 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8253 && pVar->enmKind < kIemNativeVarKind_End);
8254 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8255 iemNativeVarSetKindToStack(pReNative, idxVar);
8256
8257 /*
8258 * We have to allocate a register for the variable, even if it's a stack one,
8259 * as we don't know whether modifications are being made to it before it's
8260 * finalized (todo: analyze and insert hints about that?).
8261 *
8262 * If we can, we try to get the correct register for argument variables. This
8263 * assumes that most argument variables are fetched as close as possible
8264 * to the actual call, so that there aren't any interfering hidden calls
8265 * (memory accesses, etc.) in between.
8266 *
8267 * If we cannot, or it's a local (non-argument) variable, we make sure no
8268 * argument registers that will be used by this MC block get allocated here,
8269 * and we always prefer non-volatile registers to avoid having to spill
8270 * stuff for internal calls.
8271 */
8272 /** @todo Detect too early argument value fetches and warn about hidden
8273 * calls causing less optimal code to be generated in the python script. */
8274
8275 uint8_t const uArgNo = pVar->uArgNo;
8276 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
8277 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
8278 {
8279 idxReg = g_aidxIemNativeCallRegs[uArgNo];
8280 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8281 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
8282 }
8283 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
8284 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
8285 {
8286 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8287 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
8288 & ~pReNative->Core.bmHstRegsWithGstShadow
8289 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
8290 & fNotArgsMask;
8291 if (fRegs)
8292 {
8293 /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
8294 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
8295 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
8296 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
8297 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
8298 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8299 }
8300 else
8301 {
8302 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8303 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
8304 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8305 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8306 }
8307 }
8308 else
8309 {
8310 idxReg = idxRegPref;
8311 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8312 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8313 }
8314 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8315 pVar->idxReg = idxReg;
8316
8317 /*
8318 * Load it off the stack if we've got a stack slot.
8319 */
8320 uint8_t const idxStackSlot = pVar->idxStackSlot;
8321 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8322 {
8323 Assert(fInitialized);
8324 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8325 switch (pVar->cbVar)
8326 {
8327 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
8328 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
8329 case 3: AssertFailed(); RT_FALL_THRU();
8330 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
8331 default: AssertFailed(); RT_FALL_THRU();
8332 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
8333 }
8334 }
8335 else
8336 {
8337 Assert(idxStackSlot == UINT8_MAX);
8338 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8339 }
8340 pVar->fRegAcquired = true;
8341 return idxReg;
8342}
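
/*
 * A minimal usage sketch for the acquire/release pairing above, mirroring the
 * pattern the IEM_MC_FETCH_GREG_XXX emitters later in this file follow
 * (idxDstVar and idxGstFullReg are assumed to be a destination variable and an
 * already allocated guest register shadow copy):
 *
 *      iemNativeVarSetKindToStack(pReNative, idxDstVar);
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
 *      off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
 *      iemNativeVarRegisterRelease(pReNative, idxDstVar);
 */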
8343
8344
8345/**
8346 * The value of variable @a idxVar will be written in full to the @a enmGstReg
8347 * guest register.
8348 *
8349 * This function makes sure there is a register for it and sets it to be the
8350 * current shadow copy of @a enmGstReg.
8351 *
8352 * @returns The host register number.
8353 * @param pReNative The recompiler state.
8354 * @param idxVar The variable.
8355 * @param enmGstReg The guest register this variable will be written to
8356 * after this call.
8357 * @param poff Pointer to the instruction buffer offset.
8358 * In case a register needs to be freed up or if the
8359 * variable content needs to be loaded off the stack.
8360 *
8361 * @note We DO NOT expect @a idxVar to be an argument variable,
8362 *       because this function is only used in the commit stage of an
8363 *       instruction.
8364 */
8365DECL_HIDDEN_THROW(uint8_t)
8366iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
8367{
8368 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8369 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8370 Assert(!pVar->fRegAcquired);
8371 AssertMsgStmt( pVar->cbVar <= 8
8372 && ( pVar->enmKind == kIemNativeVarKind_Immediate
8373 || pVar->enmKind == kIemNativeVarKind_Stack),
8374 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
8375 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
8376 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8377
8378 /*
8379 * This shouldn't ever be used for arguments, unless it's in a weird else
8380 * branch that doesn't do any calling and even then it's questionable.
8381 *
8382 * However, in case someone writes crazy wrong MC code and does register
8383 * updates before making calls, just use the regular register allocator to
8384 * ensure we get a register suitable for the intended argument number.
8385 */
8386 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
8387
8388 /*
8389 * If there is already a register for the variable, we transfer/set the
8390 * guest shadow copy assignment to it.
8391 */
8392 uint8_t idxReg = pVar->idxReg;
8393 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8394 {
8395 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
8396 {
8397 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
8398 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
8399 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
8400 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
8401 }
8402 else
8403 {
8404 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
8405 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
8406 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
8407 }
8408 /** @todo figure this one out. We need some way of making sure the register isn't
8409 * modified after this point, just in case we start writing crappy MC code. */
8410 pVar->enmGstReg = enmGstReg;
8411 pVar->fRegAcquired = true;
8412 return idxReg;
8413 }
8414 Assert(pVar->uArgNo == UINT8_MAX);
8415
8416 /*
8417     * Because this is supposed to be the commit stage, we just tag along with the
8418 * temporary register allocator and upgrade it to a variable register.
8419 */
8420 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
8421 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
8422 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
8423 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
8424 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
8425 pVar->idxReg = idxReg;
8426
8427 /*
8428 * Now we need to load the register value.
8429 */
8430 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8431 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
8432 else
8433 {
8434 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8435 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8436 switch (pVar->cbVar)
8437 {
8438 case sizeof(uint64_t):
8439 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
8440 break;
8441 case sizeof(uint32_t):
8442 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
8443 break;
8444 case sizeof(uint16_t):
8445 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
8446 break;
8447 case sizeof(uint8_t):
8448 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
8449 break;
8450 default:
8451 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8452 }
8453 }
8454
8455 pVar->fRegAcquired = true;
8456 return idxReg;
8457}
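
/*
 * Sketch of the intended commit-stage usage; idxValueVar and iGReg are
 * illustrative names.  The acquired register becomes the shadow copy of the
 * guest register, so the caller only has to emit the actual guest context
 * store with it before releasing:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar,
 *                                                                       IEMNATIVEGSTREG_GPR(iGReg), &off);
 *      ... emit the write of idxVarReg to the guest register in CPUMCTX ...
 *      iemNativeVarRegisterRelease(pReNative, idxValueVar);
 */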
8458
8459
8460/**
8461 * Sets the host register for @a idxVarRc to @a idxReg.
8462 *
8463 * The register must not be allocated. Any guest register shadowing will be
8464 * implicitly dropped by this call.
8465 *
8466 * The variable must not have any register associated with it (causes
8467 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
8468 * implied.
8469 *
8470 * @returns idxReg
8471 * @param pReNative The recompiler state.
8472 * @param idxVar The variable.
8473 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
8474 * @param off For recording in debug info.
8475 *
8476 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
8477 */
8478DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
8479{
8480 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8481 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8482 Assert(!pVar->fRegAcquired);
8483 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
8484 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
8485 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
8486
8487 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
8488 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8489
8490 iemNativeVarSetKindToStack(pReNative, idxVar);
8491 pVar->idxReg = idxReg;
8492
8493 return idxReg;
8494}
8495
8496
8497/**
8498 * Convenience wrapper: iemNativeVarRegisterSet() plus marking the register as acquired.
8499 */
8500DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8501 uint8_t idxReg, uint32_t *poff)
8502{
8503 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
8504 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
8505 return idxReg;
8506}
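
/*
 * Sketch: the typical use is binding a helper call's return value to a variable
 * right after the call (pfnHelper is an illustrative name; compare the AIMPL
 * common worker further down, which uses iemNativeVarRegisterSet directly):
 *
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      iemNativeVarRegisterSetAndAcquire(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, &off);
 *      ... use the value ...
 *      iemNativeVarRegisterRelease(pReNative, idxVarRc);
 */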
8507
8508
8509/**
8510 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
8511 *
8512 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
8513 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
8514 * requirement of flushing anything in volatile host registers when making a
8515 * call.
8516 *
8517 * @returns New @a off value.
8518 * @param pReNative The recompiler state.
8519 * @param off The code buffer position.
8520 * @param fHstRegsNotToSave Set of registers not to save & restore.
8521 */
8522DECL_HIDDEN_THROW(uint32_t)
8523iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8524{
8525 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8526 if (fHstRegs)
8527 {
8528 do
8529 {
8530 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8531 fHstRegs &= ~RT_BIT_32(idxHstReg);
8532
8533 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8534 {
8535 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8536 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8537 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8538 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8539 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8540 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8541 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8542 {
8543 case kIemNativeVarKind_Stack:
8544 {
8545 /* Temporarily spill the variable register. */
8546 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8547 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8548 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8549 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8550 continue;
8551 }
8552
8553 case kIemNativeVarKind_Immediate:
8554 case kIemNativeVarKind_VarRef:
8555 case kIemNativeVarKind_GstRegRef:
8556 /* It is weird to have any of these loaded at this point. */
8557 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8558 continue;
8559
8560 case kIemNativeVarKind_End:
8561 case kIemNativeVarKind_Invalid:
8562 break;
8563 }
8564 AssertFailed();
8565 }
8566 else
8567 {
8568 /*
8569 * Allocate a temporary stack slot and spill the register to it.
8570 */
8571 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8572 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
8573 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8574 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
8575 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
8576 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8577 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8578 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8579 }
8580 } while (fHstRegs);
8581 }
8582 return off;
8583}
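
/*
 * Sketch of the bracket this function is one half of (fHstRegsNotToSave holds
 * whatever the caller must keep live across the call, pfnHelper is an
 * illustrative name):
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */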
8584
8585
8586/**
8587 * Emit code to restore volatile registers after a call to a helper.
8588 *
8589 * @returns New @a off value.
8590 * @param pReNative The recompiler state.
8591 * @param off The code buffer position.
8592 * @param fHstRegsNotToSave Set of registers not to save & restore.
8593 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8594 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8595 */
8596DECL_HIDDEN_THROW(uint32_t)
8597iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8598{
8599 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8600 if (fHstRegs)
8601 {
8602 do
8603 {
8604 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8605 fHstRegs &= ~RT_BIT_32(idxHstReg);
8606
8607 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8608 {
8609 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8610 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8611 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8612 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8613 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8614 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8615 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8616 {
8617 case kIemNativeVarKind_Stack:
8618 {
8619 /* Unspill the variable register. */
8620 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8621 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8622 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8623 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8624 continue;
8625 }
8626
8627 case kIemNativeVarKind_Immediate:
8628 case kIemNativeVarKind_VarRef:
8629 case kIemNativeVarKind_GstRegRef:
8630 /* It is weird to have any of these loaded at this point. */
8631 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8632 continue;
8633
8634 case kIemNativeVarKind_End:
8635 case kIemNativeVarKind_Invalid:
8636 break;
8637 }
8638 AssertFailed();
8639 }
8640 else
8641 {
8642 /*
8643 * Restore from temporary stack slot.
8644 */
8645 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8646 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8647 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8648 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8649
8650 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8651 }
8652 } while (fHstRegs);
8653 }
8654 return off;
8655}
8656
8657
8658/**
8659 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8660 *
8661 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8662 *
8663 * ASSUMES that @a idxVar is valid and unpacked.
8664 */
8665DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8666{
8667 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8668 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8669 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8670 {
8671 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8672 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8673 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8674 Assert(cSlots > 0);
8675 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8676 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8677 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8678 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8679 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8680 }
8681 else
8682 Assert(idxStackSlot == UINT8_MAX);
8683}
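
/*
 * Worked example of the mask arithmetic above: a 16 byte variable occupies
 * cSlots = (16 + 8 - 1) / 8 = 2 slots, giving fAllocMask = RT_BIT_32(2) - 1 = 0x3,
 * which is then shifted up to idxStackSlot and cleared from bmStack in one go.
 */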
8684
8685
8686/**
8687 * Worker that frees a single variable.
8688 *
8689 * ASSUMES that @a idxVar is valid and unpacked.
8690 */
8691DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8692{
8693 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8694 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8695 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8696
8697 /* Free the host register first if any assigned. */
8698 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8699 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8700 {
8701 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8702 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8703 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8704 }
8705
8706 /* Free argument mapping. */
8707 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8708 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8709 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8710
8711 /* Free the stack slots. */
8712 iemNativeVarFreeStackSlots(pReNative, idxVar);
8713
8714 /* Free the actual variable. */
8715 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8716 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8717}
8718
8719
8720/**
8721 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8722 */
8723DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8724{
8725 while (bmVars != 0)
8726 {
8727 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8728 bmVars &= ~RT_BIT_32(idxVar);
8729
8730#if 1 /** @todo optimize by simplifying this later... */
8731 iemNativeVarFreeOneWorker(pReNative, idxVar);
8732#else
8733 /* Only need to free the host register, the rest is done as bulk updates below. */
8734 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8735 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8736 {
8737 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8738 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8739 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8740 }
8741#endif
8742 }
8743#if 0 /** @todo optimize by simplifying this later... */
8744 pReNative->Core.bmVars = 0;
8745 pReNative->Core.bmStack = 0;
8746 pReNative->Core.u64ArgVars = UINT64_MAX;
8747#endif
8748}
8749
8750
8751/**
8752 * This is called by IEM_MC_END() to clean up all variables.
8753 */
8754DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
8755{
8756 uint32_t const bmVars = pReNative->Core.bmVars;
8757 if (bmVars != 0)
8758 iemNativeVarFreeAllSlow(pReNative, bmVars);
8759 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8760 Assert(pReNative->Core.bmStack == 0);
8761}
8762
8763
8764#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
8765
8766/**
8767 * This is called by IEM_MC_FREE_LOCAL.
8768 */
8769DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8770{
8771 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8772 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
8773 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
8774}
8775
8776
8777#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
8778
8779/**
8780 * This is called by IEM_MC_FREE_ARG.
8781 */
8782DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8783{
8784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8785 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
8786 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
8787}
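
/*
 * Illustrative MC-block lifetime that the two helpers above back (the variable
 * name is made up for the example):
 *
 *      IEM_MC_LOCAL(uint16_t, u16Tmp);
 *      ... use u16Tmp ...
 *      IEM_MC_FREE_LOCAL(u16Tmp);
 */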
8788
8789
8790#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
8791
8792/**
8793 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
8794 */
8795DECL_INLINE_THROW(uint32_t)
8796iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
8797{
8798 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
8799 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
8800 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8801 Assert( pVarDst->cbVar == sizeof(uint16_t)
8802 || pVarDst->cbVar == sizeof(uint32_t));
8803
8804 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
8805 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
8806 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
8807 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
8808 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8809
8810 Assert(pVarDst->cbVar < pVarSrc->cbVar);
8811
8812 /*
8813 * Special case for immediates.
8814 */
8815 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
8816 {
8817 switch (pVarDst->cbVar)
8818 {
8819 case sizeof(uint16_t):
8820 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
8821 break;
8822 case sizeof(uint32_t):
8823 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
8824 break;
8825 default: AssertFailed(); break;
8826 }
8827 }
8828 else
8829 {
8830 /*
8831 * The generic solution for now.
8832 */
8833 /** @todo optimize this by having the python script make sure the source
8834 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
8835 * statement. Then we could just transfer the register assignments. */
8836 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
8837 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
8838 switch (pVarDst->cbVar)
8839 {
8840 case sizeof(uint16_t):
8841 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
8842 break;
8843 case sizeof(uint32_t):
8844 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
8845 break;
8846 default: AssertFailed(); break;
8847 }
8848 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
8849 iemNativeVarRegisterRelease(pReNative, idxVarDst);
8850 }
8851 return off;
8852}
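
/*
 * Sketch of the MC statement this implements (variable names are illustrative);
 * the destination must be declared but not yet initialized and must be narrower
 * than the source:
 *
 *      IEM_MC_LOCAL(uint64_t, u64Src);
 *      IEM_MC_LOCAL(uint32_t, u32Dst);
 *      ...
 *      IEM_MC_ASSIGN_TO_SMALLER(u32Dst, u64Src);
 */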
8853
8854
8855
8856/*********************************************************************************************************************************
8857* Emitters for IEM_MC_CALL_CIMPL_XXX *
8858*********************************************************************************************************************************/
8859
8860/**
8861 * Emits code to load a reference to the given guest register into @a idxGprDst.
8862 */
8863DECL_INLINE_THROW(uint32_t)
8864iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8865 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8866{
8867#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8868 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
8869#endif
8870
8871 /*
8872 * Get the offset relative to the CPUMCTX structure.
8873 */
8874 uint32_t offCpumCtx;
8875 switch (enmClass)
8876 {
8877 case kIemNativeGstRegRef_Gpr:
8878 Assert(idxRegInClass < 16);
8879 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8880 break;
8881
8882 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
8883 Assert(idxRegInClass < 4);
8884 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8885 break;
8886
8887 case kIemNativeGstRegRef_EFlags:
8888 Assert(idxRegInClass == 0);
8889 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8890 break;
8891
8892 case kIemNativeGstRegRef_MxCsr:
8893 Assert(idxRegInClass == 0);
8894 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8895 break;
8896
8897 case kIemNativeGstRegRef_FpuReg:
8898 Assert(idxRegInClass < 8);
8899 AssertFailed(); /** @todo what kind of indexing? */
8900 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8901 break;
8902
8903 case kIemNativeGstRegRef_MReg:
8904 Assert(idxRegInClass < 8);
8905 AssertFailed(); /** @todo what kind of indexing? */
8906 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8907 break;
8908
8909 case kIemNativeGstRegRef_XReg:
8910 Assert(idxRegInClass < 16);
8911 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8912 break;
8913
8914 default:
8915 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8916 }
8917
8918 /*
8919 * Load the value into the destination register.
8920 */
8921#ifdef RT_ARCH_AMD64
8922 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8923
8924#elif defined(RT_ARCH_ARM64)
8925 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8926 Assert(offCpumCtx < 4096);
8927 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8928
8929#else
8930# error "Port me!"
8931#endif
8932
8933 return off;
8934}
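
/*
 * Sketch: loading the address of the guest RAX register into a call argument
 * register (all identifiers below appear elsewhere in this file):
 *
 *      off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
 *                                           kIemNativeGstRegRef_Gpr, X86_GREG_xAX);
 */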
8935
8936
8937/**
8938 * Common code for CIMPL and AIMPL calls.
8939 *
8940 * These are calls that use argument variables and such. They should not be
8941 * confused with internal calls required to implement an MC operation,
8942 * like a TLB load and similar.
8943 *
8944 * Upon return all that is left to do is to load any hidden arguments and
8945 * perform the call. All argument variables are freed.
8946 *
8947 * @returns New code buffer offset; throws VBox status code on error.
8948 * @param pReNative The native recompile state.
8949 * @param off The code buffer offset.
8950 * @param cArgs The total number of arguments (includes hidden
8951 * count).
8952 * @param cHiddenArgs The number of hidden arguments. The hidden
8953 * arguments must not have any variable declared for
8954 * them, whereas all the regular arguments must
8955 * (tstIEMCheckMc ensures this).
8956 */
8957DECL_HIDDEN_THROW(uint32_t)
8958iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8959{
8960#ifdef VBOX_STRICT
8961 /*
8962 * Assert sanity.
8963 */
8964 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8965 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8966 for (unsigned i = 0; i < cHiddenArgs; i++)
8967 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8968 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8969 {
8970 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8971 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8972 }
8973 iemNativeRegAssertSanity(pReNative);
8974#endif
8975
8976 /* We don't know what the called function makes use of, so flush any pending register writes. */
8977 off = iemNativeRegFlushPendingWrites(pReNative, off);
8978
8979 /*
8980 * Before we do anything else, go over variables that are referenced and
8981 * make sure they are not in a register.
8982 */
8983 uint32_t bmVars = pReNative->Core.bmVars;
8984 if (bmVars)
8985 {
8986 do
8987 {
8988 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8989 bmVars &= ~RT_BIT_32(idxVar);
8990
8991 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8992 {
8993 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8994 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8995 {
8996 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8997 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8998 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8999 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9000 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9001
9002 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9003 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
9004 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
9005 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
9006 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
9007 }
9008 }
9009 } while (bmVars != 0);
9010#if 0 //def VBOX_STRICT
9011 iemNativeRegAssertSanity(pReNative);
9012#endif
9013 }
9014
9015 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
9016
9017 /*
9018 * First, go over the host registers that will be used for arguments and make
9019 * sure they either hold the desired argument or are free.
9020 */
9021 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
9022 {
9023 for (uint32_t i = 0; i < cRegArgs; i++)
9024 {
9025 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9026 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9027 {
9028 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
9029 {
9030 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
9031 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9032 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9033 Assert(pVar->idxReg == idxArgReg);
9034 uint8_t const uArgNo = pVar->uArgNo;
9035 if (uArgNo == i)
9036 { /* perfect */ }
9037 /* The variable allocator logic should make sure this is impossible,
9038 except for when the return register is used as a parameter (ARM,
9039 but not x86). */
9040#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
9041 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
9042 {
9043# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9044# error "Implement this"
9045# endif
9046 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
9047 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
9048 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
9049 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9050 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
9051 }
9052#endif
9053 else
9054 {
9055 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9056
9057 if (pVar->enmKind == kIemNativeVarKind_Stack)
9058 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
9059 else
9060 {
9061 /* just free it, can be reloaded if used again */
9062 pVar->idxReg = UINT8_MAX;
9063 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
9064 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
9065 }
9066 }
9067 }
9068 else
9069 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
9070 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
9071 }
9072 }
9073#if 0 //def VBOX_STRICT
9074 iemNativeRegAssertSanity(pReNative);
9075#endif
9076 }
9077
9078 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
9079
9080#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9081 /*
9082 * If there are any stack arguments, make sure they are in their place as well.
9083 *
9084     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
9085     * the caller) will be loading it later and it must be free (see the first loop).
9086 */
9087 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
9088 {
9089 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
9090 {
9091 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9092 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
9093 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9094 {
9095 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
9096 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
9097 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
9098 pVar->idxReg = UINT8_MAX;
9099 }
9100 else
9101 {
9102 /* Use ARG0 as temp for stuff we need registers for. */
9103 switch (pVar->enmKind)
9104 {
9105 case kIemNativeVarKind_Stack:
9106 {
9107 uint8_t const idxStackSlot = pVar->idxStackSlot;
9108 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9109 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
9110 iemNativeStackCalcBpDisp(idxStackSlot));
9111 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9112 continue;
9113 }
9114
9115 case kIemNativeVarKind_Immediate:
9116 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
9117 continue;
9118
9119 case kIemNativeVarKind_VarRef:
9120 {
9121 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9122 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9123 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9124 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9125 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9126 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9127 {
9128 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9129 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9130 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9131 }
9132 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9133 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9134 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
9135 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9136 continue;
9137 }
9138
9139 case kIemNativeVarKind_GstRegRef:
9140 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
9141 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9142 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9143 continue;
9144
9145 case kIemNativeVarKind_Invalid:
9146 case kIemNativeVarKind_End:
9147 break;
9148 }
9149 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9150 }
9151 }
9152# if 0 //def VBOX_STRICT
9153 iemNativeRegAssertSanity(pReNative);
9154# endif
9155 }
9156#else
9157 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
9158#endif
9159
9160 /*
9161 * Make sure the argument variables are loaded into their respective registers.
9162 *
9163 * We can optimize this by ASSUMING that any register allocations are for
9164     * registers that have already been loaded and are ready. The previous step
9165 * saw to that.
9166 */
9167 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
9168 {
9169 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9170 {
9171 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9172 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9173 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
9174 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
9175 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
9176 else
9177 {
9178 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9179 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9180 {
9181 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9182 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
9183 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
9184 | RT_BIT_32(idxArgReg);
9185 pVar->idxReg = idxArgReg;
9186 }
9187 else
9188 {
9189 /* Use ARG0 as temp for stuff we need registers for. */
9190 switch (pVar->enmKind)
9191 {
9192 case kIemNativeVarKind_Stack:
9193 {
9194 uint8_t const idxStackSlot = pVar->idxStackSlot;
9195 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9196 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
9197 continue;
9198 }
9199
9200 case kIemNativeVarKind_Immediate:
9201 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
9202 continue;
9203
9204 case kIemNativeVarKind_VarRef:
9205 {
9206 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9207 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9208 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
9209 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9210 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9211 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9212 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9213 {
9214 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9215 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9216 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9217 }
9218 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9219 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9220 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
9221 continue;
9222 }
9223
9224 case kIemNativeVarKind_GstRegRef:
9225 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
9226 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9227 continue;
9228
9229 case kIemNativeVarKind_Invalid:
9230 case kIemNativeVarKind_End:
9231 break;
9232 }
9233 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9234 }
9235 }
9236 }
9237#if 0 //def VBOX_STRICT
9238 iemNativeRegAssertSanity(pReNative);
9239#endif
9240 }
9241#ifdef VBOX_STRICT
9242 else
9243 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9244 {
9245 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
9246 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
9247 }
9248#endif
9249
9250 /*
9251 * Free all argument variables (simplified).
9252 * Their lifetime always expires with the call they are for.
9253 */
9254 /** @todo Make the python script check that arguments aren't used after
9255 * IEM_MC_CALL_XXXX. */
9256 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
9257 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
9258 * an argument value. There is also some FPU stuff. */
9259 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
9260 {
9261 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
9262 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
9263
9264 /* no need to free registers: */
9265 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
9266 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
9267 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
9268 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
9269 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
9270 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
9271
9272 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
9273 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9274 iemNativeVarFreeStackSlots(pReNative, idxVar);
9275 }
9276 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9277
9278 /*
9279 * Flush volatile registers as we make the call.
9280 */
9281 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
9282
9283 return off;
9284}
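
/*
 * To recap the above: pending guest register writes are flushed, referenced
 * variables are spilled to their stack slots, whatever currently occupies the
 * argument registers is moved or evicted, each argument variable is loaded into
 * its IEMNATIVE_CALL_ARGn_GREG (or stack slot), the argument variables are
 * freed, and the remaining volatile registers are flushed via
 * iemNativeRegMoveAndFreeAndFlushAtCall().
 */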
9285
9286
9287/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
9288DECL_HIDDEN_THROW(uint32_t)
9289iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
9290 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
9291
9292{
9293 /*
9294 * Do all the call setup and cleanup.
9295 */
9296 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
9297
9298 /*
9299 * Load the two or three hidden arguments.
9300 */
9301#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9302 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
9303 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9304 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
9305#else
9306 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9307 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
9308#endif
9309
9310 /*
9311 * Make the call and check the return code.
9312 *
9313 * Shadow PC copies are always flushed here, other stuff depends on flags.
9314 * Segment and general purpose registers are explicitly flushed via the
9315 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
9316 * macros.
9317 */
9318 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
9319#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9320 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
9321#endif
9322 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
9323 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
9324 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
9325 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
9326
9327 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
9328}
9329
9330
9331#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
9332 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
9333
9334/** Emits code for IEM_MC_CALL_CIMPL_1. */
9335DECL_INLINE_THROW(uint32_t)
9336iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9337 uintptr_t pfnCImpl, uint8_t idxArg0)
9338{
9339 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9340 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
9341}
9342
9343
9344#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
9345 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
9346
9347/** Emits code for IEM_MC_CALL_CIMPL_2. */
9348DECL_INLINE_THROW(uint32_t)
9349iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9350 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
9351{
9352 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9353 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9354 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
9355}
9356
9357
9358#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
9359 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9360 (uintptr_t)a_pfnCImpl, a0, a1, a2)
9361
9362/** Emits code for IEM_MC_CALL_CIMPL_3. */
9363DECL_INLINE_THROW(uint32_t)
9364iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9365 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9366{
9367 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9368 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9369 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9370 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
9371}
9372
9373
9374#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
9375 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9376 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
9377
9378/** Emits code for IEM_MC_CALL_CIMPL_4. */
9379DECL_INLINE_THROW(uint32_t)
9380iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9381 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
9382{
9383 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9384 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9385 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9386 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
9387 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
9388}
9389
9390
9391#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
9392 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9393 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
9394
9395/** Emits code for IEM_MC_CALL_CIMPL_5. */
9396DECL_INLINE_THROW(uint32_t)
9397iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9398 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
9399{
9400 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9401 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9402 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9403 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
9404 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
9405 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
9406}
9407
9408
9409/** Recompiler debugging: Flush guest register shadow copies. */
9410#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
9411
9412
9413
9414/*********************************************************************************************************************************
9415* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
9416*********************************************************************************************************************************/
9417
9418/**
9419 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
9420 */
9421DECL_INLINE_THROW(uint32_t)
9422iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9423 uintptr_t pfnAImpl, uint8_t cArgs)
9424{
9425 if (idxVarRc != UINT8_MAX)
9426 {
9427 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
9428 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
9429 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
9430 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
9431 }
9432
9433 /*
9434 * Do all the call setup and cleanup.
9435 */
9436 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
9437
9438 /*
9439 * Make the call and update the return code variable if we've got one.
9440 */
9441 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
9442 if (idxVarRc != UINT8_MAX)
9443 {
9444off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
9445 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
9446 }
9447
9448 return off;
9449}
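
/*
 * Illustrative MC-block usage funnelling into the common worker above (the
 * assembly worker names are made up for the example):
 *
 *      IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_example_u32, pu32Dst, u32Src);
 *      IEM_MC_CALL_AIMPL_3(rc, iemAImpl_example_eflags_u32, pu32Dst, u32Src, fEFlagsIn);
 */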
9450
9451
9452
9453#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
9454 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
9455
9456#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
9457 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
9458
9459/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
9460DECL_INLINE_THROW(uint32_t)
9461iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
9462{
9463 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
9464}
9465
9466
9467#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
9468 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
9469
9470#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
9471 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
9472
9473/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
9474DECL_INLINE_THROW(uint32_t)
9475iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
9476{
9477 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9478 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
9479}
9480
9481
9482#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
9483 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
9484
9485#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
9486 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
9487
9488/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
9489DECL_INLINE_THROW(uint32_t)
9490iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9491 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9492{
9493 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9494 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9495 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
9496}
9497
9498
9499#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
9500 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
9501
9502#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
9503 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
9504
9505/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
9506DECL_INLINE_THROW(uint32_t)
9507iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9508 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9509{
9510 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9511 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9512 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9513 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
9514}
9515
9516
9517#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
9518 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9519
9520#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
9521 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9522
9523/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
9524DECL_INLINE_THROW(uint32_t)
9525iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9526 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
9527{
9528 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9529 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9530 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9531 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
9532 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
9533}
9534
9535
9536
9537/*********************************************************************************************************************************
9538* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
9539*********************************************************************************************************************************/
9540
9541#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
9542 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
9543
9544#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9545 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
9546
9547#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9548 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
9549
9550#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9551 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
9552
9553
9554/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
9555 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
9556DECL_INLINE_THROW(uint32_t)
9557iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
9558{
9559 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9560 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9561 Assert(iGRegEx < 20);
9562
9563 /* Same discussion as in iemNativeEmitFetchGregU16 */
9564 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9565 kIemNativeGstRegUse_ReadOnly);
9566
9567 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9568 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9569
9570 /* The value is zero-extended to the full 64-bit host register width. */
9571 if (iGRegEx < 16)
9572 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9573 else
9574 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9575
9576 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9577 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9578 return off;
9579}
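
/*
 * Worked example of the iGRegEx encoding handled above: values 0..15 select the
 * low byte of the ordinary GPRs (AL, CL, ..., R15L), while 16..19 select the
 * legacy high-byte registers; e.g. iGRegEx = 18 masks to GPR 2 (RDX) and takes
 * the GprFromGpr8Hi path, i.e. DH.
 */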
9580
9581
9582#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9583 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
9584
9585#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9586 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
9587
9588#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9589 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
9590
9591/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
9592DECL_INLINE_THROW(uint32_t)
9593iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
9594{
9595 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9596 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
9597 Assert(iGRegEx < 20);
9598
9599 /* Same discussion as in iemNativeEmitFetchGregU16 */
9600 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9601 kIemNativeGstRegUse_ReadOnly);
9602
9603 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9604 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9605
9606 if (iGRegEx < 16)
9607 {
9608 switch (cbSignExtended)
9609 {
9610 case sizeof(uint16_t):
9611 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9612 break;
9613 case sizeof(uint32_t):
9614 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9615 break;
9616 case sizeof(uint64_t):
9617 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9618 break;
9619 default: AssertFailed(); break;
9620 }
9621 }
9622 else
9623 {
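        /* AH/CH/DH/BH: first move bits 15:8 of the guest register down into bits 7:0
           of the variable register, then sign-extend the result in place. */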
9624 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9625 switch (cbSignExtended)
9626 {
9627 case sizeof(uint16_t):
9628 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9629 break;
9630 case sizeof(uint32_t):
9631 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9632 break;
9633 case sizeof(uint64_t):
9634 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9635 break;
9636 default: AssertFailed(); break;
9637 }
9638 }
9639
9640 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9641 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9642 return off;
9643}
9644
9645
9646
9647#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
9648 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
9649
9650#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u32Dst, a_iGReg) \
9651    off = iemNativeEmitFetchGregU16(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
9652
9653#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u64Dst, a_iGReg) \
9654    off = iemNativeEmitFetchGregU16(pReNative, off, a_u64Dst, a_iGReg, sizeof(uint64_t))
9655
9656/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
9657DECL_INLINE_THROW(uint32_t)
9658iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9659{
9660 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9661 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9662 Assert(iGReg < 16);
9663
9664 /*
9665     * We can either just load the low 16 bits of the GPR into a host register
9666     * for the variable, or we can do so via a shadow copy host register. The
9667     * latter will avoid having to reload it if it's being stored later, but
9668     * will waste a host register if it isn't touched again.  Since we don't
9669     * know what's going to happen, we choose the latter for now.
9670 */
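    /* Illustrative example only (the actual host registers are picked by the
       allocator and the exact instructions by the per-host emitters): fetching
       the low 16 bits of guest CX may come out roughly as
           mov   r11, [pVCpu + offsetof(cpum.GstCtx.rcx)]  ; load/allocate the shadow copy
           movzx eax, r11w                                 ; variable register = low 16 bits
       with r11 staying around as the shadow of RCX for later statements. */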
9671 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9672 kIemNativeGstRegUse_ReadOnly);
9673
9674 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9675 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9676 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9677 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9678
9679 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9680 return off;
9681}
9682
9683
9684#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u32Dst, a_iGReg) \
9685    off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
9686
9687#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u64Dst, a_iGReg) \
9688    off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u64Dst, a_iGReg, sizeof(uint64_t))
9689
9690/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
9691DECL_INLINE_THROW(uint32_t)
9692iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
9693{
9694 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9695 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
9696 Assert(iGReg < 16);
9697
9698 /*
9699     * We can either just load the low 16 bits of the GPR into a host register
9700     * for the variable, or we can do so via a shadow copy host register. The
9701     * latter will avoid having to reload it if it's being stored later, but
9702     * will waste a host register if it isn't touched again.  Since we don't
9703     * know what's going to happen, we choose the latter for now.
9704 */
9705 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9706 kIemNativeGstRegUse_ReadOnly);
9707
9708 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9709 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9710 if (cbSignExtended == sizeof(uint32_t))
9711 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9712 else
9713 {
9714 Assert(cbSignExtended == sizeof(uint64_t));
9715 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9716 }
9717 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9718
9719 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9720 return off;
9721}
9722
9723
9724#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
9725 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
9726
9727#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u64Dst, a_iGReg) \
9728    off = iemNativeEmitFetchGregU32(pReNative, off, a_u64Dst, a_iGReg, sizeof(uint64_t))
9729
9730/** Emits code for IEM_MC_FETCH_GREG_U32. */
9731DECL_INLINE_THROW(uint32_t)
9732iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9733{
9734 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9735 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9736 Assert(iGReg < 16);
9737
9738 /*
9739     * We can either just load the low 32 bits of the GPR into a host register
9740     * for the variable, or we can do so via a shadow copy host register. The
9741     * latter will avoid having to reload it if it's being stored later, but
9742     * will waste a host register if it isn't touched again.  Since we don't
9743     * know what's going to happen, we choose the latter for now.
9744 */
9745 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9746 kIemNativeGstRegUse_ReadOnly);
9747
9748 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9749 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9750 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9751 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9752
9753 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9754 return off;
9755}
9756
9757
9758#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
9759 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
9760
9761/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
9762DECL_INLINE_THROW(uint32_t)
9763iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9764{
9765 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9766 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9767 Assert(iGReg < 16);
9768
9769 /*
9770     * We can either just load the low 32 bits of the GPR into a host register
9771     * for the variable, or we can do so via a shadow copy host register. The
9772     * latter will avoid having to reload it if it's being stored later, but
9773     * will waste a host register if it isn't touched again.  Since we don't
9774     * know what's going to happen, we choose the latter for now.
9775 */
9776 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9777 kIemNativeGstRegUse_ReadOnly);
9778
9779 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9780 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9781 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9782 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9783
9784 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9785 return off;
9786}
9787
9788
9789#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
9790 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9791
9792#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
9793 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9794
9795/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
9796 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
9797DECL_INLINE_THROW(uint32_t)
9798iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9799{
9800 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9801 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9802 Assert(iGReg < 16);
9803
9804 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9805 kIemNativeGstRegUse_ReadOnly);
9806
9807 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9808 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9809 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
9810 /** @todo name the register a shadow one already? */
9811 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9812
9813 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9814 return off;
9815}
9816
9817
9818
9819/*********************************************************************************************************************************
9820* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
9821*********************************************************************************************************************************/
9822
9823#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
9824 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
9825
9826/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
9827DECL_INLINE_THROW(uint32_t)
9828iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
9829{
9830 Assert(iGRegEx < 20);
9831 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9832 kIemNativeGstRegUse_ForUpdate);
9833#ifdef RT_ARCH_AMD64
9834 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9835
9836 /* To the lowest byte of the register: mov r8, imm8 */
9837 if (iGRegEx < 16)
9838 {
9839 if (idxGstTmpReg >= 8)
9840 pbCodeBuf[off++] = X86_OP_REX_B;
9841 else if (idxGstTmpReg >= 4)
9842 pbCodeBuf[off++] = X86_OP_REX;
9843 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9844 pbCodeBuf[off++] = u8Value;
9845 }
9846    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, else we rotate. */
9847 else if (idxGstTmpReg < 4)
9848 {
9849 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
9850 pbCodeBuf[off++] = u8Value;
9851 }
9852 else
9853 {
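        /* The guest high byte (AH/CH/DH/BH) sits in bits 15:8 of the shadow host
           register, which has no directly addressable 8-bit-high alias here, so we
           rotate it down, patch the low byte and rotate back.  Illustrative sequence,
           assuming host r10 shadows the guest register:
               ror r10, 8
               mov r10b, imm8
               rol r10, 8 */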
9854 /* ror reg64, 8 */
9855 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9856 pbCodeBuf[off++] = 0xc1;
9857 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9858 pbCodeBuf[off++] = 8;
9859
9860 /* mov reg8, imm8 */
9861 if (idxGstTmpReg >= 8)
9862 pbCodeBuf[off++] = X86_OP_REX_B;
9863 else if (idxGstTmpReg >= 4)
9864 pbCodeBuf[off++] = X86_OP_REX;
9865 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9866 pbCodeBuf[off++] = u8Value;
9867
9868 /* rol reg64, 8 */
9869 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9870 pbCodeBuf[off++] = 0xc1;
9871 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9872 pbCodeBuf[off++] = 8;
9873 }
9874
9875#elif defined(RT_ARCH_ARM64)
9876 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
9877 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9878 if (iGRegEx < 16)
9879 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
9880 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
9881 else
9882 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
9883 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
9884 iemNativeRegFreeTmp(pReNative, idxImmReg);
9885
9886#else
9887# error "Port me!"
9888#endif
9889
9890 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9891
9892 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9893
9894 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9895 return off;
9896}
9897
9898
9899#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
9900 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
9901
9902/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
9903DECL_INLINE_THROW(uint32_t)
9904iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
9905{
9906 Assert(iGRegEx < 20);
9907 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9908
9909 /*
9910 * If it's a constant value (unlikely) we treat this as a
9911 * IEM_MC_STORE_GREG_U8_CONST statement.
9912 */
9913 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
9914 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
9915 { /* likely */ }
9916 else
9917 {
9918 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
9919 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9920 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
9921 }
9922
9923 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9924 kIemNativeGstRegUse_ForUpdate);
9925 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9926
9927#ifdef RT_ARCH_AMD64
9928 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
9929 if (iGRegEx < 16)
9930 {
9931 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
9932 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9933 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9934 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9935 pbCodeBuf[off++] = X86_OP_REX;
9936 pbCodeBuf[off++] = 0x8a;
9937 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9938 }
9939    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, else we rotate. */
9940 else if (idxGstTmpReg < 4 && idxVarReg < 4)
9941 {
9942 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
9943 pbCodeBuf[off++] = 0x8a;
9944 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
9945 }
9946 else
9947 {
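        /* Same rotate-patch-rotate approach as in iemNativeEmitStoreGregU8Const, only
           the low byte is copied from the value variable's host register instead of
           coming from an immediate. */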
9948 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
9949
9950 /* ror reg64, 8 */
9951 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9952 pbCodeBuf[off++] = 0xc1;
9953 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9954 pbCodeBuf[off++] = 8;
9955
9956 /* mov reg8, reg8(r/m) */
9957 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9958 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9959 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9960 pbCodeBuf[off++] = X86_OP_REX;
9961 pbCodeBuf[off++] = 0x8a;
9962 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9963
9964 /* rol reg64, 8 */
9965 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9966 pbCodeBuf[off++] = 0xc1;
9967 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9968 pbCodeBuf[off++] = 8;
9969 }
9970
9971#elif defined(RT_ARCH_ARM64)
9972 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
9973 or
9974 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
9975 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9976 if (iGRegEx < 16)
9977 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
9978 else
9979 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
9980
9981#else
9982# error "Port me!"
9983#endif
9984 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9985
9986 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9987
9988 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9989 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9990 return off;
9991}
9992
9993
9994
9995#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
9996 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
9997
9998/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
9999DECL_INLINE_THROW(uint32_t)
10000iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
10001{
10002 Assert(iGReg < 16);
10003 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10004 kIemNativeGstRegUse_ForUpdate);
10005#ifdef RT_ARCH_AMD64
10006 /* mov reg16, imm16 */
10007 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
10008 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10009 if (idxGstTmpReg >= 8)
10010 pbCodeBuf[off++] = X86_OP_REX_B;
10011 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
10012 pbCodeBuf[off++] = RT_BYTE1(uValue);
10013 pbCodeBuf[off++] = RT_BYTE2(uValue);
10014
10015#elif defined(RT_ARCH_ARM64)
10016 /* movk xdst, #uValue, lsl #0 */
10017 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10018 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
10019
10020#else
10021# error "Port me!"
10022#endif
10023
10024 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10025
10026 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10027 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10028 return off;
10029}
10030
10031
10032#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
10033 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
10034
10035/** Emits code for IEM_MC_STORE_GREG_U16. */
10036DECL_INLINE_THROW(uint32_t)
10037iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
10038{
10039 Assert(iGReg < 16);
10040 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10041
10042 /*
10043 * If it's a constant value (unlikely) we treat this as a
10044 * IEM_MC_STORE_GREG_U16_CONST statement.
10045 */
10046 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
10047 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
10048 { /* likely */ }
10049 else
10050 {
10051 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10052 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10053 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
10054 }
10055
10056 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10057 kIemNativeGstRegUse_ForUpdate);
10058
10059#ifdef RT_ARCH_AMD64
10060 /* mov reg16, reg16 or [mem16] */
10061 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
10062 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10063 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10064 {
10065 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
10066 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
10067 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
10068 pbCodeBuf[off++] = 0x8b;
10069 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
10070 }
10071 else
10072 {
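        /* The value variable currently has no host register, so read its low 16 bits
           straight from the variable's stack slot (rBP relative). */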
10073 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
10074 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10075 if (idxGstTmpReg >= 8)
10076 pbCodeBuf[off++] = X86_OP_REX_R;
10077 pbCodeBuf[off++] = 0x8b;
10078 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
10079 }
10080
10081#elif defined(RT_ARCH_ARM64)
10082 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
10083 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
10084 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10085 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
10086 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10087
10088#else
10089# error "Port me!"
10090#endif
10091
10092 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10093
10094 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10095 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10096 return off;
10097}
10098
10099
10100#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
10101 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
10102
10103/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
10104DECL_INLINE_THROW(uint32_t)
10105iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
10106{
10107 Assert(iGReg < 16);
10108 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10109 kIemNativeGstRegUse_ForFullWrite);
10110 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
10111 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10112 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10113 return off;
10114}
10115
10116
10117#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
10118 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
10119
10120/** Emits code for IEM_MC_STORE_GREG_U32. */
10121DECL_INLINE_THROW(uint32_t)
10122iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
10123{
10124 Assert(iGReg < 16);
10125 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10126
10127 /*
10128 * If it's a constant value (unlikely) we treat this as a
10129 * IEM_MC_STORE_GREG_U32_CONST statement.
10130 */
10131 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
10132 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
10133 { /* likely */ }
10134 else
10135 {
10136 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10137 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10138 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
10139 }
10140
10141 /*
10142     * For the rest we allocate a guest register for the variable and write
10143 * it to the CPUMCTX structure.
10144 */
10145 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
10146 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10147#ifdef VBOX_STRICT
10148 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
10149#endif
10150 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10151 return off;
10152}
10153
10154
10155#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
10156 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
10157
10158/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
10159DECL_INLINE_THROW(uint32_t)
10160iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
10161{
10162 Assert(iGReg < 16);
10163 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10164 kIemNativeGstRegUse_ForFullWrite);
10165 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
10166 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10167 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10168 return off;
10169}
10170
10171
10172#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
10173 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
10174
10175/** Emits code for IEM_MC_STORE_GREG_U64. */
10176DECL_INLINE_THROW(uint32_t)
10177iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
10178{
10179 Assert(iGReg < 16);
10180 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10181
10182 /*
10183 * If it's a constant value (unlikely) we treat this as a
10184 * IEM_MC_STORE_GREG_U64_CONST statement.
10185 */
10186 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
10187 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
10188 { /* likely */ }
10189 else
10190 {
10191 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10192 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10193 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
10194 }
10195
10196 /*
10197     * For the rest we allocate a guest register for the variable and write
10198 * it to the CPUMCTX structure.
10199 */
10200 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
10201 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10202 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10203 return off;
10204}
10205
10206
10207#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
10208 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
10209
10210/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
10211DECL_INLINE_THROW(uint32_t)
10212iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
10213{
10214 Assert(iGReg < 16);
10215 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10216 kIemNativeGstRegUse_ForUpdate);
10217 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
10218 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10219 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10220 return off;
10221}
10222
10223
10224/*********************************************************************************************************************************
10225* General purpose register manipulation (add, sub). *
10226*********************************************************************************************************************************/
10227
10228#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
10229    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
10230
10231/** Emits code for IEM_MC_ADD_GREG_U16. */
10232DECL_INLINE_THROW(uint32_t)
10233iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
10234{
10235 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10236 kIemNativeGstRegUse_ForUpdate);
10237
10238#ifdef RT_ARCH_AMD64
10239 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
10240 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10241 if (idxGstTmpReg >= 8)
10242 pbCodeBuf[off++] = X86_OP_REX_B;
10243 if (uAddend == 1)
10244 {
10245 pbCodeBuf[off++] = 0xff; /* inc */
10246 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10247 }
10248 else
10249 {
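        /* add reg16, imm16 - the immediate is emitted as two bytes; the high byte is
           always zero since uAddend is only 8 bits wide. */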
10250 pbCodeBuf[off++] = 0x81;
10251 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10252 pbCodeBuf[off++] = uAddend;
10253 pbCodeBuf[off++] = 0;
10254 }
10255
10256#else
10257 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
10258 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10259
10260    /* add tmp, gstgrp, uAddend */
10261 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
10262
10263 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
10264 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
10265
10266 iemNativeRegFreeTmp(pReNative, idxTmpReg);
10267#endif
10268
10269 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10270
10271 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10272
10273 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10274 return off;
10275}
10276
10277
10278#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
10279 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
10280
10281#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
10282 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
10283
10284/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
10285DECL_INLINE_THROW(uint32_t)
10286iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
10287{
10288 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10289 kIemNativeGstRegUse_ForUpdate);
10290
10291#ifdef RT_ARCH_AMD64
10292 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
10293 if (f64Bit)
10294 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
10295 else if (idxGstTmpReg >= 8)
10296 pbCodeBuf[off++] = X86_OP_REX_B;
10297 if (uAddend == 1)
10298 {
10299 pbCodeBuf[off++] = 0xff; /* inc */
10300 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10301 }
10302 else if (uAddend < 128)
10303 {
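        /* add reg, imm8 - the immediate is sign-extended by the CPU, which is safe
           here because uAddend is below 128. */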
10304 pbCodeBuf[off++] = 0x83; /* add */
10305 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10306 pbCodeBuf[off++] = RT_BYTE1(uAddend);
10307 }
10308 else
10309 {
10310 pbCodeBuf[off++] = 0x81; /* add */
10311 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10312 pbCodeBuf[off++] = RT_BYTE1(uAddend);
10313 pbCodeBuf[off++] = 0;
10314 pbCodeBuf[off++] = 0;
10315 pbCodeBuf[off++] = 0;
10316 }
10317
10318#else
10319    /* add gstgrp, gstgrp, uAddend */
10320 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10321 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
10322
10323#endif
10324
10325 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10326
10327 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10328
10329 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10330 return off;
10331}
10332
10333
10334
10335#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
10336 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
10337
10338/** Emits code for IEM_MC_SUB_GREG_U16. */
10339DECL_INLINE_THROW(uint32_t)
10340iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
10341{
10342 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10343 kIemNativeGstRegUse_ForUpdate);
10344
10345#ifdef RT_ARCH_AMD64
10346 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
10347 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10348 if (idxGstTmpReg >= 8)
10349 pbCodeBuf[off++] = X86_OP_REX_B;
10350 if (uSubtrahend == 1)
10351 {
10352 pbCodeBuf[off++] = 0xff; /* dec */
10353 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10354 }
10355 else
10356 {
10357 pbCodeBuf[off++] = 0x81;
10358 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10359 pbCodeBuf[off++] = uSubtrahend;
10360 pbCodeBuf[off++] = 0;
10361 }
10362
10363#else
10364 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
10365 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10366
10367 /* sub tmp, gstgrp, uSubtrahend */
10368 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
10369
10370 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
10371 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
10372
10373 iemNativeRegFreeTmp(pReNative, idxTmpReg);
10374#endif
10375
10376 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10377
10378 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10379
10380 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10381 return off;
10382}
10383
10384
10385#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
10386 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
10387
10388#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
10389 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
10390
10391/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
10392DECL_INLINE_THROW(uint32_t)
10393iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
10394{
10395 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10396 kIemNativeGstRegUse_ForUpdate);
10397
10398#ifdef RT_ARCH_AMD64
10399 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
10400 if (f64Bit)
10401 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
10402 else if (idxGstTmpReg >= 8)
10403 pbCodeBuf[off++] = X86_OP_REX_B;
10404 if (uSubtrahend == 1)
10405 {
10406 pbCodeBuf[off++] = 0xff; /* dec */
10407 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10408 }
10409 else if (uSubtrahend < 128)
10410 {
10411 pbCodeBuf[off++] = 0x83; /* sub */
10412 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10413 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
10414 }
10415 else
10416 {
10417 pbCodeBuf[off++] = 0x81; /* sub */
10418 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10419 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
10420 pbCodeBuf[off++] = 0;
10421 pbCodeBuf[off++] = 0;
10422 pbCodeBuf[off++] = 0;
10423 }
10424
10425#else
10426 /* sub tmp, gstgrp, uSubtrahend */
10427 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10428 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
10429
10430#endif
10431
10432 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10433
10434 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10435
10436 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10437 return off;
10438}
10439
10440
10441/*********************************************************************************************************************************
10442* Local variable manipulation (add, sub, and, or). *
10443*********************************************************************************************************************************/
10444
10445#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
10446 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10447
10448#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
10449 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10450
10451#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
10452 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10453
10454#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
10455 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10456
10457/** Emits code for AND'ing a local and a constant value. */
10458DECL_INLINE_THROW(uint32_t)
10459iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10460{
10461#ifdef VBOX_STRICT
10462 switch (cbMask)
10463 {
10464 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10465 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10466 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10467 case sizeof(uint64_t): break;
10468 default: AssertFailedBreak();
10469 }
10470#endif
10471
10472 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10473 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
10474
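    /* An 8, 16 or 32-bit mask fits the 32-bit immediate form; only a full 64-bit
       mask needs the 64-bit AND variant. */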
10475 if (cbMask <= sizeof(uint32_t))
10476 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
10477 else
10478 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
10479
10480 iemNativeVarRegisterRelease(pReNative, idxVar);
10481 return off;
10482}
10483
10484
10485#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
10486 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10487
10488#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
10489 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10490
10491#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
10492 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10493
10494#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
10495 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10496
10497/** Emits code for OR'ing a local and a constant value. */
10498DECL_INLINE_THROW(uint32_t)
10499iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10500{
10501#ifdef VBOX_STRICT
10502 switch (cbMask)
10503 {
10504 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10505 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10506 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10507 case sizeof(uint64_t): break;
10508 default: AssertFailedBreak();
10509 }
10510#endif
10511
10512 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10513 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
10514
10515 if (cbMask <= sizeof(uint32_t))
10516 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
10517 else
10518 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
10519
10520 iemNativeVarRegisterRelease(pReNative, idxVar);
10521 return off;
10522}
10523
10524
10525#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
10526 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
10527
10528#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
10529 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
10530
10531#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
10532 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
10533
10534/** Emits code for reversing the byte order in a local value. */
10535DECL_INLINE_THROW(uint32_t)
10536iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
10537{
10538 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10539 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
10540
10541 switch (cbLocal)
10542 {
10543 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
10544 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
10545 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
10546 default: AssertFailedBreak();
10547 }
10548
10549 iemNativeVarRegisterRelease(pReNative, idxVar);
10550 return off;
10551}
10552
10553
10554
10555/*********************************************************************************************************************************
10556* EFLAGS *
10557*********************************************************************************************************************************/
10558
10559#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10560# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
10561#else
10562# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
10563 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
10564
10565DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
10566{
10567 if (fEflOutput)
10568 {
10569 PVMCPUCC const pVCpu = pReNative->pVCpu;
10570# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10571 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
10572 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
10573 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
10574# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10575 if (fEflOutput & (a_fEfl)) \
10576 { \
10577 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
10578 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10579 else \
10580 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10581 } else do { } while (0)
10582# else
10583 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
10584 IEMLIVENESSBIT const LivenessClobbered =
10585 {
10586 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10587 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10588 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10589 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10590 };
10591 IEMLIVENESSBIT const LivenessDelayable =
10592 {
10593 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10594 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10595 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10596 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10597 };
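            /* In short: a flag counts as skippable when the liveness info shows it is
               written here with no later read, potential exception/call or other use,
               and as delayable when the only remaining consumer is a potential
               exception/call path. */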
10598# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10599 if (fEflOutput & (a_fEfl)) \
10600 { \
10601 if (LivenessClobbered.a_fLivenessMember) \
10602 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10603 else if (LivenessDelayable.a_fLivenessMember) \
10604 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
10605 else \
10606 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10607 } else do { } while (0)
10608# endif
10609 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
10610 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
10611 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
10612 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
10613 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
10614 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
10615 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
10616# undef CHECK_FLAG_AND_UPDATE_STATS
10617 }
10618 RT_NOREF(fEflInput);
10619}
10620#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
10621
10622#undef IEM_MC_FETCH_EFLAGS /* should not be used */
10623#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10624 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
10625
10626/** Handles IEM_MC_FETCH_EFLAGS_EX. */
10627DECL_INLINE_THROW(uint32_t)
10628iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
10629 uint32_t fEflInput, uint32_t fEflOutput)
10630{
10631 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
10632 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
10633 RT_NOREF(fEflInput, fEflOutput);
10634
10635#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10636# ifdef VBOX_STRICT
10637 if ( pReNative->idxCurCall != 0
10638 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
10639 {
10640 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
10641 uint32_t const fBoth = fEflInput | fEflOutput;
10642# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
10643 AssertMsg( !(fBoth & (a_fElfConst)) \
10644 || (!(fEflInput & (a_fElfConst)) \
10645 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10646 : !(fEflOutput & (a_fElfConst)) \
10647 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10648 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
10649 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
10650 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
10651 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
10652 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
10653 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
10654 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
10655 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
10656 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
10657# undef ASSERT_ONE_EFL
10658 }
10659# endif
10660#endif
10661
10662    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
10663 * the existing shadow copy. */
10664 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
10665 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10666 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
10667 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10668 return off;
10669}
10670
10671
10672
10673/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
10674 * start using it with custom native code emission (inlining assembly
10675 * instruction helpers). */
10676#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
10677#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10678 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10679 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
10680
10681/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
10682DECL_INLINE_THROW(uint32_t)
10683iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
10684{
10685 RT_NOREF(fEflOutput);
10686 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
10687 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
10688
10689#ifdef VBOX_STRICT
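    /* Strict sanity checks on the committed value: the reserved always-one bit
       (X86_EFL_RA1_MASK, bit 1) must be set and the reserved always-zero bits must
       be clear, otherwise we run into a breakpoint (0x2001 / 0x2002). */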
10690 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
10691 uint32_t offFixup = off;
10692 off = iemNativeEmitJnzToFixed(pReNative, off, off);
10693 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
10694 iemNativeFixupFixedJump(pReNative, offFixup, off);
10695
10696 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
10697 offFixup = off;
10698 off = iemNativeEmitJzToFixed(pReNative, off, off);
10699 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
10700 iemNativeFixupFixedJump(pReNative, offFixup, off);
10701
10702    /** @todo validate that only bits in the fEflOutput mask changed. */
10703#endif
10704
10705 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10706 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
10707 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10708 return off;
10709}
10710
10711
10712
10713/*********************************************************************************************************************************
10714* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
10715*********************************************************************************************************************************/
10716
10717#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
10718 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
10719
10720#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
10721 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
10722
10723#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
10724 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
10725
10726
10727/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
10728 * IEM_MC_FETCH_SREG_ZX_U64. */
10729DECL_INLINE_THROW(uint32_t)
10730iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
10731{
10732 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10733 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
10734 Assert(iSReg < X86_SREG_COUNT);
10735
10736 /*
10737     * For now, we will not create a shadow copy of a selector.  The rationale
10738     * is that since we do not recompile the popping and loading of segment
10739     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
10740     * pushing and moving to registers, there is only a small chance that the
10741     * shadow copy will be accessed again before the register is reloaded.  One
10742     * scenario would be nested calls in 16-bit code, but I doubt it's worth
10743     * the extra register pressure atm.
10744     *
10745     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
10746     * and iemNativeVarRegisterAcquire for a load scenario.  We've only got the
10747     * store scenario covered at present (r160730).
10748 */
10749 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10750 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10751 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
10752 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10753 return off;
10754}
10755
10756
10757
10758/*********************************************************************************************************************************
10759* Register references. *
10760*********************************************************************************************************************************/
10761
10762#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
10763 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
10764
10765#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
10766 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
10767
10768/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
10769DECL_INLINE_THROW(uint32_t)
10770iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
10771{
10772 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10773 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10774 Assert(iGRegEx < 20);
10775
10776 if (iGRegEx < 16)
10777 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10778 else
10779 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
10780
10781 /* If we've delayed writing back the register value, flush it now. */
10782 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10783
10784 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10785 if (!fConst)
10786 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
10787
10788 return off;
10789}
10790
10791#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
10792 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
10793
10794#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
10795 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
10796
10797#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
10798 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
10799
10800#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
10801 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
10802
10803#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
10804 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
10805
10806#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
10807 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
10808
10809#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
10810 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
10811
10812#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
10813 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
10814
10815#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
10816 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
10817
10818#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
10819 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
10820
10821/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
10822DECL_INLINE_THROW(uint32_t)
10823iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
10824{
10825 Assert(iGReg < 16);
10826 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
10827 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10828
10829 /* If we've delayed writing back the register value, flush it now. */
10830 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
10831
10832 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10833 if (!fConst)
10834 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
10835
10836 return off;
10837}
10838
10839
10840#undef IEM_MC_REF_EFLAGS /* should not be used. */
10841#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
10842 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10843 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
10844
10845/** Handles IEM_MC_REF_EFLAGS. */
10846DECL_INLINE_THROW(uint32_t)
10847iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
10848{
10849 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
10850 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10851
10852 /* If we've delayed writing back the register value, flush it now. */
10853 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
10854
10855 /* If there is a shadow copy of guest EFLAGS, flush it now. */
10856 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
10857
10858 return off;
10859}
10860
10861
10862/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
10863 * different code from the threaded recompiler, maybe it would be helpful.  For now
10864 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
10865#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
10866
10867
10868#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
10869 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
10870
10871#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
10872 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
10873
10874#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
10875 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
10876
10877/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
10878DECL_INLINE_THROW(uint32_t)
10879iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
10880{
10881 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10882 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10883 Assert(iXReg < 16);
10884
10885 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
10886
10887 /* If we've delayed writing back the register value, flush it now. */
10888 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
10889
10890    /** @todo r=aeichner This needs to be done as soon as we shadow SSE registers in host registers; we still
10891     *        need to figure out the semantics of how this is tracked.
10892     *        For now this is safe though, as the reference will operate directly on the CPUMCTX
10893     *        structure, so the value can't get out of sync.
10894     */
10895#if 0
10896 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10897 if (!fConst)
10898 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_XREG(iXReg)));
10899#else
10900 RT_NOREF(fConst);
10901#endif
10902
10903 return off;
10904}
10905
10906
10907
10908/*********************************************************************************************************************************
10909* Effective Address Calculation *
10910*********************************************************************************************************************************/
10911#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
10912 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
10913
10914/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
10915 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
10916DECL_INLINE_THROW(uint32_t)
10917iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10918 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
10919{
10920 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10921
10922 /*
10923 * Handle the disp16 form with no registers first.
10924 *
10925 * Convert to an immediate value, as that'll delay the register allocation
10926 * and assignment till the memory access / call / whatever and we can use
10927 * a more appropriate register (or none at all).
10928 */
10929 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
10930 {
10931 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
10932 return off;
10933 }
10934
10935    /* Determine the displacement. */
10936 uint16_t u16EffAddr;
10937 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10938 {
10939 case 0: u16EffAddr = 0; break;
10940 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
10941 case 2: u16EffAddr = u16Disp; break;
10942 default: AssertFailedStmt(u16EffAddr = 0);
10943 }
10944
10945 /* Determine the registers involved. */
10946 uint8_t idxGstRegBase;
10947 uint8_t idxGstRegIndex;
10948 switch (bRm & X86_MODRM_RM_MASK)
10949 {
10950 case 0:
10951 idxGstRegBase = X86_GREG_xBX;
10952 idxGstRegIndex = X86_GREG_xSI;
10953 break;
10954 case 1:
10955 idxGstRegBase = X86_GREG_xBX;
10956 idxGstRegIndex = X86_GREG_xDI;
10957 break;
10958 case 2:
10959 idxGstRegBase = X86_GREG_xBP;
10960 idxGstRegIndex = X86_GREG_xSI;
10961 break;
10962 case 3:
10963 idxGstRegBase = X86_GREG_xBP;
10964 idxGstRegIndex = X86_GREG_xDI;
10965 break;
10966 case 4:
10967 idxGstRegBase = X86_GREG_xSI;
10968 idxGstRegIndex = UINT8_MAX;
10969 break;
10970 case 5:
10971 idxGstRegBase = X86_GREG_xDI;
10972 idxGstRegIndex = UINT8_MAX;
10973 break;
10974 case 6:
10975 idxGstRegBase = X86_GREG_xBP;
10976 idxGstRegIndex = UINT8_MAX;
10977 break;
10978#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
10979 default:
10980#endif
10981 case 7:
10982 idxGstRegBase = X86_GREG_xBX;
10983 idxGstRegIndex = UINT8_MAX;
10984 break;
10985 }
10986
10987 /*
10988 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
10989 */
10990 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10991 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10992 kIemNativeGstRegUse_ReadOnly);
10993 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
10994 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10995 kIemNativeGstRegUse_ReadOnly)
10996 : UINT8_MAX;
10997#ifdef RT_ARCH_AMD64
10998 if (idxRegIndex == UINT8_MAX)
10999 {
11000 if (u16EffAddr == 0)
11001 {
11002            /* movzx ret, base */
11003 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
11004 }
11005 else
11006 {
11007 /* lea ret32, [base64 + disp32] */
11008 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11009 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11010 if (idxRegRet >= 8 || idxRegBase >= 8)
11011 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
11012 pbCodeBuf[off++] = 0x8d;
11013 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11014 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
11015 else
11016 {
11017 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
11018 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11019 }
11020 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
11021 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
11022 pbCodeBuf[off++] = 0;
11023 pbCodeBuf[off++] = 0;
11024 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11025
11026 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
11027 }
11028 }
11029 else
11030 {
11031 /* lea ret32, [index64 + base64 (+ disp32)] */
11032 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11033 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11034 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11035 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11036 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11037 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
11038 pbCodeBuf[off++] = 0x8d;
11039 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
11040 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11041 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
11042 if (bMod == X86_MOD_MEM4)
11043 {
11044 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
11045 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
11046 pbCodeBuf[off++] = 0;
11047 pbCodeBuf[off++] = 0;
11048 }
11049 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11050 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
11051 }
11052
11053#elif defined(RT_ARCH_ARM64)
11054 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
11055 if (u16EffAddr == 0)
11056 {
11057 if (idxRegIndex == UINT8_MAX)
11058 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
11059 else
11060 {
11061 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
11062 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
11063 }
11064 }
11065 else
11066 {
11067 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
11068 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
11069 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
11070 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
11071 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
11072 else
11073 {
11074 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
11075 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
11076 }
11077 if (idxRegIndex != UINT8_MAX)
11078 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
11079 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
11080 }
11081
11082#else
11083# error "port me"
11084#endif
11085
11086 if (idxRegIndex != UINT8_MAX)
11087 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11088 iemNativeRegFreeTmp(pReNative, idxRegBase);
11089 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11090 return off;
11091}
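/* Worked example (illustrative only): bRm=0x56 decodes to mod=1, rm=6, i.e. [bp+disp8];
 * with u16Disp=0x00f4 the mod switch above yields u16EffAddr = (int16_t)(int8_t)0xf4 = 0xfff4
 * and the rm switch picks xBP as base with no index.  The AMD64 path then emits a
 *      lea ret32, [rBP + 0xfff4]
 * followed by a 16-bit zero-extension (iemNativeEmitClear16UpGpr), while the ARM64 path
 * subtracts 12 and applies uxth; both yield (uint16_t)(BP - 12). */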
11092
11093
11094#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
11095 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
11096
11097/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
11098 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
11099DECL_INLINE_THROW(uint32_t)
11100iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
11101 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
11102{
11103 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
11104
11105 /*
11106 * Handle the disp32 form with no registers first.
11107 *
11108 * Convert to an immediate value, as that'll delay the register allocation
11109 * and assignment till the memory access / call / whatever and we can use
11110 * a more appropriate register (or none at all).
11111 */
11112 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
11113 {
11114 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
11115 return off;
11116 }
11117
11118    /* Calculate the fixed displacement (see the SIB.B=4 and SIB.B=5 cases further down for more on this). */
11119 uint32_t u32EffAddr = 0;
11120 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
11121 {
11122 case 0: break;
11123 case 1: u32EffAddr = (int8_t)u32Disp; break;
11124 case 2: u32EffAddr = u32Disp; break;
11125 default: AssertFailed();
11126 }
11127
11128 /* Get the register (or SIB) value. */
11129 uint8_t idxGstRegBase = UINT8_MAX;
11130 uint8_t idxGstRegIndex = UINT8_MAX;
11131 uint8_t cShiftIndex = 0;
11132 switch (bRm & X86_MODRM_RM_MASK)
11133 {
11134 case 0: idxGstRegBase = X86_GREG_xAX; break;
11135 case 1: idxGstRegBase = X86_GREG_xCX; break;
11136 case 2: idxGstRegBase = X86_GREG_xDX; break;
11137 case 3: idxGstRegBase = X86_GREG_xBX; break;
11138 case 4: /* SIB */
11139 {
11140            /* index w/ scaling. */
11141 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
11142 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
11143 {
11144 case 0: idxGstRegIndex = X86_GREG_xAX; break;
11145 case 1: idxGstRegIndex = X86_GREG_xCX; break;
11146 case 2: idxGstRegIndex = X86_GREG_xDX; break;
11147 case 3: idxGstRegIndex = X86_GREG_xBX; break;
11148 case 4: cShiftIndex = 0; /*no index*/ break;
11149 case 5: idxGstRegIndex = X86_GREG_xBP; break;
11150 case 6: idxGstRegIndex = X86_GREG_xSI; break;
11151 case 7: idxGstRegIndex = X86_GREG_xDI; break;
11152 }
11153
11154 /* base */
11155 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
11156 {
11157 case 0: idxGstRegBase = X86_GREG_xAX; break;
11158 case 1: idxGstRegBase = X86_GREG_xCX; break;
11159 case 2: idxGstRegBase = X86_GREG_xDX; break;
11160 case 3: idxGstRegBase = X86_GREG_xBX; break;
11161 case 4:
11162 idxGstRegBase = X86_GREG_xSP;
11163 u32EffAddr += uSibAndRspOffset >> 8;
11164 break;
11165 case 5:
11166 if ((bRm & X86_MODRM_MOD_MASK) != 0)
11167 idxGstRegBase = X86_GREG_xBP;
11168 else
11169 {
11170 Assert(u32EffAddr == 0);
11171 u32EffAddr = u32Disp;
11172 }
11173 break;
11174 case 6: idxGstRegBase = X86_GREG_xSI; break;
11175 case 7: idxGstRegBase = X86_GREG_xDI; break;
11176 }
11177 break;
11178 }
11179 case 5: idxGstRegBase = X86_GREG_xBP; break;
11180 case 6: idxGstRegBase = X86_GREG_xSI; break;
11181 case 7: idxGstRegBase = X86_GREG_xDI; break;
11182 }
11183
11184 /*
11185 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
11186 * the start of the function.
11187 */
11188 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
11189 {
11190 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
11191 return off;
11192 }
11193
11194 /*
11195 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11196 */
11197 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11198 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
11199 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11200 kIemNativeGstRegUse_ReadOnly);
11201 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
11202 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11203 kIemNativeGstRegUse_ReadOnly);
11204
11205 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
11206 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
11207 {
11208 idxRegBase = idxRegIndex;
11209 idxRegIndex = UINT8_MAX;
11210 }
11211
11212#ifdef RT_ARCH_AMD64
11213 if (idxRegIndex == UINT8_MAX)
11214 {
11215 if (u32EffAddr == 0)
11216 {
11217 /* mov ret, base */
11218 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11219 }
11220 else
11221 {
11222 /* lea ret32, [base64 + disp32] */
11223 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11224 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11225 if (idxRegRet >= 8 || idxRegBase >= 8)
11226 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
11227 pbCodeBuf[off++] = 0x8d;
11228 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11229 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11230 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
11231 else
11232 {
11233 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11234 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11235 }
11236 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11237 if (bMod == X86_MOD_MEM4)
11238 {
11239 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11240 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11241 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11242 }
11243 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11244 }
11245 }
11246 else
11247 {
11248 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11249 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11250 if (idxRegBase == UINT8_MAX)
11251 {
11252 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
11253 if (idxRegRet >= 8 || idxRegIndex >= 8)
11254 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11255 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
11256 pbCodeBuf[off++] = 0x8d;
11257 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
11258 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
11259 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11260 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11261 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11262 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11263 }
11264 else
11265 {
11266 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
11267 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11268 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11269 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11270 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
11271 pbCodeBuf[off++] = 0x8d;
11272 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
11273 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11274 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11275 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
11276 if (bMod != X86_MOD_MEM0)
11277 {
11278 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11279 if (bMod == X86_MOD_MEM4)
11280 {
11281 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11282 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11283 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11284 }
11285 }
11286 }
11287 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11288 }
11289
11290#elif defined(RT_ARCH_ARM64)
11291 if (u32EffAddr == 0)
11292 {
11293 if (idxRegIndex == UINT8_MAX)
11294 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11295 else if (idxRegBase == UINT8_MAX)
11296 {
11297 if (cShiftIndex == 0)
11298 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
11299 else
11300 {
11301 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11302 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
11303 }
11304 }
11305 else
11306 {
11307 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11308 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
11309 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
11310 }
11311 }
11312 else
11313 {
11314 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
11315 {
11316 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11317 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
11318 }
11319 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
11320 {
11321 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11322 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
11323 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
11324 }
11325 else
11326 {
11327 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
11328 if (idxRegBase != UINT8_MAX)
11329 {
11330 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11331 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
11332 }
11333 }
11334 if (idxRegIndex != UINT8_MAX)
11335 {
11336 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11337 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
11338 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
11339 }
11340 }
11341
11342#else
11343# error "port me"
11344#endif
11345
11346 if (idxRegIndex != UINT8_MAX)
11347 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11348 if (idxRegBase != UINT8_MAX)
11349 iemNativeRegFreeTmp(pReNative, idxRegBase);
11350 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11351 return off;
11352}
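/* Worked example (illustrative only): bRm=0x44 decodes to mod=1, rm=4, so a SIB byte
 * follows in the low byte of uSibAndRspOffset; with SIB=0x88 (scale=2, index=ECX, base=EAX)
 * and u32Disp=0x10 we get u32EffAddr=16 and cShiftIndex=2.  The AMD64 path then emits
 *      lea ret32, [rAX + rCX*4 + 0x10]
 * (disp8 form, X86_MOD_MEM1), while the ARM64 path adds the immediate to the base and then
 * adds the index register shifted left by cShiftIndex. */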
11353
11354
11355#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11356 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11357 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
11358
11359#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11360 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11361 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
11362
11363#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11364 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11365 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
11366
11367/**
11368 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
11369 *
11370 * @returns New off.
11371 * @param   pReNative             The native recompile state.
11372 * @param   off                   The code buffer offset.
11373 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
11374 * bit 4 to REX.X. The two bits are part of the
11375 * REG sub-field, which isn't needed in this
11376 * function.
11377 * @param uSibAndRspOffset Two parts:
11378 * - The first 8 bits make up the SIB byte.
11379 * - The next 8 bits are the fixed RSP/ESP offset
11380 * in case of a pop [xSP].
11381 * @param u32Disp The displacement byte/word/dword, if any.
11382 * @param cbInstr The size of the fully decoded instruction. Used
11383 * for RIP relative addressing.
11384 * @param idxVarRet The result variable number.
11385 * @param f64Bit Whether to use a 64-bit or 32-bit address size
11386 * when calculating the address.
11387 *
11388 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
11389 */
11390DECL_INLINE_THROW(uint32_t)
11391iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
11392 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
11393{
11394 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
11395
11396 /*
11397 * Special case the rip + disp32 form first.
11398 */
11399 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
11400 {
11401#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
11402        /* Need to take the current PC offset into account for the displacement; no need to flush here
11403         * as the PC is only read and neither branching nor helper calls are involved. */
11404 u32Disp += pReNative->Core.offPc;
11405#endif
11406
11407 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11408 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
11409 kIemNativeGstRegUse_ReadOnly);
11410#ifdef RT_ARCH_AMD64
11411 if (f64Bit)
11412 {
11413 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
11414 if ((int32_t)offFinalDisp == offFinalDisp)
11415 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
11416 else
11417 {
11418 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
11419 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
11420 }
11421 }
11422 else
11423 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
11424
11425#elif defined(RT_ARCH_ARM64)
11426 if (f64Bit)
11427 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
11428 (int64_t)(int32_t)u32Disp + cbInstr);
11429 else
11430 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
11431 (int32_t)u32Disp + cbInstr);
11432
11433#else
11434# error "Port me!"
11435#endif
11436 iemNativeRegFreeTmp(pReNative, idxRegPc);
11437 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11438 return off;
11439 }
11440
11441    /* Calculate the fixed displacement (see the SIB.B=4 and SIB.B=5 cases further down for more on this). */
11442 int64_t i64EffAddr = 0;
11443 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
11444 {
11445 case 0: break;
11446 case 1: i64EffAddr = (int8_t)u32Disp; break;
11447 case 2: i64EffAddr = (int32_t)u32Disp; break;
11448 default: AssertFailed();
11449 }
11450
11451 /* Get the register (or SIB) value. */
11452 uint8_t idxGstRegBase = UINT8_MAX;
11453 uint8_t idxGstRegIndex = UINT8_MAX;
11454 uint8_t cShiftIndex = 0;
11455 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
11456 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
11457 else /* SIB: */
11458 {
11459        /* index w/ scaling. */
11460 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
11461 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
11462 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
11463 if (idxGstRegIndex == 4)
11464 {
11465 /* no index */
11466 cShiftIndex = 0;
11467 idxGstRegIndex = UINT8_MAX;
11468 }
11469
11470 /* base */
11471 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
11472 if (idxGstRegBase == 4)
11473 {
11474 /* pop [rsp] hack */
11475 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
11476 }
11477 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
11478 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
11479 {
11480 /* mod=0 and base=5 -> disp32, no base reg. */
11481 Assert(i64EffAddr == 0);
11482 i64EffAddr = (int32_t)u32Disp;
11483 idxGstRegBase = UINT8_MAX;
11484 }
11485 }
11486
11487 /*
11488 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
11489 * the start of the function.
11490 */
11491 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
11492 {
11493 if (f64Bit)
11494 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
11495 else
11496 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
11497 return off;
11498 }
11499
11500 /*
11501 * Now emit code that calculates:
11502 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11503 * or if !f64Bit:
11504 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11505 */
11506 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11507 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
11508 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11509 kIemNativeGstRegUse_ReadOnly);
11510 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
11511 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11512 kIemNativeGstRegUse_ReadOnly);
11513
11514 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
11515 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
11516 {
11517 idxRegBase = idxRegIndex;
11518 idxRegIndex = UINT8_MAX;
11519 }
11520
11521#ifdef RT_ARCH_AMD64
11522 uint8_t bFinalAdj;
11523 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
11524 bFinalAdj = 0; /* likely */
11525 else
11526 {
11527 /* pop [rsp] with a problematic disp32 value. Split out the
11528 RSP offset and add it separately afterwards (bFinalAdj). */
11529 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
11530 Assert(idxGstRegBase == X86_GREG_xSP);
11531 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
11532 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
11533 Assert(bFinalAdj != 0);
11534 i64EffAddr -= bFinalAdj;
11535 Assert((int32_t)i64EffAddr == i64EffAddr);
11536 }
11537 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
11538//pReNative->pInstrBuf[off++] = 0xcc;
11539
11540 if (idxRegIndex == UINT8_MAX)
11541 {
11542 if (u32EffAddr == 0)
11543 {
11544 /* mov ret, base */
11545 if (f64Bit)
11546 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
11547 else
11548 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11549 }
11550 else
11551 {
11552 /* lea ret, [base + disp32] */
11553 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11554 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11555 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
11556 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11557 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11558 | (f64Bit ? X86_OP_REX_W : 0);
11559 pbCodeBuf[off++] = 0x8d;
11560 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11561 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11562 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
11563 else
11564 {
11565 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11566 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11567 }
11568 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11569 if (bMod == X86_MOD_MEM4)
11570 {
11571 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11572 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11573 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11574 }
11575 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11576 }
11577 }
11578 else
11579 {
11580 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11581 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11582 if (idxRegBase == UINT8_MAX)
11583 {
11584 /* lea ret, [(index64 << cShiftIndex) + disp32] */
11585 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
11586 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11587 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11588 | (f64Bit ? X86_OP_REX_W : 0);
11589 pbCodeBuf[off++] = 0x8d;
11590 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
11591 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
11592 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11593 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11594 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11595 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11596 }
11597 else
11598 {
11599 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
11600 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11601 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11602 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11603 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11604 | (f64Bit ? X86_OP_REX_W : 0);
11605 pbCodeBuf[off++] = 0x8d;
11606 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
11607 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11608 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11609 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
11610 if (bMod != X86_MOD_MEM0)
11611 {
11612 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11613 if (bMod == X86_MOD_MEM4)
11614 {
11615 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11616 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11617 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11618 }
11619 }
11620 }
11621 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11622 }
11623
11624 if (!bFinalAdj)
11625 { /* likely */ }
11626 else
11627 {
11628 Assert(f64Bit);
11629 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
11630 }
11631
11632#elif defined(RT_ARCH_ARM64)
11633 if (i64EffAddr == 0)
11634 {
11635 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11636 if (idxRegIndex == UINT8_MAX)
11637 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
11638 else if (idxRegBase != UINT8_MAX)
11639 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
11640 f64Bit, false /*fSetFlags*/, cShiftIndex);
11641 else
11642 {
11643 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
11644 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
11645 }
11646 }
11647 else
11648 {
11649 if (f64Bit)
11650 { /* likely */ }
11651 else
11652 i64EffAddr = (int32_t)i64EffAddr;
11653
11654 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
11655 {
11656 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11657 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
11658 }
11659 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
11660 {
11661 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11662 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
11663 }
11664 else
11665 {
11666 if (f64Bit)
11667 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
11668 else
11669 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
11670 if (idxRegBase != UINT8_MAX)
11671 {
11672 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11673 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
11674 }
11675 }
11676 if (idxRegIndex != UINT8_MAX)
11677 {
11678 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11679 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
11680 f64Bit, false /*fSetFlags*/, cShiftIndex);
11681 }
11682 }
11683
11684#else
11685# error "port me"
11686#endif
11687
11688 if (idxRegIndex != UINT8_MAX)
11689 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11690 if (idxRegBase != UINT8_MAX)
11691 iemNativeRegFreeTmp(pReNative, idxRegBase);
11692 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11693 return off;
11694}
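/* Worked example (illustrative only) of the bRmEx packing documented above: with REX.X=1,
 * REX.B=1, mod=2 and rm=4 (SIB) we get bRmEx=0x9c; a SIB byte of 0x15 (scale=0, index=2,
 * base=5) then decodes as
 *      idxGstRegIndex = 2 | ((0x9c & 0x10) >> 1) = 10   (R10)
 *      idxGstRegBase  = 5 |  (0x9c & 0x08)       = 13   (R13)
 * and since mod=2 the 32-bit displacement from u32Disp is added on top, giving
 * R13 + R10 + disp32. */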
11695
11696
11697/*********************************************************************************************************************************
11698* TLB Lookup. *
11699*********************************************************************************************************************************/
11700
11701/**
11702 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
11703 */
11704DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
11705{
11706 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
11707 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
11708 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
11709 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
11710
11711 /* Do the lookup manually. */
11712 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
11713 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
11714 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
11715 if (RT_LIKELY(pTlbe->uTag == uTag))
11716 {
11717 /*
11718 * Check TLB page table level access flags.
11719 */
11720 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
11721 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
11722 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
11723 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
11724 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
11725 | IEMTLBE_F_PG_UNASSIGNED
11726 | IEMTLBE_F_PT_NO_ACCESSED
11727 | fNoWriteNoDirty | fNoUser);
11728 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
11729 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
11730 {
11731 /*
11732 * Return the address.
11733 */
11734 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
11735 if ((uintptr_t)pbAddr == uResult)
11736 return;
11737 RT_NOREF(cbMem);
11738 AssertFailed();
11739 }
11740 else
11741 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
11742 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
11743 }
11744 else
11745 AssertFailed();
11746 RT_BREAKPOINT();
11747}
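/* Illustrative only: uSegAndSizeAndAccess decomposes as iSegReg in byte 0, cbMem in byte 1
 * and fAccess in the upper 16 bits, so e.g. a 4 byte write through DS corresponds to
 *      (IEM_ACCESS_TYPE_WRITE << 16) | (4 << 8) | X86_SREG_DS
 * while flat accesses pass UINT8_MAX in the segment byte, matching the RT_BYTE1/RT_BYTE2
 * and shift-by-16 decoding at the top of the function. */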
11748
11749/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
11750
11751
11752/*********************************************************************************************************************************
11753* Memory fetches and stores common *
11754*********************************************************************************************************************************/
11755
11756typedef enum IEMNATIVEMITMEMOP
11757{
11758 kIemNativeEmitMemOp_Store = 0,
11759 kIemNativeEmitMemOp_Fetch,
11760 kIemNativeEmitMemOp_Fetch_Zx_U16,
11761 kIemNativeEmitMemOp_Fetch_Zx_U32,
11762 kIemNativeEmitMemOp_Fetch_Zx_U64,
11763 kIemNativeEmitMemOp_Fetch_Sx_U16,
11764 kIemNativeEmitMemOp_Fetch_Sx_U32,
11765 kIemNativeEmitMemOp_Fetch_Sx_U64
11766} IEMNATIVEMITMEMOP;
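/* Quick reference (descriptive only): Store writes idxVarValue to memory and Fetch loads
 * cbMem bytes; the Fetch_Zx_* variants load cbMem bytes and zero-extend to the named width
 * (they use the same helpers as the plain fetch, the distinction mainly serves the
 * strict-build assertions below), while the Fetch_Sx_* variants load and sign-extend via
 * the dedicated _Sx_ helpers. */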
11767
11768/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
11769 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
11770 * (with iSegReg = UINT8_MAX). */
11771DECL_INLINE_THROW(uint32_t)
11772iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
11773 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
11774 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
11775{
11776 /*
11777 * Assert sanity.
11778 */
11779 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11780 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
11781 Assert( enmOp != kIemNativeEmitMemOp_Store
11782 || pVarValue->enmKind == kIemNativeVarKind_Immediate
11783 || pVarValue->enmKind == kIemNativeVarKind_Stack);
11784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
11785 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
11786 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
11787 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
11788 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11789 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
11790 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
11791 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
11792#ifdef VBOX_STRICT
11793 if (iSegReg == UINT8_MAX)
11794 {
11795 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11796 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11797 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11798 switch (cbMem)
11799 {
11800 case 1:
11801 Assert( pfnFunction
11802 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
11803 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11804 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11805 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11806 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11807 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
11808 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
11809 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
11810 : UINT64_C(0xc000b000a0009000) ));
11811 break;
11812 case 2:
11813 Assert( pfnFunction
11814 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
11815 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11816 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11817 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11818 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
11819 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
11820 : UINT64_C(0xc000b000a0009000) ));
11821 break;
11822 case 4:
11823 Assert( pfnFunction
11824 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
11825 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11826 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11827 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
11828 : UINT64_C(0xc000b000a0009000) ));
11829 break;
11830 case 8:
11831 Assert( pfnFunction
11832 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
11833 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
11834 : UINT64_C(0xc000b000a0009000) ));
11835 break;
11836 }
11837 }
11838 else
11839 {
11840 Assert(iSegReg < 6);
11841 switch (cbMem)
11842 {
11843 case 1:
11844 Assert( pfnFunction
11845 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
11846 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
11847 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11848 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11849 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11850 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
11851 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
11852 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
11853 : UINT64_C(0xc000b000a0009000) ));
11854 break;
11855 case 2:
11856 Assert( pfnFunction
11857 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
11858 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
11859 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11860 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11861 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
11862 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
11863 : UINT64_C(0xc000b000a0009000) ));
11864 break;
11865 case 4:
11866 Assert( pfnFunction
11867 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
11868 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
11869 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
11870 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
11871 : UINT64_C(0xc000b000a0009000) ));
11872 break;
11873 case 8:
11874 Assert( pfnFunction
11875 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
11876 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
11877 : UINT64_C(0xc000b000a0009000) ));
11878 break;
11879 }
11880 }
11881#endif
11882
11883#ifdef VBOX_STRICT
11884 /*
11885 * Check that the fExec flags we've got make sense.
11886 */
11887 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11888#endif
11889
11890 /*
11891 * To keep things simple we have to commit any pending writes first as we
11892 * may end up making calls.
11893 */
11894 /** @todo we could postpone this till we make the call and reload the
11895 * registers after returning from the call. Not sure if that's sensible or
11896 * not, though. */
11897#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11898 off = iemNativeRegFlushPendingWrites(pReNative, off);
11899#else
11900 /* The program counter is treated differently for now. */
11901 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
11902#endif
11903
11904#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11905 /*
11906 * Move/spill/flush stuff out of call-volatile registers.
11907 * This is the easy way out. We could contain this to the tlb-miss branch
11908 * by saving and restoring active stuff here.
11909 */
11910 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11911#endif
11912
11913 /*
11914 * Define labels and allocate the result register (trying for the return
11915 * register if we can).
11916 */
11917 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11918 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
11919 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
11920 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
11921 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
11922 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
11923 uint8_t const idxRegValueStore = !TlbState.fSkip
11924 && enmOp == kIemNativeEmitMemOp_Store
11925 && pVarValue->enmKind != kIemNativeVarKind_Immediate
11926 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
11927 : UINT8_MAX;
11928 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11929 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11930 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11931 : UINT32_MAX;
11932
11933 /*
11934 * Jump to the TLB lookup code.
11935 */
11936 if (!TlbState.fSkip)
11937 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11938
11939 /*
11940 * TlbMiss:
11941 *
11942     * Call helper to do the fetching / storing.
11943 * We flush all guest register shadow copies here.
11944 */
11945 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
11946
11947#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11948 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11949#else
11950 RT_NOREF(idxInstr);
11951#endif
11952
11953#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
11954 if (pReNative->Core.offPc)
11955 {
11956 /*
11957 * Update the program counter but restore it at the end of the TlbMiss branch.
11958 * This should allow delaying more program counter updates for the TlbLookup and hit paths
11959 * which are hopefully much more frequent, reducing the amount of memory accesses.
11960 */
11961 /* Allocate a temporary PC register. */
11962 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
11963
11964 /* Perform the addition and store the result. */
11965 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
11966 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
11967
11968 /* Free and flush the PC register. */
11969 iemNativeRegFreeTmp(pReNative, idxPcReg);
11970 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
11971 }
11972#endif
11973
11974#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11975 /* Save variables in volatile registers. */
11976 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11977 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
11978 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
11979 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11980#endif
11981
11982 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
11983 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
11984 if (enmOp == kIemNativeEmitMemOp_Store)
11985 {
11986 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
11987 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
11988#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11989 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11990#else
11991 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
11992 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
11993#endif
11994 }
11995
11996 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
11997 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
11998#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11999 fVolGregMask);
12000#else
12001 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
12002#endif
12003
12004 if (iSegReg != UINT8_MAX)
12005 {
12006 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
12007 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
12008 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
12009 }
12010
12011 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12012 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12013
12014 /* Done setting up parameters, make the call. */
12015 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12016
12017 /*
12018 * Put the result in the right register if this is a fetch.
12019 */
12020 if (enmOp != kIemNativeEmitMemOp_Store)
12021 {
12022 Assert(idxRegValueFetch == pVarValue->idxReg);
12023 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
12024 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
12025 }
12026
12027#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12028 /* Restore variables and guest shadow registers to volatile registers. */
12029 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12030 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12031#endif
12032
12033#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
12034 if (pReNative->Core.offPc)
12035 {
12036 /*
12037 * Time to restore the program counter to its original value.
12038 */
12039 /* Allocate a temporary PC register. */
12040 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
12041
12042 /* Restore the original value. */
12043 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
12044 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
12045
12046 /* Free and flush the PC register. */
12047 iemNativeRegFreeTmp(pReNative, idxPcReg);
12048 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
12049 }
12050#endif
12051
12052#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12053 if (!TlbState.fSkip)
12054 {
12055 /* end of TlbMiss - Jump to the done label. */
12056 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12057 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12058
12059 /*
12060 * TlbLookup:
12061 */
12062 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
12063 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
12064 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
12065
12066 /*
12067 * Emit code to do the actual storing / fetching.
12068 */
12069 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
12070# ifdef VBOX_WITH_STATISTICS
12071 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12072 enmOp == kIemNativeEmitMemOp_Store
12073                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
12074                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
12075# endif
12076 switch (enmOp)
12077 {
12078 case kIemNativeEmitMemOp_Store:
12079 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
12080 {
12081 switch (cbMem)
12082 {
12083 case 1:
12084 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12085 break;
12086 case 2:
12087 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12088 break;
12089 case 4:
12090 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12091 break;
12092 case 8:
12093 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12094 break;
12095 default:
12096 AssertFailed();
12097 }
12098 }
12099 else
12100 {
12101 switch (cbMem)
12102 {
12103 case 1:
12104 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
12105 idxRegMemResult, TlbState.idxReg1);
12106 break;
12107 case 2:
12108 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
12109 idxRegMemResult, TlbState.idxReg1);
12110 break;
12111 case 4:
12112 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
12113 idxRegMemResult, TlbState.idxReg1);
12114 break;
12115 case 8:
12116 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
12117 idxRegMemResult, TlbState.idxReg1);
12118 break;
12119 default:
12120 AssertFailed();
12121 }
12122 }
12123 break;
12124
12125 case kIemNativeEmitMemOp_Fetch:
12126 case kIemNativeEmitMemOp_Fetch_Zx_U16:
12127 case kIemNativeEmitMemOp_Fetch_Zx_U32:
12128 case kIemNativeEmitMemOp_Fetch_Zx_U64:
12129 switch (cbMem)
12130 {
12131 case 1:
12132 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12133 break;
12134 case 2:
12135 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12136 break;
12137 case 4:
12138 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12139 break;
12140 case 8:
12141 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12142 break;
12143 default:
12144 AssertFailed();
12145 }
12146 break;
12147
12148 case kIemNativeEmitMemOp_Fetch_Sx_U16:
12149 Assert(cbMem == 1);
12150 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12151 break;
12152
12153 case kIemNativeEmitMemOp_Fetch_Sx_U32:
12154 Assert(cbMem == 1 || cbMem == 2);
12155 if (cbMem == 1)
12156 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12157 else
12158 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12159 break;
12160
12161 case kIemNativeEmitMemOp_Fetch_Sx_U64:
12162 switch (cbMem)
12163 {
12164 case 1:
12165 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12166 break;
12167 case 2:
12168 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12169 break;
12170 case 4:
12171 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12172 break;
12173 default:
12174 AssertFailed();
12175 }
12176 break;
12177
12178 default:
12179 AssertFailed();
12180 }
12181
12182 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12183
12184 /*
12185 * TlbDone:
12186 */
12187 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12188
12189 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
12190
12191# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12192 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
12193 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12194# endif
12195 }
12196#else
12197 RT_NOREF(fAlignMask, idxLabelTlbMiss);
12198#endif
12199
12200 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
12201 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12202 return off;
12203}
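/* Rough sketch (descriptive only) of the code emitted above when the TLB lookup isn't
 * skipped:
 *          jmp     TlbLookup_N
 *      TlbMiss_N:
 *          ; record idxInstr, save volatile variables, load the helper arguments
 *          call    pfnFunction
 *          ; move the result into the value register, restore variables / guest shadows
 *          jmp     TlbDone_N
 *      TlbLookup_N:
 *          ; inline TLB probe (iemNativeEmitTlbLookup), branches to TlbMiss_N on miss,
 *          ; otherwise does the load/store directly through idxRegMemResult
 *      TlbDone_N:
 * When TlbState.fSkip is set only the helper-call (TlbMiss) body is emitted, without the
 * surrounding jumps. */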
12204
12205
12206
12207/*********************************************************************************************************************************
12208* Memory fetches (IEM_MEM_FETCH_XXX). *
12209*********************************************************************************************************************************/
12210
12211/* 8-bit segmented: */
12212#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
12213 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
12214 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
12215 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12216
12217#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12218 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12219 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
12220 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12221
12222#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12223 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12224 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12225 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12226
12227#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12228 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12229 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12230 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12231
12232#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12233 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12234 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
12235 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
12236
12237#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12238 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12239 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12240 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
12241
12242#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12243 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12244 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12245 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
12246
12247/* 16-bit segmented: */
12248#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12249 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12250 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12251 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12252
12253#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
12254 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12255 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12256 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
12257
12258#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12259 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12260 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12261 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12262
12263#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12264 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12265 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12266 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12267
12268#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12269 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12270 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12271 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
12272
12273#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12274 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12275 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12276 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
12277
12278
12279/* 32-bit segmented: */
12280#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12281 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12282 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12283 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
12284
12285#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
12286 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12287 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12288 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
12289
12290#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12291 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12292 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12293 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
12294
12295#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12296 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12297 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12298 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
12299
12300
12301/* 64-bit segmented: */
12302#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12303 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12304 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
12305 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
12306
12307
12308
12309/* 8-bit flat: */
12310#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
12311 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
12312 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
12313 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12314
12315#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
12316 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12317 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
12318 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12319
12320#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
12321 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12322 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12323 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12324
12325#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
12326 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12327 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12328 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12329
12330#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
12331 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12332 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
12333 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
12334
12335#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
12336 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12337 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12338 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
12339
12340#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
12341 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12342 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12343 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
12344
12345
12346/* 16-bit flat: */
12347#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
12348 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12349 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12350 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12351
12352#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
12353 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12354 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12355 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
12356
12357#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
12358 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12359 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12360 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12361
12362#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
12363 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12364 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12365 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12366
12367#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
12368 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12369 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12370 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
12371
12372#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
12373 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12374 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12375 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
12376
12377/* 32-bit flat: */
12378#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
12379 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12380 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12381 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
12382
12383#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
12384 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12385 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12386 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
12387
12388#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
12389 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12390 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12391 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
12392
12393#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
12394 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12395 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12396 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
12397
12398/* 64-bit flat: */
12399#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
12400 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12401 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
12402 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
12403
12404
12405
12406/*********************************************************************************************************************************
12407* Memory stores (IEM_MEM_STORE_XXX). *
12408*********************************************************************************************************************************/
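/* Note: the store macros reuse the same common emitter as the fetches above,
   just with kIemNativeEmitMemOp_Store and the corresponding store helpers.
   The *_CONST variants go through iemNativeEmitMemStoreConstDataCommon below,
   which wraps the constant in a temporary immediate variable first. */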
12409
12410#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
12411 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
12412 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
12413 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
12414
12415#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
12416 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
12417 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
12418 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
12419
12420#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
12421 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
12422 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
12423 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
12424
12425#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
12426 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
12427 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
12428 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
12429
12430
12431#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
12432 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
12433 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
12434 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
12435
12436#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
12437 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
12438 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
12439 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
12440
12441#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
12442 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
12443 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
12444 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
12445
12446#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
12447 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
12448 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
12449 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
12450
12451
12452#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
12453 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12454 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
12455
12456#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
12457 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12458 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
12459
12460#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
12461 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12462 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
12463
12464#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
12465 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12466 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
12467
12468
12469#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
12470 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12471 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
12472
12473#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
12474 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12475 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
12476
12477#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
12478 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12479 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
12480
12481#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
12482 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12483 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
12484
12485/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
12486 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
12487DECL_INLINE_THROW(uint32_t)
12488iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
12489 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
12490{
12491 /*
12492 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
12493 * to do the grunt work.
12494 */
12495 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
12496 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
12497 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
12498 pfnFunction, idxInstr);
12499 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
12500 return off;
12501}
12502
12503
12504
12505/*********************************************************************************************************************************
12506* Stack Accesses. *
12507*********************************************************************************************************************************/
12508/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
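/* Note: the RT_MAKE_U32_FROM_U8 value packs the operand width (in bits) into
   byte 0, the flat-mode width (0 = segmented stack) into byte 1 and the
   segment-register flag into byte 2.  E.g. IEM_MC_FLAT64_PUSH_U16 passes
   RT_MAKE_U32_FROM_U8(16, 64, 0, 0), from which iemNativeEmitStackPush below
   recovers cbMem = RT_BYTE1(...) / 8 = 2 and cBitsFlat = RT_BYTE2(...) = 64. */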
12509#define IEM_MC_PUSH_U16(a_u16Value) \
12510 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12511 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
12512#define IEM_MC_PUSH_U32(a_u32Value) \
12513 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12514 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
12515#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
12516 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
12517 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
12518#define IEM_MC_PUSH_U64(a_u64Value) \
12519 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12520 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
12521
12522#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
12523 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12524 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
12525#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
12526 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12527 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
12528#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
12529 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
12530 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
12531
12532#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
12533 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12534 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
12535#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
12536 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12537 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
12538
12539
12540DECL_FORCE_INLINE_THROW(uint32_t)
12541iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12542{
12543 /* Use16BitSp: */
12544#ifdef RT_ARCH_AMD64
12545 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12546 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12547#else
12548 /* sub regeff, regrsp, #cbMem */
12549 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
12550 /* and regeff, regeff, #0xffff */
12551 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12552 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
12553    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
12554 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
12555#endif
12556 return off;
12557}
12558
12559
12560DECL_FORCE_INLINE(uint32_t)
12561iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12562{
12563 /* Use32BitSp: */
12564 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12565 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12566 return off;
12567}
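/* Note: both helpers above leave the new top-of-stack address in idxRegEffSp
   and the updated stack pointer value in idxRegRsp; the 16-bit variant only
   updates bits 15:0 of idxRegRsp (see the ASSUMES note above). */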
12568
12569
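/* Rough structure of the code emitted by iemNativeEmitStackPush: flush pending
   writes, compute the new RSP and the effective stack address (checking
   SS.ATTR.D at runtime in the segmented case), then do an inline data TLB
   lookup; on a hit the value is stored directly, on a miss the pfnFunction
   helper is called, and finally the updated RSP is committed to CPUMCTX. */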
12570/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
12571DECL_INLINE_THROW(uint32_t)
12572iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
12573 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12574{
12575 /*
12576 * Assert sanity.
12577 */
12578 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12579 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12580#ifdef VBOX_STRICT
12581 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12582 {
12583 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12584 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12585 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12586 Assert( pfnFunction
12587 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12588 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
12589 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
12590 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12591 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
12592 : UINT64_C(0xc000b000a0009000) ));
12593 }
12594 else
12595 Assert( pfnFunction
12596 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
12597 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
12598 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
12599 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
12600 : UINT64_C(0xc000b000a0009000) ));
12601#endif
12602
12603#ifdef VBOX_STRICT
12604 /*
12605 * Check that the fExec flags we've got make sense.
12606 */
12607 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12608#endif
12609
12610 /*
12611 * To keep things simple we have to commit any pending writes first as we
12612 * may end up making calls.
12613 */
12614 /** @todo we could postpone this till we make the call and reload the
12615 * registers after returning from the call. Not sure if that's sensible or
12616 * not, though. */
12617 off = iemNativeRegFlushPendingWrites(pReNative, off);
12618
12619 /*
12620 * First we calculate the new RSP and the effective stack pointer value.
12621 * For 64-bit mode and flat 32-bit these two are the same.
12622     * (Code structure is very similar to that of the POP emitter below.)
12623 */
12624 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12625 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
12626 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
12627 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
12628 ? cbMem : sizeof(uint16_t);
12629 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12630 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12631 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12632 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12633 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12634 if (cBitsFlat != 0)
12635 {
12636 Assert(idxRegEffSp == idxRegRsp);
12637 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12638 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12639 if (cBitsFlat == 64)
12640 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
12641 else
12642 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
12643 }
12644 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12645 {
12646 Assert(idxRegEffSp != idxRegRsp);
12647 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12648 kIemNativeGstRegUse_ReadOnly);
12649#ifdef RT_ARCH_AMD64
12650 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12651#else
12652 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12653#endif
12654 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12655 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12656 offFixupJumpToUseOtherBitSp = off;
12657 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12658 {
12659 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12660 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12661 }
12662 else
12663 {
12664 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12665 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12666 }
12667 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12668 }
12669 /* SpUpdateEnd: */
12670 uint32_t const offLabelSpUpdateEnd = off;
12671
12672 /*
12673 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
12674 * we're skipping lookup).
12675 */
12676 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12677 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
12678 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12679 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12680 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12681 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12682 : UINT32_MAX;
12683 uint8_t const idxRegValue = !TlbState.fSkip
12684 && pVarValue->enmKind != kIemNativeVarKind_Immediate
12685 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
12686 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
12687 : UINT8_MAX;
12688 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
12689
12690
12691 if (!TlbState.fSkip)
12692 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12693 else
12694 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12695
12696 /*
12697 * Use16BitSp:
12698 */
12699 if (cBitsFlat == 0)
12700 {
12701#ifdef RT_ARCH_AMD64
12702 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12703#else
12704 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12705#endif
12706 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12707 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12708 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12709 else
12710 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12711 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12712 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12713 }
12714
12715 /*
12716 * TlbMiss:
12717 *
12718 * Call helper to do the pushing.
12719 */
12720 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12721
12722#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12723 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12724#else
12725 RT_NOREF(idxInstr);
12726#endif
12727
12728 /* Save variables in volatile registers. */
12729 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12730 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12731 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
12732 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
12733 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12734
12735 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
12736 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
12737 {
12738 /* Swap them using ARG0 as temp register: */
12739 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
12740 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
12741 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
12742 }
12743 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
12744 {
12745 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
12746 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
12747 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12748
12749 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
12750 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12751 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12752 }
12753 else
12754 {
12755 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
12756 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12757
12758 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
12759 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
12760 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
12761 }
12762
12763 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12764 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12765
12766 /* Done setting up parameters, make the call. */
12767 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12768
12769 /* Restore variables and guest shadow registers to volatile registers. */
12770 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12771 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12772
12773#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12774 if (!TlbState.fSkip)
12775 {
12776 /* end of TlbMiss - Jump to the done label. */
12777 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12778 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12779
12780 /*
12781 * TlbLookup:
12782 */
12783 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
12784 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12785
12786 /*
12787 * Emit code to do the actual storing / fetching.
12788 */
12789 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
12790# ifdef VBOX_WITH_STATISTICS
12791 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12792 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12793# endif
12794 if (idxRegValue != UINT8_MAX)
12795 {
12796 switch (cbMemAccess)
12797 {
12798 case 2:
12799 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12800 break;
12801 case 4:
12802 if (!fIsIntelSeg)
12803 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12804 else
12805 {
12806                    /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
12807                       PUSH FS in real mode, so we have to try to emulate that here.
12808 We borrow the now unused idxReg1 from the TLB lookup code here. */
12809 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
12810 kIemNativeGstReg_EFlags);
12811 if (idxRegEfl != UINT8_MAX)
12812 {
12813#ifdef RT_ARCH_AMD64
12814 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
12815 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12816 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12817#else
12818 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
12819 off, TlbState.idxReg1, idxRegEfl,
12820 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12821#endif
12822 iemNativeRegFreeTmp(pReNative, idxRegEfl);
12823 }
12824 else
12825 {
12826 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
12827 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
12828 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12829 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12830 }
12831 /* ASSUMES the upper half of idxRegValue is ZERO. */
12832 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
12833 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
12834 }
12835 break;
12836 case 8:
12837 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12838 break;
12839 default:
12840 AssertFailed();
12841 }
12842 }
12843 else
12844 {
12845 switch (cbMemAccess)
12846 {
12847 case 2:
12848 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
12849 idxRegMemResult, TlbState.idxReg1);
12850 break;
12851 case 4:
12852 Assert(!fIsSegReg);
12853 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
12854 idxRegMemResult, TlbState.idxReg1);
12855 break;
12856 case 8:
12857 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
12858 break;
12859 default:
12860 AssertFailed();
12861 }
12862 }
12863
12864 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12865 TlbState.freeRegsAndReleaseVars(pReNative);
12866
12867 /*
12868 * TlbDone:
12869 *
12870 * Commit the new RSP value.
12871 */
12872 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12873 }
12874#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12875
12876 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
12877 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12878 if (idxRegEffSp != idxRegRsp)
12879 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12880
12881    /* The value variable is implicitly flushed. */
12882 if (idxRegValue != UINT8_MAX)
12883 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12884 iemNativeVarFreeLocal(pReNative, idxVarValue);
12885
12886 return off;
12887}
12888
12889
12890
12891/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
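/* (Same packing as for the PUSH macros above; the segment register flag in
   byte 2 is always zero for pops.) */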
12892#define IEM_MC_POP_GREG_U16(a_iGReg) \
12893 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12894 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
12895#define IEM_MC_POP_GREG_U32(a_iGReg) \
12896 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12897 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
12898#define IEM_MC_POP_GREG_U64(a_iGReg) \
12899 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12900 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
12901
12902#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
12903 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12904 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12905#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
12906 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12907 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
12908
12909#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
12910 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12911 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12912#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
12913 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12914 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
12915
12916
12917DECL_FORCE_INLINE_THROW(uint32_t)
12918iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
12919 uint8_t idxRegTmp)
12920{
12921 /* Use16BitSp: */
12922#ifdef RT_ARCH_AMD64
12923 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12924 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12925 RT_NOREF(idxRegTmp);
12926#else
12927 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
12928 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
12929 /* add tmp, regrsp, #cbMem */
12930 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
12931 /* and tmp, tmp, #0xffff */
12932 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12933 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
12934    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
12935 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
12936#endif
12937 return off;
12938}
12939
12940
12941DECL_FORCE_INLINE(uint32_t)
12942iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12943{
12944 /* Use32BitSp: */
12945 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12946 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12947 return off;
12948}
12949
12950
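/* The POP emitter mirrors the PUSH one above: fetch the value via the inline
   TLB path (or the pfnFunction helper on a miss), then write it to the target
   general register and commit the new RSP; popping into xSP itself and the
   16-bit case get a bit of special treatment at the end. */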
12951/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
12952DECL_INLINE_THROW(uint32_t)
12953iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
12954 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12955{
12956 /*
12957 * Assert sanity.
12958 */
12959 Assert(idxGReg < 16);
12960#ifdef VBOX_STRICT
12961 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12962 {
12963 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12964 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12965 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12966 Assert( pfnFunction
12967 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12968 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
12969 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12970 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
12971 : UINT64_C(0xc000b000a0009000) ));
12972 }
12973 else
12974 Assert( pfnFunction
12975 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
12976 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
12977 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
12978 : UINT64_C(0xc000b000a0009000) ));
12979#endif
12980
12981#ifdef VBOX_STRICT
12982 /*
12983 * Check that the fExec flags we've got make sense.
12984 */
12985 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12986#endif
12987
12988 /*
12989 * To keep things simple we have to commit any pending writes first as we
12990 * may end up making calls.
12991 */
12992 off = iemNativeRegFlushPendingWrites(pReNative, off);
12993
12994 /*
12995 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
12996 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
12997 * directly as the effective stack pointer.
12998 * (Code structure is very similar to that of PUSH)
12999 */
13000 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
13001 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
13002 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
13003 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
13004 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
13005 /** @todo can do a better job picking the register here. For cbMem >= 4 this
13006 * will be the resulting register value. */
13007 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
13008
13009 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
13010 if (cBitsFlat != 0)
13011 {
13012 Assert(idxRegEffSp == idxRegRsp);
13013 Assert(cBitsFlat == 32 || cBitsFlat == 64);
13014 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
13015 }
13016 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
13017 {
13018 Assert(idxRegEffSp != idxRegRsp);
13019 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
13020 kIemNativeGstRegUse_ReadOnly);
13021#ifdef RT_ARCH_AMD64
13022 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13023#else
13024 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13025#endif
13026 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
13027 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
13028 offFixupJumpToUseOtherBitSp = off;
13029 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13030 {
13031/** @todo can skip idxRegRsp updating when popping ESP. */
13032 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
13033 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13034 }
13035 else
13036 {
13037 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
13038 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
13039 }
13040 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13041 }
13042 /* SpUpdateEnd: */
13043 uint32_t const offLabelSpUpdateEnd = off;
13044
13045 /*
13046 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
13047 * we're skipping lookup).
13048 */
13049 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
13050 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
13051 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13052 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
13053 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13054 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13055 : UINT32_MAX;
13056
13057 if (!TlbState.fSkip)
13058 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13059 else
13060 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
13061
13062 /*
13063 * Use16BitSp:
13064 */
13065 if (cBitsFlat == 0)
13066 {
13067#ifdef RT_ARCH_AMD64
13068 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13069#else
13070 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13071#endif
13072 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
13073 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13074 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
13075 else
13076 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13077 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
13078 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13079 }
13080
13081 /*
13082 * TlbMiss:
13083 *
13084     * Call helper to do the popping.
13085 */
13086 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
13087
13088#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13089 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13090#else
13091 RT_NOREF(idxInstr);
13092#endif
13093
13094 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13095 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
13096 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
13097 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13098
13099
13100 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
13101 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
13102 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13103
13104 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13105 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13106
13107 /* Done setting up parameters, make the call. */
13108 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13109
13110 /* Move the return register content to idxRegMemResult. */
13111 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
13112 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
13113
13114 /* Restore variables and guest shadow registers to volatile registers. */
13115 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13116 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13117
13118#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13119 if (!TlbState.fSkip)
13120 {
13121 /* end of TlbMiss - Jump to the done label. */
13122 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13123 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13124
13125 /*
13126 * TlbLookup:
13127 */
13128 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
13129 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13130
13131 /*
13132         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult itself).
13133 */
13134 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13135# ifdef VBOX_WITH_STATISTICS
13136 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13137 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
13138# endif
13139 switch (cbMem)
13140 {
13141 case 2:
13142 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13143 break;
13144 case 4:
13145 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13146 break;
13147 case 8:
13148 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13149 break;
13150 default:
13151 AssertFailed();
13152 }
13153
13154 TlbState.freeRegsAndReleaseVars(pReNative);
13155
13156 /*
13157 * TlbDone:
13158 *
13159         * Set the new RSP value (FLAT accesses need to calculate it first) and
13160 * commit the popped register value.
13161 */
13162 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13163 }
13164#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
13165
13166 if (idxGReg != X86_GREG_xSP)
13167 {
13168 /* Set the register. */
13169 if (cbMem >= sizeof(uint32_t))
13170 {
13171#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
13172 AssertMsg( pReNative->idxCurCall == 0
13173 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
13174 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
13175#endif
13176 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
13177 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
13178 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
13179 }
13180 else
13181 {
13182 Assert(cbMem == sizeof(uint16_t));
13183 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
13184 kIemNativeGstRegUse_ForUpdate);
13185 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
13186 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
13187 iemNativeRegFreeTmp(pReNative, idxRegDst);
13188 }
13189
13190 /* Complete RSP calculation for FLAT mode. */
13191 if (idxRegEffSp == idxRegRsp)
13192 {
13193 if (cBitsFlat == 64)
13194 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
13195 else
13196 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
13197 }
13198 }
13199 else
13200 {
13201        /* We're popping RSP, ESP or SP. Only the 16-bit (SP) case needs a bit of extra work, of course. */
13202 if (cbMem == sizeof(uint64_t))
13203 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
13204 else if (cbMem == sizeof(uint32_t))
13205 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
13206 else
13207 {
13208 if (idxRegEffSp == idxRegRsp)
13209 {
13210 if (cBitsFlat == 64)
13211 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
13212 else
13213 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
13214 }
13215 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
13216 }
13217 }
13218 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
13219
13220 iemNativeRegFreeTmp(pReNative, idxRegRsp);
13221 if (idxRegEffSp != idxRegRsp)
13222 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
13223 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13224
13225 return off;
13226}
13227
13228
13229
13230/*********************************************************************************************************************************
13231* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
13232*********************************************************************************************************************************/
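/* Note: each IEM_MC_MEM_MAP_* / IEM_MC_MEM_FLAT_MAP_* below expands to a call
   to iemNativeEmitMemMapCommon with the mapped element size, the access mode
   (IEM_ACCESS_DATA_ATOMIC / _RW / _W / _R), the natural alignment mask and the
   corresponding iemNativeHlpMem[Flat]MapDataXxx helper; the FLAT variants
   simply pass UINT8_MAX as the segment register. */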
13233
13234#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13235 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13236 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
13237 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
13238
13239#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13240 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13241 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
13242 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
13243
13244#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13245 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13246 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
13247 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
13248
13249#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13250 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13251 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
13252 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
13253
13254
13255#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13256 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13257 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13258 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
13259
13260#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13261 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13262 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13263 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
13264
13265#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13266 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13267 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13268 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
13269
13270#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13271 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13272 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13273 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
13274
13275#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13276 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
13277 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13278 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
13279
13280
13281#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13282 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13283 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13284 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
13285
13286#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13287 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13288 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13289 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
13290
13291#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13292 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13293 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13294 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
13295
13296#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13297 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13298 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13299 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
13300
13301#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13302 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
13303 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13304 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
13305
13306
13307#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13308 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13309 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13310 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
13311
13312#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13313 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13314 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13315 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
13316#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13317 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13318 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13319 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
13320
13321#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13322 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13323 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13324 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
13325
13326#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13327 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
13328 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13329 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
13330
13331
13332#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13333 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
13334 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13335 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
13336
13337#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13338 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
13339 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
13340 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
13341
13342
13343#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13344 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13345 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13346 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
13347
13348#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13349 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13350 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13351 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
13352
13353#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13354 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13355 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13356 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
13357
13358#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13359 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13360 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13361 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
13362
13363
13364
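/*
 * Note: the FLAT variants below pass UINT8_MAX as the segment register, which
 * the common emitter takes to mean a flat address space (64-bit or flat 32-bit
 * mode, see the strict checks in iemNativeEmitMemMapCommon) and therefore
 * pairs with the iemNativeHlpMemFlatMapDataXxx helpers.
 */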
13365#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13366 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13367 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
13368 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
13369
13370#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13371 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13372 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
13373 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
13374
13375#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13376 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13377 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
13378 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
13379
13380#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13381 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13382 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
13383 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
13384
13385
13386#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13387 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13388 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13389 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
13390
13391#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13392 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13393 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13394 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
13395
13396#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13397 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13398 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13399 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
13400
13401#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13402 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13403 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13404 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
13405
13406#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
13407 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
13408 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13409 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
13410
13411
13412#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13413 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13414 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13415 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
13416
13417#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13418 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13419 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13420 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
13421
13422#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13423 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13424 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13425 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
13426
13427#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13428 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13429 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13430 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
13431
13432#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
13433 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
13434 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13435 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
13436
13437
13438#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13439 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13440 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13441 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
13442
13443#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13444 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13445 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13446 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
13447
13448#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13449 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13450 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13451 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
13452
13453#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13454 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13455 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13456 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
13457
13458#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
13459 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
13460 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13461 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
13462
13463
13464#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
13465 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
13466 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13467 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
13468
13469#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
13470 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
13471 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
13472 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
13473
13474
13475#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13476 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13477 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13478 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
13479
13480#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13481 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13482 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13483 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
13484
13485#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13486 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13487 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13488 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
13489
13490#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13491 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13492 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13493 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
13494
13495
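/**
 * Common emitter for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX MCs.
 *
 * Emits an inline TLB lookup for the mapping with a TlbMiss path that calls
 * the given helper, leaving the host mapping address in idxVarMem and the
 * unmap info in idxVarUnmapInfo.
 *
 * @returns New code buffer offset (longjmps on failure).
 * @param   pReNative       The native recompiler state.
 * @param   off             Current code buffer offset.
 * @param   idxVarMem       Variable that receives the pointer to the mapping.
 * @param   idxVarUnmapInfo Variable that receives the unmap info (bUnmapInfo).
 * @param   iSegReg         The segment register, UINT8_MAX for flat accesses.
 * @param   idxVarGCPtrMem  Variable holding the guest address to map.
 * @param   cbMem           Size of the mapping in bytes.
 * @param   fAccess         The access type (IEM_ACCESS_DATA_XXX).
 * @param   fAlignMask      Alignment mask for the access.
 * @param   pfnFunction     The mapping helper to call on the TlbMiss path.
 * @param   idxInstr        The current instruction number in the TB.
 */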
13496DECL_INLINE_THROW(uint32_t)
13497iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
13498 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
13499 uintptr_t pfnFunction, uint8_t idxInstr)
13500{
13501 /*
13502 * Assert sanity.
13503 */
13504 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
13505 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
13506 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
13507 && pVarMem->cbVar == sizeof(void *),
13508 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13509
13510 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
13511 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13512 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
13513 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
13514 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13515
13516 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
13517 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
13518 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
13519 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
13520 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13521
13522 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
13523
13524 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
13525
13526#ifdef VBOX_STRICT
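/* Paranoia: in strict builds verify that the helper handed down by the MC
   macro matches the access size and type. */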
13527# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
13528 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
13529 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
13530 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
13531 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
13532# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
13533 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
13534 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
13535 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
13536
13537 if (iSegReg == UINT8_MAX)
13538 {
13539 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13540 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13541 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13542 switch (cbMem)
13543 {
13544 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
13545 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
13546 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
13547 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
13548 case 10:
13549 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
13550 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
13551 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13552 break;
13553 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
13554# if 0
13555 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
13556 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
13557# endif
13558 default: AssertFailed(); break;
13559 }
13560 }
13561 else
13562 {
13563 Assert(iSegReg < 6);
13564 switch (cbMem)
13565 {
13566 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
13567 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
13568 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
13569 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
13570 case 10:
13571 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
13572 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
13573 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13574 break;
13575 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
13576# if 0
13577 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
13578 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
13579# endif
13580 default: AssertFailed(); break;
13581 }
13582 }
13583# undef IEM_MAP_HLP_FN
13584# undef IEM_MAP_HLP_FN_NO_AT
13585#endif
13586
13587#ifdef VBOX_STRICT
13588 /*
13589 * Check that the fExec flags we've got make sense.
13590 */
13591 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13592#endif
13593
13594 /*
13595 * To keep things simple we have to commit any pending writes first as we
13596 * may end up making calls.
13597 */
13598 off = iemNativeRegFlushPendingWrites(pReNative, off);
13599
13600#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13601 /*
13602 * Move/spill/flush stuff out of call-volatile registers.
13603 * This is the easy way out. We could contain this to the tlb-miss branch
13604 * by saving and restoring active stuff here.
13605 */
13606 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
13607 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13608#endif
13609
13610 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
13611 while the tlb-miss codepath will temporarily put it on the stack.
13612 Set the type to stack here so we don't need to do it twice below. */
13613 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
13614 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
13615 /** @todo use a tmp register from TlbState, since they'll be free after tlb
13616 * lookup is done. */
13617
13618 /*
13619 * Define labels and allocate the result register (trying for the return
13620 * register if we can).
13621 */
13622 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13623 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13624 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
13625 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
13626 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
13627 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13628 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13629 : UINT32_MAX;
13630//off=iemNativeEmitBrk(pReNative, off, 0);
13631 /*
13632 * Jump to the TLB lookup code.
13633 */
13634 if (!TlbState.fSkip)
13635 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13636
13637 /*
13638 * TlbMiss:
13639 *
13640 * Call helper to do the fetching.
13641 * We flush all guest register shadow copies here.
13642 */
13643 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13644
13645#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13646 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13647#else
13648 RT_NOREF(idxInstr);
13649#endif
13650
13651#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13652 /* Save variables in volatile registers. */
13653 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
13654 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13655#endif
13656
13657 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
13658 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
13659#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13660 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13661#else
13662 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13663#endif
13664
13665 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
13666 if (iSegReg != UINT8_MAX)
13667 {
13668 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13669 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
13670 }
13671
13672 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
13673 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
13674 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
13675
13676 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13677 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13678
13679 /* Done setting up parameters, make the call. */
13680 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13681
13682 /*
13683 * Put the output in the right registers.
13684 */
13685 Assert(idxRegMemResult == pVarMem->idxReg);
13686 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
13687 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
13688
13689#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13690 /* Restore variables and guest shadow registers to volatile registers. */
13691 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13692 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13693#endif
13694
13695 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
13696 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
13697
13698#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13699 if (!TlbState.fSkip)
13700 {
13701 /* end of TlbMiss - Jump to the done label. */
13702 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13703 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13704
13705 /*
13706 * TlbLookup:
13707 */
13708 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
13709 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13710# ifdef VBOX_WITH_STATISTICS
13711 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
13712 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
13713# endif
13714
13715 /* [idxVarUnmapInfo] = 0; */
13716 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
13717
13718 /*
13719 * TlbDone:
13720 */
13721 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13722
13723 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13724
13725# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13726 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13727 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13728# endif
13729 }
13730#else
13731 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
13732#endif
13733
13734 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13735 iemNativeVarRegisterRelease(pReNative, idxVarMem);
13736
13737 return off;
13738}
13739
13740
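/* The commit-and-unmap MCs all funnel into iemNativeEmitMemCommitAndUnmap
   below, differing only in the access type and the unmap helper used. */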
13741#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
13742 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
13743 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
13744
13745#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
13746 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
13747 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
13748
13749#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
13750 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
13751 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
13752
13753#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
13754 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
13755 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
13756
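/**
 * Common emitter for the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX MCs.
 *
 * Emits a test of the bUnmapInfo variable and, only when it is non-zero, a
 * call to the given commit-and-unmap helper; the variable is freed afterwards
 * in either case.
 */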
13757DECL_INLINE_THROW(uint32_t)
13758iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
13759 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
13760{
13761 /*
13762 * Assert sanity.
13763 */
13764 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13765#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
13766 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
13767#endif
13768 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
13769 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
13770 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
13771#ifdef VBOX_STRICT
13772 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
13773 {
13774 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
13775 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
13776 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
13777 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
13778 case IEM_ACCESS_TYPE_WRITE:
13779 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
13780 case IEM_ACCESS_TYPE_READ:
13781 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
13782 default: AssertFailed();
13783 }
13784#else
13785 RT_NOREF(fAccess);
13786#endif
13787
13788 /*
13789 * To keep things simple we have to commit any pending writes first as we
13790 * may end up making calls (there shouldn't be any at this point, so this
13791 * is just for consistency).
13792 */
13793 /** @todo we could postpone this till we make the call and reload the
13794 * registers after returning from the call. Not sure if that's sensible or
13795 * not, though. */
13796 off = iemNativeRegFlushPendingWrites(pReNative, off);
13797
13798 /*
13799 * Move/spill/flush stuff out of call-volatile registers.
13800 *
13801 * We exclude any register holding the bUnmapInfo variable, as we'll be
13802 * checking it after returning from the call and will free it afterwards.
13803 */
13804 /** @todo save+restore active registers and maybe guest shadows in miss
13805 * scenario. */
13806 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
13807
13808 /*
13809 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
13810 * to call the unmap helper function.
13811 *
13812 * The likelihood of it being zero is higher than for the TLB hit when doing
13813 * the mapping, as a TLB miss for a well-aligned and unproblematic memory
13814 * access should also end up with a mapping that won't need special unmapping.
13815 */
13816 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
13817 * should speed up things for the pure interpreter as well when TLBs
13818 * are enabled. */
13819#ifdef RT_ARCH_AMD64
13820 if (pVarUnmapInfo->idxReg == UINT8_MAX)
13821 {
13822 /* test byte [rbp - xxx], 0ffh */
13823 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
13824 pbCodeBuf[off++] = 0xf6;
13825 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
13826 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
13827 pbCodeBuf[off++] = 0xff;
13828 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13829 }
13830 else
13831#endif
13832 {
13833 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
13834 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
13835 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
13836 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13837 }
13838 uint32_t const offJmpFixup = off;
13839 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
13840
13841 /*
13842 * Call the unmap helper function.
13843 */
13844#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
13845 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13846#else
13847 RT_NOREF(idxInstr);
13848#endif
13849
13850 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
13851 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
13852 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13853
13854 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13855 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13856
13857 /* Done setting up parameters, make the call. */
13858 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13859
13860 /* The bUnmapInfo variable is implicitly freed by these MCs. */
13861 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
13862
13863 /*
13864 * Done, just fixup the jump for the non-call case.
13865 */
13866 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
13867
13868 return off;
13869}
13870
13871
13872
13873/*********************************************************************************************************************************
13874* State and Exceptions *
13875*********************************************************************************************************************************/
13876
13877#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13878#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13879
13880#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13881#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13882#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13883
13884#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13885#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13886#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13887
13888
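/** Common emitter for the FPU/SSE/AVX state actualization MCs above; this is
 *  currently a no-op placeholder (see the todo in the body). */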
13889DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
13890{
13891 /** @todo this needs a lot more work later. */
13892 RT_NOREF(pReNative, fForChange);
13893 return off;
13894}
13895
13896
13897
13898/*********************************************************************************************************************************
13899* Emitters for FPU related operations. *
13900*********************************************************************************************************************************/
13901
13902#define IEM_MC_FETCH_FCW(a_u16Fcw) \
13903 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
13904
13905/** Emits code for IEM_MC_FETCH_FCW. */
13906DECL_INLINE_THROW(uint32_t)
13907iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
13908{
13909 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
13910 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
13911
13912 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
13913
13914 /* Allocate a temporary FCW register. */
13915 /** @todo eliminate extra register */
13916 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
13917 kIemNativeGstRegUse_ReadOnly);
13918
13919 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
13920
13921 /* Free but don't flush the FCW register. */
13922 iemNativeRegFreeTmp(pReNative, idxFcwReg);
13923 iemNativeVarRegisterRelease(pReNative, idxDstVar);
13924
13925 return off;
13926}
13927
13928
13929#define IEM_MC_FETCH_FSW(a_u16Fsw) \
13930 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
13931
13932/** Emits code for IEM_MC_FETCH_FSW. */
13933DECL_INLINE_THROW(uint32_t)
13934iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
13935{
13936 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
13937 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
13938
13939 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
13940 /* Allocate a temporary FSW register. */
13941 /** @todo eliminate extra register */
13942 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
13943 kIemNativeGstRegUse_ReadOnly);
13944
13945 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
13946
13947 /* Free but don't flush the FSW register. */
13948 iemNativeRegFreeTmp(pReNative, idxFswReg);
13949 iemNativeVarRegisterRelease(pReNative, idxDstVar);
13950
13951 return off;
13952}
13953
13954
13955
13956/*********************************************************************************************************************************
13957* The native code generator functions for each MC block. *
13958*********************************************************************************************************************************/
13959
13960/*
13961 * Include instruction emitters.
13962 */
13963#include "target-x86/IEMAllN8veEmit-x86.h"
13964
13965/*
13966 * Include g_apfnIemNativeRecompileFunctions and associated functions.
13967 *
13968 * This should probably live in its own file later, but let's see what the
13969 * compile times turn out to be first.
13970 */
13971#include "IEMNativeFunctions.cpp.h"
13972
13973
13974
13975/*********************************************************************************************************************************
13976* Recompiler Core. *
13977*********************************************************************************************************************************/
13978
13979
13980/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
13981static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
13982{
13983 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
13984 pDis->cbCachedInstr += cbMaxRead;
13985 RT_NOREF(cbMinRead);
13986 return VERR_NO_DATA;
13987}
13988
13989
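/**
 * Translates an offset into VMCPUCC to a member name, for annotating the
 * disassembly of recompiled code.
 *
 * @returns Read-only member name, NULL if the offset is not recognized.
 * @param   off     The offset into VMCPUCC.
 */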
13990DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
13991{
13992 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
13993 {
13994#define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
13995 ENTRY(fLocalForcedActions),
13996 ENTRY(iem.s.rcPassUp),
13997 ENTRY(iem.s.fExec),
13998 ENTRY(iem.s.pbInstrBuf),
13999 ENTRY(iem.s.uInstrBufPc),
14000 ENTRY(iem.s.GCPhysInstrBuf),
14001 ENTRY(iem.s.cbInstrBufTotal),
14002 ENTRY(iem.s.idxTbCurInstr),
14003#ifdef VBOX_WITH_STATISTICS
14004 ENTRY(iem.s.StatNativeTlbHitsForFetch),
14005 ENTRY(iem.s.StatNativeTlbHitsForStore),
14006 ENTRY(iem.s.StatNativeTlbHitsForStack),
14007 ENTRY(iem.s.StatNativeTlbHitsForMapped),
14008 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
14009 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
14010 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
14011 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
14012#endif
14013 ENTRY(iem.s.DataTlb.aEntries),
14014 ENTRY(iem.s.DataTlb.uTlbRevision),
14015 ENTRY(iem.s.DataTlb.uTlbPhysRev),
14016 ENTRY(iem.s.DataTlb.cTlbHits),
14017 ENTRY(iem.s.CodeTlb.aEntries),
14018 ENTRY(iem.s.CodeTlb.uTlbRevision),
14019 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
14020 ENTRY(iem.s.CodeTlb.cTlbHits),
14021 ENTRY(pVMR3),
14022 ENTRY(cpum.GstCtx.rax),
14023 ENTRY(cpum.GstCtx.ah),
14024 ENTRY(cpum.GstCtx.rcx),
14025 ENTRY(cpum.GstCtx.ch),
14026 ENTRY(cpum.GstCtx.rdx),
14027 ENTRY(cpum.GstCtx.dh),
14028 ENTRY(cpum.GstCtx.rbx),
14029 ENTRY(cpum.GstCtx.bh),
14030 ENTRY(cpum.GstCtx.rsp),
14031 ENTRY(cpum.GstCtx.rbp),
14032 ENTRY(cpum.GstCtx.rsi),
14033 ENTRY(cpum.GstCtx.rdi),
14034 ENTRY(cpum.GstCtx.r8),
14035 ENTRY(cpum.GstCtx.r9),
14036 ENTRY(cpum.GstCtx.r10),
14037 ENTRY(cpum.GstCtx.r11),
14038 ENTRY(cpum.GstCtx.r12),
14039 ENTRY(cpum.GstCtx.r13),
14040 ENTRY(cpum.GstCtx.r14),
14041 ENTRY(cpum.GstCtx.r15),
14042 ENTRY(cpum.GstCtx.es.Sel),
14043 ENTRY(cpum.GstCtx.es.u64Base),
14044 ENTRY(cpum.GstCtx.es.u32Limit),
14045 ENTRY(cpum.GstCtx.es.Attr),
14046 ENTRY(cpum.GstCtx.cs.Sel),
14047 ENTRY(cpum.GstCtx.cs.u64Base),
14048 ENTRY(cpum.GstCtx.cs.u32Limit),
14049 ENTRY(cpum.GstCtx.cs.Attr),
14050 ENTRY(cpum.GstCtx.ss.Sel),
14051 ENTRY(cpum.GstCtx.ss.u64Base),
14052 ENTRY(cpum.GstCtx.ss.u32Limit),
14053 ENTRY(cpum.GstCtx.ss.Attr),
14054 ENTRY(cpum.GstCtx.ds.Sel),
14055 ENTRY(cpum.GstCtx.ds.u64Base),
14056 ENTRY(cpum.GstCtx.ds.u32Limit),
14057 ENTRY(cpum.GstCtx.ds.Attr),
14058 ENTRY(cpum.GstCtx.fs.Sel),
14059 ENTRY(cpum.GstCtx.fs.u64Base),
14060 ENTRY(cpum.GstCtx.fs.u32Limit),
14061 ENTRY(cpum.GstCtx.fs.Attr),
14062 ENTRY(cpum.GstCtx.gs.Sel),
14063 ENTRY(cpum.GstCtx.gs.u64Base),
14064 ENTRY(cpum.GstCtx.gs.u32Limit),
14065 ENTRY(cpum.GstCtx.gs.Attr),
14066 ENTRY(cpum.GstCtx.rip),
14067 ENTRY(cpum.GstCtx.eflags),
14068 ENTRY(cpum.GstCtx.uRipInhibitInt),
14069#undef ENTRY
14070 };
14071#ifdef VBOX_STRICT
14072 static bool s_fOrderChecked = false;
14073 if (!s_fOrderChecked)
14074 {
14075 s_fOrderChecked = true;
14076 uint32_t offPrev = s_aMembers[0].off;
14077 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
14078 {
14079 Assert(s_aMembers[i].off > offPrev);
14080 offPrev = s_aMembers[i].off;
14081 }
14082 }
14083#endif
14084
14085 /*
14086 * Binary lookup.
14087 */
14088 unsigned iStart = 0;
14089 unsigned iEnd = RT_ELEMENTS(s_aMembers);
14090 for (;;)
14091 {
14092 unsigned const iCur = iStart + (iEnd - iStart) / 2;
14093 uint32_t const offCur = s_aMembers[iCur].off;
14094 if (off < offCur)
14095 {
14096 if (iCur != iStart)
14097 iEnd = iCur;
14098 else
14099 break;
14100 }
14101 else if (off > offCur)
14102 {
14103 if (iCur + 1 < iEnd)
14104 iStart = iCur + 1;
14105 else
14106 break;
14107 }
14108 else
14109 return s_aMembers[iCur].pszName;
14110 }
14111#ifdef VBOX_WITH_STATISTICS
14112 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
14113 return "iem.s.acThreadedFuncStats[iFn]";
14114#endif
14115 return NULL;
14116}
14117
14118
14119/**
14120 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
14121 * @returns pszBuf.
14122 * @param fFlags The flags.
14123 * @param pszBuf The output buffer.
14124 * @param cbBuf The output buffer size. At least 32 bytes.
14125 */
14126DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
14127{
14128 Assert(cbBuf >= 32);
14129 static RTSTRTUPLE const s_aModes[] =
14130 {
14131 /* [00] = */ { RT_STR_TUPLE("16BIT") },
14132 /* [01] = */ { RT_STR_TUPLE("32BIT") },
14133 /* [02] = */ { RT_STR_TUPLE("!2!") },
14134 /* [03] = */ { RT_STR_TUPLE("!3!") },
14135 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
14136 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
14137 /* [06] = */ { RT_STR_TUPLE("!6!") },
14138 /* [07] = */ { RT_STR_TUPLE("!7!") },
14139 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
14140 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
14141 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
14142 /* [0b] = */ { RT_STR_TUPLE("!b!") },
14143 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
14144 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
14145 /* [0e] = */ { RT_STR_TUPLE("!e!") },
14146 /* [0f] = */ { RT_STR_TUPLE("!f!") },
14147 /* [10] = */ { RT_STR_TUPLE("!10!") },
14148 /* [11] = */ { RT_STR_TUPLE("!11!") },
14149 /* [12] = */ { RT_STR_TUPLE("!12!") },
14150 /* [13] = */ { RT_STR_TUPLE("!13!") },
14151 /* [14] = */ { RT_STR_TUPLE("!14!") },
14152 /* [15] = */ { RT_STR_TUPLE("!15!") },
14153 /* [16] = */ { RT_STR_TUPLE("!16!") },
14154 /* [17] = */ { RT_STR_TUPLE("!17!") },
14155 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
14156 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
14157 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
14158 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
14159 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
14160 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
14161 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
14162 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
14163 };
14164 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
14165 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
14166 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
14167
14168 pszBuf[off++] = ' ';
14169 pszBuf[off++] = 'C';
14170 pszBuf[off++] = 'P';
14171 pszBuf[off++] = 'L';
14172 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
14173 Assert(off < 32);
14174
14175 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
14176
14177 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
14178 {
14179 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
14180 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
14181 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
14182 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
14183 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
14184 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
14185 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
14186 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
14187 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
14188 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
14189 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
14190 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
14191 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
14192 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
14193 };
14194 if (fFlags)
14195 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
14196 if (s_aFlags[i].fFlag & fFlags)
14197 {
14198 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
14199 pszBuf[off++] = ' ';
14200 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
14201 off += s_aFlags[i].cchName;
14202 fFlags &= ~s_aFlags[i].fFlag;
14203 if (!fFlags)
14204 break;
14205 }
14206 pszBuf[off] = '\0';
14207
14208 return pszBuf;
14209}
14210
14211
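/**
 * Disassembles a native translation block for debugging purposes.
 *
 * With debug info present the output interleaves the guest instructions,
 * threaded calls, labels and guest register shadowing notes with the native
 * code; without it the guest opcode ranges and the native code are simply
 * dumped one after the other.
 *
 * @param   pTb     The translation block; must be of the native type.
 * @param   pHlp    The DBGF info output helpers.
 */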
14212DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
14213{
14214 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
14215#if defined(RT_ARCH_AMD64)
14216 static const char * const a_apszMarkers[] =
14217 {
14218 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
14219 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
14220 };
14221#endif
14222
14223 char szDisBuf[512];
14224 DISSTATE Dis;
14225 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
14226 uint32_t const cNative = pTb->Native.cInstructions;
14227 uint32_t offNative = 0;
14228#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14229 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
14230#endif
14231 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
14232 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
14233 : DISCPUMODE_64BIT;
14234#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14235 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
14236#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14237 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
14238#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14239# error "Port me"
14240#else
14241 csh hDisasm = ~(size_t)0;
14242# if defined(RT_ARCH_AMD64)
14243 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
14244# elif defined(RT_ARCH_ARM64)
14245 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
14246# else
14247# error "Port me"
14248# endif
14249 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
14250
14251 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
14252 //Assert(rcCs == CS_ERR_OK);
14253#endif
14254
14255 /*
14256 * Print TB info.
14257 */
14258 pHlp->pfnPrintf(pHlp,
14259 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
14260 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
14261 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
14262 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
14263#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14264 if (pDbgInfo && pDbgInfo->cEntries > 1)
14265 {
14266 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
14267
14268 /*
14269 * This disassembly is driven by the debug info which follows the native
14270 * code and indicates when it starts with the next guest instructions,
14271 * where labels are and such things.
14272 */
14273 uint32_t idxThreadedCall = 0;
14274 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
14275 uint8_t idxRange = UINT8_MAX;
14276 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
14277 uint32_t offRange = 0;
14278 uint32_t offOpcodes = 0;
14279 uint32_t const cbOpcodes = pTb->cbOpcodes;
14280 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
14281 uint32_t const cDbgEntries = pDbgInfo->cEntries;
14282 uint32_t iDbgEntry = 1;
14283 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
14284
14285 while (offNative < cNative)
14286 {
14287 /* If we're at or have passed the point where the next chunk of debug
14288 info starts, process it. */
14289 if (offDbgNativeNext <= offNative)
14290 {
14291 offDbgNativeNext = UINT32_MAX;
14292 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
14293 {
14294 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
14295 {
14296 case kIemTbDbgEntryType_GuestInstruction:
14297 {
14298 /* Did the exec flag change? */
14299 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
14300 {
14301 pHlp->pfnPrintf(pHlp,
14302 " fExec change %#08x -> %#08x %s\n",
14303 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
14304 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
14305 szDisBuf, sizeof(szDisBuf)));
14306 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
14307 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
14308 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
14309 : DISCPUMODE_64BIT;
14310 }
14311
14312 /* New opcode range? We need to fend off a spurious debug info entry here for cases
14313 where the compilation was aborted before the opcode was recorded and the actual
14314 instruction was translated to a threaded call. This may happen when we run out
14315 of ranges, or when some complicated interrupts/FFs are found to be pending or
14316 similar. So, we just deal with it here rather than in the compiler code as it
14317 is a lot simpler to do here. */
14318 if ( idxRange == UINT8_MAX
14319 || idxRange >= cRanges
14320 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
14321 {
14322 idxRange += 1;
14323 if (idxRange < cRanges)
14324 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
14325 else
14326 continue;
14327 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
14328 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
14329 + (pTb->aRanges[idxRange].idxPhysPage == 0
14330 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
14331 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
14332 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
14333 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
14334 pTb->aRanges[idxRange].idxPhysPage);
14335 GCPhysPc += offRange;
14336 }
14337
14338 /* Disassemble the instruction. */
14339 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
14340 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
14341 uint32_t cbInstr = 1;
14342 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
14343 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
14344 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
14345 if (RT_SUCCESS(rc))
14346 {
14347 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14348 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14349 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14350 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14351
14352 static unsigned const s_offMarker = 55;
14353 static char const s_szMarker[] = " ; <--- guest";
14354 if (cch < s_offMarker)
14355 {
14356 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
14357 cch = s_offMarker;
14358 }
14359 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
14360 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
14361
14362 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
14363 }
14364 else
14365 {
14366 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
14367 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
14368 cbInstr = 1;
14369 }
14370 GCPhysPc += cbInstr;
14371 offOpcodes += cbInstr;
14372 offRange += cbInstr;
14373 continue;
14374 }
14375
14376 case kIemTbDbgEntryType_ThreadedCall:
14377 pHlp->pfnPrintf(pHlp,
14378 " Call #%u to %s (%u args) - %s\n",
14379 idxThreadedCall,
14380 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
14381 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
14382 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
14383 idxThreadedCall++;
14384 continue;
14385
14386 case kIemTbDbgEntryType_GuestRegShadowing:
14387 {
14388 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
14389 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
14390 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
14391 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
14392 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
14393 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
14394 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
14395 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
14396 else
14397 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
14398 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
14399 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
14400 continue;
14401 }
14402
14403 case kIemTbDbgEntryType_Label:
14404 {
14405 const char *pszName = "what_the_fudge";
14406 const char *pszComment = "";
14407 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
14408 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
14409 {
14410 case kIemNativeLabelType_Return:
14411 pszName = "Return";
14412 break;
14413 case kIemNativeLabelType_ReturnBreak:
14414 pszName = "ReturnBreak";
14415 break;
14416 case kIemNativeLabelType_ReturnWithFlags:
14417 pszName = "ReturnWithFlags";
14418 break;
14419 case kIemNativeLabelType_NonZeroRetOrPassUp:
14420 pszName = "NonZeroRetOrPassUp";
14421 break;
14422 case kIemNativeLabelType_RaiseGp0:
14423 pszName = "RaiseGp0";
14424 break;
14425 case kIemNativeLabelType_RaiseNm:
14426 pszName = "RaiseNm";
14427 break;
14428 case kIemNativeLabelType_RaiseUd:
14429 pszName = "RaiseUd";
14430 break;
14431 case kIemNativeLabelType_RaiseMf:
14432 pszName = "RaiseMf";
14433 break;
14434 case kIemNativeLabelType_ObsoleteTb:
14435 pszName = "ObsoleteTb";
14436 break;
14437 case kIemNativeLabelType_NeedCsLimChecking:
14438 pszName = "NeedCsLimChecking";
14439 break;
14440 case kIemNativeLabelType_CheckBranchMiss:
14441 pszName = "CheckBranchMiss";
14442 break;
14443 case kIemNativeLabelType_If:
14444 pszName = "If";
14445 fNumbered = true;
14446 break;
14447 case kIemNativeLabelType_Else:
14448 pszName = "Else";
14449 fNumbered = true;
14450 pszComment = " ; regs state restored pre-if-block";
14451 break;
14452 case kIemNativeLabelType_Endif:
14453 pszName = "Endif";
14454 fNumbered = true;
14455 break;
14456 case kIemNativeLabelType_CheckIrq:
14457 pszName = "CheckIrq_CheckVM";
14458 fNumbered = true;
14459 break;
14460 case kIemNativeLabelType_TlbLookup:
14461 pszName = "TlbLookup";
14462 fNumbered = true;
14463 break;
14464 case kIemNativeLabelType_TlbMiss:
14465 pszName = "TlbMiss";
14466 fNumbered = true;
14467 break;
14468 case kIemNativeLabelType_TlbDone:
14469 pszName = "TlbDone";
14470 fNumbered = true;
14471 break;
14472 case kIemNativeLabelType_Invalid:
14473 case kIemNativeLabelType_End:
14474 break;
14475 }
14476 if (fNumbered)
14477 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
14478 else
14479 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
14480 continue;
14481 }
14482
14483 case kIemTbDbgEntryType_NativeOffset:
14484 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
14485 Assert(offDbgNativeNext > offNative);
14486 break;
14487
14488#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
14489 case kIemTbDbgEntryType_DelayedPcUpdate:
14490 pHlp->pfnPrintf(pHlp,
14491 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
14492 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
14493 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
14494 continue;
14495#endif
14496
14497 default:
14498 AssertFailed();
14499 }
14500 iDbgEntry++;
14501 break;
14502 }
14503 }
14504
14505 /*
14506 * Disassemble the next native instruction.
14507 */
14508 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
14509# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14510 uint32_t cbInstr = sizeof(paNative[0]);
14511 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
14512 if (RT_SUCCESS(rc))
14513 {
14514# if defined(RT_ARCH_AMD64)
14515 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
14516 {
14517 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
14518 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
14519 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
14520 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
14521 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
14522 uInfo & 0x8000 ? "recompiled" : "todo");
14523 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
14524 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
14525 else
14526 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
14527 }
14528 else
14529# endif
14530 {
14531 const char *pszAnnotation = NULL;
14532# ifdef RT_ARCH_AMD64
14533 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14534 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14535 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14536 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14537 PCDISOPPARAM pMemOp;
14538 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
14539 pMemOp = &Dis.Param1;
14540 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
14541 pMemOp = &Dis.Param2;
14542 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
14543 pMemOp = &Dis.Param3;
14544 else
14545 pMemOp = NULL;
14546 if ( pMemOp
14547 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
14548 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
14549 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
14550 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
14551
14552#elif defined(RT_ARCH_ARM64)
14553 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
14554 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14555 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14556# else
14557# error "Port me"
14558# endif
14559 if (pszAnnotation)
14560 {
14561 static unsigned const s_offAnnotation = 55;
14562 size_t const cchAnnotation = strlen(pszAnnotation);
14563 size_t cchDis = strlen(szDisBuf);
14564 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
14565 {
14566 if (cchDis < s_offAnnotation)
14567 {
14568 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
14569 cchDis = s_offAnnotation;
14570 }
14571 szDisBuf[cchDis++] = ' ';
14572 szDisBuf[cchDis++] = ';';
14573 szDisBuf[cchDis++] = ' ';
14574 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
14575 }
14576 }
14577 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14578 }
14579 }
14580 else
14581 {
14582# if defined(RT_ARCH_AMD64)
14583 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14584 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14585# elif defined(RT_ARCH_ARM64)
14586 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14587# else
14588# error "Port me"
14589# endif
14590 cbInstr = sizeof(paNative[0]);
14591 }
14592 offNative += cbInstr / sizeof(paNative[0]);
14593
14594# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14595 cs_insn *pInstr;
14596 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14597 (uintptr_t)pNativeCur, 1, &pInstr);
14598 if (cInstrs > 0)
14599 {
14600 Assert(cInstrs == 1);
14601 const char *pszAnnotation = NULL;
14602# if defined(RT_ARCH_ARM64)
14603 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
14604 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
14605 {
14606 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
14607 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
14608 char *psz = strchr(pInstr->op_str, '[');
14609 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
14610 {
14611 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
14612 int32_t off = -1;
14613 psz += 4;
14614 if (*psz == ']')
14615 off = 0;
14616 else if (*psz == ',')
14617 {
14618 psz = RTStrStripL(psz + 1);
14619 if (*psz == '#')
14620 off = RTStrToInt32(&psz[1]);
14621 /** @todo deal with index registers and LSL as well... */
14622 }
14623 if (off >= 0)
14624 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
14625 }
14626 }
14627# endif
14628
14629 size_t const cchOp = strlen(pInstr->op_str);
14630# if defined(RT_ARCH_AMD64)
14631 if (pszAnnotation)
14632 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
14633 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
14634 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
14635 else
14636 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14637 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14638
14639# else
14640 if (pszAnnotation)
14641 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
14642 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
14643 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
14644 else
14645 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14646 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14647# endif
14648 offNative += pInstr->size / sizeof(*pNativeCur);
14649 cs_free(pInstr, cInstrs);
14650 }
14651 else
14652 {
14653# if defined(RT_ARCH_AMD64)
14654 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14655                                    pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14656# else
14657 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14658# endif
14659 offNative++;
14660 }
14661# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14662 }
14663 }
14664 else
14665#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
14666 {
14667 /*
14668 * No debug info, just disassemble the x86 code and then the native code.
14669 *
14670 * First the guest code:
14671 */
14672 for (unsigned i = 0; i < pTb->cRanges; i++)
14673 {
14674 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
14675 + (pTb->aRanges[i].idxPhysPage == 0
14676 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
14677 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
14678 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
14679 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
14680 unsigned off = pTb->aRanges[i].offOpcodes;
14681 /** @todo this ain't working when crossing pages! */
14682 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
14683 while (off < cbOpcodes)
14684 {
14685 uint32_t cbInstr = 1;
14686 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
14687 &pTb->pabOpcodes[off], cbOpcodes - off,
14688 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
14689 if (RT_SUCCESS(rc))
14690 {
14691 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14692 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14693 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14694 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14695 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
14696 GCPhysPc += cbInstr;
14697 off += cbInstr;
14698 }
14699 else
14700 {
14701 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
14702 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
14703 break;
14704 }
14705 }
14706 }
14707
14708 /*
14709 * Then the native code:
14710 */
14711 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
14712 while (offNative < cNative)
14713 {
14714 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
14715# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14716 uint32_t cbInstr = sizeof(paNative[0]);
14717 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
14718 if (RT_SUCCESS(rc))
14719 {
14720# if defined(RT_ARCH_AMD64)
14721 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
14722 {
14723 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
14724 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
14725 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
14726 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
14727 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
14728 uInfo & 0x8000 ? "recompiled" : "todo");
14729 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
14730 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
14731 else
14732 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
14733 }
14734 else
14735# endif
14736 {
14737# ifdef RT_ARCH_AMD64
14738 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14739 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14740 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14741 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14742# elif defined(RT_ARCH_ARM64)
14743 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
14744 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14745 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14746# else
14747# error "Port me"
14748# endif
14749 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14750 }
14751 }
14752 else
14753 {
14754# if defined(RT_ARCH_AMD64)
14755 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14756 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14757# else
14758 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14759# endif
14760 cbInstr = sizeof(paNative[0]);
14761 }
14762 offNative += cbInstr / sizeof(paNative[0]);
14763
14764# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14765 cs_insn *pInstr;
14766 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14767 (uintptr_t)pNativeCur, 1, &pInstr);
14768 if (cInstrs > 0)
14769 {
14770 Assert(cInstrs == 1);
14771# if defined(RT_ARCH_AMD64)
14772 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14773 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14774# else
14775 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14776 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14777# endif
14778 offNative += pInstr->size / sizeof(*pNativeCur);
14779 cs_free(pInstr, cInstrs);
14780 }
14781 else
14782 {
14783# if defined(RT_ARCH_AMD64)
14784 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14785                                    pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14786# else
14787 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14788# endif
14789 offNative++;
14790 }
14791# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14792 }
14793 }
14794
14795#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14796 /* Cleanup. */
14797 cs_close(&hDisasm);
14798#endif
14799}
14800
14801
14802/**
14803 * Recompiles the given threaded TB into a native one.
14804 *
14805 * In case of failure the translation block will be returned as-is.
14806 *
14807 * @returns pTb.
14808 * @param pVCpu The cross context virtual CPU structure of the calling
14809 * thread.
14810 * @param pTb The threaded translation block to recompile to native code.
14811 */
14812DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
14813{
14814 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
14815
14816 /*
14817     * The first time thru, we allocate the recompiler state; the other times
14818 * we just need to reset it before using it again.
14819 */
14820 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
14821 if (RT_LIKELY(pReNative))
14822 iemNativeReInit(pReNative, pTb);
14823 else
14824 {
14825 pReNative = iemNativeInit(pVCpu, pTb);
14826 AssertReturn(pReNative, pTb);
14827 }
14828
14829#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14830 /*
14831 * First do liveness analysis. This is done backwards.
14832 */
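        /* Starting from an all-unused entry after the final call, each call's liveness
           function (or the generic exception/call initializer) derives the entry for the
           preceding call, so entry N ends up describing which guest state the calls
           after call N still need. */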
14833 {
14834 uint32_t idxCall = pTb->Thrd.cCalls;
14835 if (idxCall <= pReNative->cLivenessEntriesAlloc)
14836 { /* likely */ }
14837 else
14838 {
14839 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
14840 while (idxCall > cAlloc)
14841 cAlloc *= 2;
14842 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
14843 AssertReturn(pvNew, pTb);
14844 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
14845 pReNative->cLivenessEntriesAlloc = cAlloc;
14846 }
14847 AssertReturn(idxCall > 0, pTb);
14848 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
14849
14850 /* The initial (final) entry. */
14851 idxCall--;
14852 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
14853
14854 /* Loop backwards thru the calls and fill in the other entries. */
14855 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
14856 while (idxCall > 0)
14857 {
14858 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
14859 if (pfnLiveness)
14860 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
14861 else
14862 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
14863 pCallEntry--;
14864 idxCall--;
14865 }
14866
14867# ifdef VBOX_WITH_STATISTICS
14868     /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
14869        to 'clobbered' rather than 'input'. */
14870 /** @todo */
14871# endif
14872 }
14873#endif
14874
14875 /*
14876 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
14877 * for aborting if an error happens.
14878 */
14879 uint32_t cCallsLeft = pTb->Thrd.cCalls;
14880#ifdef LOG_ENABLED
14881 uint32_t const cCallsOrg = cCallsLeft;
14882#endif
14883 uint32_t off = 0;
14884 int rc = VINF_SUCCESS;
14885 IEMNATIVE_TRY_SETJMP(pReNative, rc)
14886 {
14887 /*
14888 * Emit prolog code (fixed).
14889 */
14890 off = iemNativeEmitProlog(pReNative, off);
14891
14892 /*
14893 * Convert the calls to native code.
14894 */
14895#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14896 int32_t iGstInstr = -1;
14897#endif
14898#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
14899 uint32_t cThreadedCalls = 0;
14900 uint32_t cRecompiledCalls = 0;
14901#endif
14902#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
14903 uint32_t idxCurCall = 0;
14904#endif
14905 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
14906 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
14907 while (cCallsLeft-- > 0)
14908 {
14909 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
14910#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14911 pReNative->idxCurCall = idxCurCall;
14912#endif
14913
14914 /*
14915 * Debug info, assembly markup and statistics.
14916 */
14917#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
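            /* Keep pReNative->fExec in sync with mode changes signalled via CheckMode calls;
               the debug info records below use it, and without a native CheckMode built-in
               the recompile functions presumably rely on the up-to-date flags as well. */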
14918 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
14919 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
14920#endif
14921#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14922 iemNativeDbgInfoAddNativeOffset(pReNative, off);
14923 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
14924 {
14925 if (iGstInstr < (int32_t)pTb->cInstructions)
14926 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
14927 else
14928 Assert(iGstInstr == pTb->cInstructions);
14929 iGstInstr = pCallEntry->idxInstr;
14930 }
14931 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
14932#endif
14933#if defined(VBOX_STRICT)
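            /* Emit a debug marker encoding the call index (bit 15 set when recompiled
               natively) in the low word and the threaded function number in the high word;
               the TB disassembler above decodes this into readable annotations. */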
14934 off = iemNativeEmitMarker(pReNative, off,
14935 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
14938 iemNativeRegAssertSanity(pReNative);
14939#endif
14940#ifdef VBOX_WITH_STATISTICS
14941 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
14942#endif
14943
14944 /*
14945 * Actual work.
14946 */
14947 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
14948 pfnRecom ? "(recompiled)" : "(todo)"));
14949 if (pfnRecom) /** @todo stats on this. */
14950 {
14951 off = pfnRecom(pReNative, off, pCallEntry);
14952 STAM_REL_STATS({cRecompiledCalls++;});
14953 }
14954 else
14955 {
14956 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
14957 STAM_REL_STATS({cThreadedCalls++;});
14958 }
14959 Assert(off <= pReNative->cInstrBufAlloc);
14960 Assert(pReNative->cCondDepth == 0);
14961
14962#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
14963 if (LogIs2Enabled())
14964 {
14965 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
14966# ifndef IEMLIVENESS_EXTENDED_LAYOUT
14967 static const char s_achState[] = "CUXI";
14968# else
14969 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
14970# endif
14971
14972 char szGpr[17];
14973 for (unsigned i = 0; i < 16; i++)
14974 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
14975 szGpr[16] = '\0';
14976
14977 char szSegBase[X86_SREG_COUNT + 1];
14978 char szSegLimit[X86_SREG_COUNT + 1];
14979 char szSegAttrib[X86_SREG_COUNT + 1];
14980 char szSegSel[X86_SREG_COUNT + 1];
14981 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
14982 {
14983 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
14984 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
14985 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
14986 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
14987 }
14988 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
14989 = szSegSel[X86_SREG_COUNT] = '\0';
14990
14991 char szEFlags[8];
14992 for (unsigned i = 0; i < 7; i++)
14993 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
14994 szEFlags[7] = '\0';
14995
14996                Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
14997 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
14998 }
14999#endif
15000
15001 /*
15002 * Advance.
15003 */
15004 pCallEntry++;
15005#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
15006 idxCurCall++;
15007#endif
15008 }
15009
15010 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
15011 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
15012 if (!cThreadedCalls)
15013 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
15014
15015 /*
15016 * Emit the epilog code.
15017 */
15018 uint32_t idxReturnLabel;
15019 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
15020
15021 /*
15022 * Generate special jump labels.
15023 */
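        /* Only label types actually referenced by this TB (tracked in bmLabelTypes) get
           their tail code emitted. */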
15024 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
15025 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
15026 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
15027 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
15028 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
15029 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
15030 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
15031 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
15032 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
15033 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
15034 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
15035 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
15036 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
15037 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
15038 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
15039 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
15040 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
15041 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
15042 }
15043 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
15044 {
15045 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
15046 return pTb;
15047 }
15048 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
15049 Assert(off <= pReNative->cInstrBufAlloc);
15050
15051 /*
15052     * Make sure all labels have been defined.
15053 */
15054 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
15055#ifdef VBOX_STRICT
15056 uint32_t const cLabels = pReNative->cLabels;
15057 for (uint32_t i = 0; i < cLabels; i++)
15058 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
15059#endif
15060
15061 /*
15062 * Allocate executable memory, copy over the code we've generated.
15063 */
15064 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
15065 if (pTbAllocator->pDelayedFreeHead)
15066 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
15067
15068 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
15069 AssertReturn(paFinalInstrBuf, pTb);
15070 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
15071
15072 /*
15073 * Apply fixups.
15074 */
15075 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
15076 uint32_t const cFixups = pReNative->cFixups;
15077 for (uint32_t i = 0; i < cFixups; i++)
15078 {
15079 Assert(paFixups[i].off < off);
15080 Assert(paFixups[i].idxLabel < cLabels);
15081 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
15082 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
15083 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
15084 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
15085 switch (paFixups[i].enmType)
15086 {
15087#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
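            /* 32-bit displacement of a near jump/call; offsets are byte offsets here and
               offAddend lets the emitter account for where the field sits relative to the
               end of the instruction. */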
15088 case kIemNativeFixupType_Rel32:
15089 Assert(paFixups[i].off + 4 <= off);
15090 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15091 continue;
15092
15093#elif defined(RT_ARCH_ARM64)
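            /* ARM64 branch immediates with displacements counted in 32-bit instruction
               units: imm26 at bit 0 is used by B/BL, imm19 at bit 5 by B.cond/CBZ/CBNZ and
               literal loads, and imm14 at bit 5 by TBZ/TBNZ.  E.g. patching a plain
               B (0x14000000) to a label eight instructions ahead yields 0x14000008. */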
15094 case kIemNativeFixupType_RelImm26At0:
15095 {
15096 Assert(paFixups[i].off < off);
15097 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15098 Assert(offDisp >= -262144 && offDisp < 262144);
15099 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
15100 continue;
15101 }
15102
15103 case kIemNativeFixupType_RelImm19At5:
15104 {
15105 Assert(paFixups[i].off < off);
15106 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15107 Assert(offDisp >= -262144 && offDisp < 262144);
15108 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
15109 continue;
15110 }
15111
15112 case kIemNativeFixupType_RelImm14At5:
15113 {
15114 Assert(paFixups[i].off < off);
15115 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15116 Assert(offDisp >= -8192 && offDisp < 8192);
15117 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
15118 continue;
15119 }
15120
15121#endif
15122 case kIemNativeFixupType_Invalid:
15123 case kIemNativeFixupType_End:
15124 break;
15125 }
15126 AssertFailed();
15127 }
15128
15129 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
15130 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
15131
15132 /*
15133 * Convert the translation block.
15134 */
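    /* The threaded call table is no longer needed; hand the TB the final native
       instruction buffer and retag it as a native TB (the debug info copy is optional). */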
15135 RTMemFree(pTb->Thrd.paCalls);
15136 pTb->Native.paInstructions = paFinalInstrBuf;
15137 pTb->Native.cInstructions = off;
15138 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
15139#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15140    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
15141 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
15142#endif
15143
15144 Assert(pTbAllocator->cThreadedTbs > 0);
15145 pTbAllocator->cThreadedTbs -= 1;
15146 pTbAllocator->cNativeTbs += 1;
15147 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
15148
15149#ifdef LOG_ENABLED
15150 /*
15151 * Disassemble to the log if enabled.
15152 */
15153 if (LogIs3Enabled())
15154 {
15155 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
15156 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
15157# ifdef DEBUG_bird
15158 RTLogFlush(NULL);
15159# endif
15160 }
15161#endif
15162 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
15163
15164 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
15165 return pTb;
15166}
15167