source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103254

Last change on this file since 103254 was 103236, checked in by vboxsync, 13 months ago

VMM/IEM: Liveness analysis, part 8: scm fix. bugref:10372 bugref:10375

1/* $Id: IEMAllN8veRecompiler.cpp 103236 2024-02-07 01:39:16Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
136#endif
137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
138static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
139static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
140#endif
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
142DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
143 IEMNATIVEGSTREG enmGstReg, uint32_t off);
144DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store internal data
152 * in the chunk.
153 *
154 * Using RTHeapSimple is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation lets us restrict page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
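/* Worked example (illustrative only, not from the original sources): a request for, say,
 * 600 bytes is rounded up to RT_ALIGN_32(600, 128) = 640 bytes, i.e. it consumes
 * (600 + 128 - 1) >> 7 = 5 allocation units, and each unit corresponds to one bit in
 * the per-chunk allocation bitmap maintained by the allocator below. */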
167
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Init once for the critical section. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
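/* Sketch of how the descriptor above is driven when a chunk is registered further down
 * in iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk (for orientation only):
 *      pEntry->pPrev = __jit_debug_descriptor.pTail;           -- link the new entry at the tail
 *      __jit_debug_descriptor.pRelevant = pEntry;              -- tell GDB which entry changed
 *      __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
 *      __jit_debug_register_code();                            -- GDB's breakpoint fires here
 *      __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
 */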
234
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDA into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
268/** Pointer to per-chunk unwind info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
281 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap. So,
344 * for the sake of simplicity (read: laziness), they are allocated as one
345 * continuous chunk. */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to a chunk). */
351 uint32_t cBitmapElementsPerChunk;
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * requested memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 /** The size of the initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside in 32-bit RVA distance to the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
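/* Sketch of the expected caller flow on darwin (illustrative only; the buffer names here
 * are made up, see iemExecMemAllocatorReadyForUse below for the actual hand-off):
 *      void *pv = iemExecMemAllocatorAlloc(pVCpu, cbCode);    -- pages come back read+write
 *      memcpy(pv, pabRecompiledCode, cbCode);                 -- emit/copy the native code
 *      iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode);     -- flip to read+exec and flush the icache
 */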
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for cReqUnits consecutive clear bits
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516
517 for (unsigned iIteration = 0;; iIteration++)
518 {
519 /*
520 * Adjust the request size so it'll fit the allocator alignment/whatnot.
521 *
522 * For the RTHeapSimple allocator this means to follow the logic described
523 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
524 * existing chunks if we think we've got sufficient free memory around.
525 *
526 * While for the alternative one we just align it up to a whole unit size.
527 */
528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
529 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
530#else
531 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
532#endif
533 if (cbReq <= pExecMemAllocator->cbFree)
534 {
535 uint32_t const cChunks = pExecMemAllocator->cChunks;
536 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
537 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
544 {
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 }
549 }
550
551 /*
552 * Can we grow it with another chunk?
553 */
554 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
555 {
556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
557 AssertLogRelRCReturn(rc, NULL);
558
559 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
560 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
561 if (pvRet)
562 return pvRet;
563 AssertFailed();
564 }
565
566 /*
567 * Try prune native TBs once.
568 */
569 if (iIteration == 0)
570 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
571 else
572 {
573 /** @todo stats... */
574 return NULL;
575 }
576 }
577
578}
579
580
581/** This is a hook that we may need later for changing memory protection back
582 * to readonly+exec */
583static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
584{
585#ifdef RT_OS_DARWIN
586 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
587 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
588 AssertRC(rc); RT_NOREF(pVCpu);
589
590 /*
591 * Flush the instruction cache:
592 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
593 */
594 /* sys_dcache_flush(pv, cb); - not necessary */
595 sys_icache_invalidate(pv, cb);
596#else
597 RT_NOREF(pVCpu, pv, cb);
598#endif
599}
600
601
602/**
603 * Frees executable memory.
604 */
605void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
609 Assert(pv);
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612#else
613 Assert(!((uintptr_t)pv & 63));
614#endif
615
616 /* Align the size as we did when allocating the block. */
617#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
618 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#else
620 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
621#endif
622
623 /* Free it / assert sanity. */
624#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
627 bool fFound = false;
628 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
629 {
630 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
631 fFound = offChunk < cbChunk;
632 if (fFound)
633 {
634#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
635 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
636 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
637
638 /* Check that it's valid and free it. */
639 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
641 for (uint32_t i = 1; i < cReqUnits; i++)
642 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
643 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
644
645 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
646 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
647
648 /* Update the stats. */
649 pExecMemAllocator->cbAllocated -= cb;
650 pExecMemAllocator->cbFree += cb;
651 pExecMemAllocator->cAllocations -= 1;
652 return;
653#else
654 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
655 break;
656#endif
657 }
658 }
659# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
660 AssertFailed();
661# else
662 Assert(fFound);
663# endif
664#endif
665
666#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
667 /* Update stats while cb is freshly calculated. */
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Free it. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674#endif
675}
676
677
678
679#ifdef IN_RING3
680# ifdef RT_OS_WINDOWS
681
682/**
683 * Initializes the unwind info structures for windows hosts.
684 */
685static int
686iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
687 void *pvChunk, uint32_t idxChunk)
688{
689 RT_NOREF(pVCpu);
690
691 /*
692 * The AMD64 unwind opcodes.
693 *
694 * This is a program that starts with RSP after a RET instruction that
695 * ends up in recompiled code, and the operations we describe here will
696 * restore all non-volatile registers and bring RSP back to where our
697 * RET address is. This means it's reverse order from what happens in
698 * the prologue.
699 *
700 * Note! Using a frame register approach here both because we have one
701 * and because the UWOP_ALLOC_LARGE argument values
702 * would be a pain to write initializers for. On the positive
703 * side, we're impervious to changes in the stack variable
704 * area and can deal with dynamic stack allocations if necessary.
705 */
706 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
707 {
708 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 0x10 (0x60) */
709 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
710 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
711 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
712 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
713 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
714 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
715 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
716 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
717 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
718 };
719 union
720 {
721 IMAGE_UNWIND_INFO Info;
722 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
723 } s_UnwindInfo =
724 {
725 {
726 /* .Version = */ 1,
727 /* .Flags = */ 0,
728 /* .SizeOfProlog = */ 16, /* whatever */
729 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
730 /* .FrameRegister = */ X86_GREG_xBP,
731 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
732 }
733 };
734 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
735 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
736
737 /*
738 * Calc how much space we need and allocate it off the exec heap.
739 */
740 unsigned const cFunctionEntries = 1;
741 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
742 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
743# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
744 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
745 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
746 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
747# else
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
749 - pExecMemAllocator->cbHeapBlockHdr;
750 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
751 32 /*cbAlignment*/);
752# endif
753 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
754 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
755
756 /*
757 * Initialize the structures.
758 */
759 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
760
761 paFunctions[0].BeginAddress = 0;
762 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
763 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
764
765 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
766 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
767
768 /*
769 * Register it.
770 */
771 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
772 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
773
774 return VINF_SUCCESS;
775}
776
777
778# else /* !RT_OS_WINDOWS */
779
780/**
781 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
784{
785 if (iValue >= 64)
786 {
787 Assert(iValue < 0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
790 }
791 else if (iValue >= 0)
792 *Ptr.pb++ = (uint8_t)iValue;
793 else if (iValue > -64)
794 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
795 else
796 {
797 Assert(iValue > -0x2000);
798 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
799 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
800 }
801 return Ptr;
802}
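/* Example (illustrative): the data alignment factor -8 emitted in the CIE below encodes
 * as the single byte 0x78 (0x38 | 0x40), while a value like -72 needs the two byte
 * sequence 0xb8 0x7f. */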
803
804
805/**
806 * Emits an ULEB128 encoded value (up to 64-bit wide).
807 */
808DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
809{
810 while (uValue >= 0x80)
811 {
812 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
813 uValue >>= 7;
814 }
815 *Ptr.pb++ = (uint8_t)uValue;
816 return Ptr;
817}
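/* Example (illustrative): all the DWARF register numbers used below are < 0x80 and thus
 * encode as a single byte, while a value like 0x90 would encode as the two bytes
 * 0x90 0x01 (low 7 bits with the continuation bit set, then the remaining bits). */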
818
819
820/**
821 * Emits a CFA rule as register @a uReg + offset @a off.
822 */
823DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
824{
825 *Ptr.pb++ = DW_CFA_def_cfa;
826 Ptr = iemDwarfPutUleb128(Ptr, uReg);
827 Ptr = iemDwarfPutUleb128(Ptr, off);
828 return Ptr;
829}
830
831
832/**
833 * Emits a register (@a uReg) save location:
834 * CFA + @a off * data_alignment_factor
835 */
836DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
837{
838 if (uReg < 0x40)
839 *Ptr.pb++ = DW_CFA_offset | uReg;
840 else
841 {
842 *Ptr.pb++ = DW_CFA_offset_extended;
843 Ptr = iemDwarfPutUleb128(Ptr, uReg);
844 }
845 Ptr = iemDwarfPutUleb128(Ptr, off);
846 return Ptr;
847}
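/* Example (illustrative): combined with the data alignment factor of -8 set up in the
 * CIE below, iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) records that RBP was saved
 * at CFA + 2 * -8, i.e. 16 bytes below the canonical frame address. */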
848
849
850# if 0 /* unused */
851/**
852 * Emits a register (@a uReg) save location, using signed offset:
853 * CFA + @a offSigned * data_alignment_factor
854 */
855DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
856{
857 *Ptr.pb++ = DW_CFA_offset_extended_sf;
858 Ptr = iemDwarfPutUleb128(Ptr, uReg);
859 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
860 return Ptr;
861}
862# endif
863
864
865/**
866 * Initializes the unwind info section for non-windows hosts.
867 */
868static int
869iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
870 void *pvChunk, uint32_t idxChunk)
871{
872 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
874
875 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
876
877 /*
878 * Generate the CIE first.
879 */
880# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
881 uint8_t const iDwarfVer = 3;
882# else
883 uint8_t const iDwarfVer = 4;
884# endif
885 RTPTRUNION const PtrCie = Ptr;
886 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
887 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
888 *Ptr.pb++ = iDwarfVer; /* DWARF version */
889 *Ptr.pb++ = 0; /* Augmentation. */
890 if (iDwarfVer >= 4)
891 {
892 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
893 *Ptr.pb++ = 0; /* Segment selector size. */
894 }
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
897# else
898 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
899# endif
900 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
903# elif defined(RT_ARCH_ARM64)
904 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
905# else
906# error "port me"
907# endif
908 /* Initial instructions: */
909# ifdef RT_ARCH_AMD64
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
918# elif defined(RT_ARCH_ARM64)
919# if 1
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
921# else
922 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
923# endif
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
936 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
937 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
938# else
939# error "port me"
940# endif
941 while ((Ptr.u - PtrCie.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the CIE size. */
944 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
945
946 /*
947 * Generate an FDE for the whole chunk area.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
951# endif
952 RTPTRUNION const PtrFde = Ptr;
953 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
954 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
955 Ptr.pu32++;
956 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
957 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
958# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
959 *Ptr.pb++ = DW_CFA_nop;
960# endif
961 while ((Ptr.u - PtrFde.u) & 3)
962 *Ptr.pb++ = DW_CFA_nop;
963 /* Finalize the FDE size. */
964 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
965
966 /* Terminator entry. */
967 *Ptr.pu32++ = 0;
968 *Ptr.pu32++ = 0; /* just to be sure... */
969 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
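    /* At this point abEhFrame contains, in order: the CIE (length, CIE id 0, version,
     * alignment factors, return address column and the initial CFA/offset rules), a
     * single FDE covering the whole chunk (length, back-reference to the CIE, absolute
     * start PC and PC range), and a zero terminator entry, i.e. a minimal .eh_frame
     * blob of the shape __register_frame / __register_frame_info expect. */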
970
971 /*
972 * Register it.
973 */
974# ifdef IEMNATIVE_USE_LIBUNWIND
975 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
976# else
977 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
978 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
979# endif
980
981# ifdef IEMNATIVE_USE_GDB_JIT
982 /*
983 * Now for telling GDB about this (experimental).
984 *
985 * This seems to work best with ET_DYN.
986 */
987 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
988# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
989 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
990 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
991# else
992 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
993 - pExecMemAllocator->cbHeapBlockHdr;
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
995# endif
996 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
997 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
998
999 RT_ZERO(*pSymFile);
1000
1001 /*
1002 * The ELF header:
1003 */
1004 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1005 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1006 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1007 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1008 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1009 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1010 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1011 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1012# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1013 pSymFile->EHdr.e_type = ET_DYN;
1014# else
1015 pSymFile->EHdr.e_type = ET_REL;
1016# endif
1017# ifdef RT_ARCH_AMD64
1018 pSymFile->EHdr.e_machine = EM_AMD64;
1019# elif defined(RT_ARCH_ARM64)
1020 pSymFile->EHdr.e_machine = EM_AARCH64;
1021# else
1022# error "port me"
1023# endif
1024 pSymFile->EHdr.e_version = 1; /*?*/
1025 pSymFile->EHdr.e_entry = 0;
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1028# else
1029 pSymFile->EHdr.e_phoff = 0;
1030# endif
1031 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1032 pSymFile->EHdr.e_flags = 0;
1033 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1035 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1036 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1037# else
1038 pSymFile->EHdr.e_phentsize = 0;
1039 pSymFile->EHdr.e_phnum = 0;
1040# endif
1041 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1042 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1043 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1044
1045 uint32_t offStrTab = 0;
1046#define APPEND_STR(a_szStr) do { \
1047 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1048 offStrTab += sizeof(a_szStr); \
1049 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1050 } while (0)
1051#define APPEND_STR_FMT(a_szStr, ...) do { \
1052 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1053 offStrTab++; \
1054 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1055 } while (0)
1056
1057 /*
1058 * Section headers.
1059 */
1060 /* Section header #0: NULL */
1061 unsigned i = 0;
1062 APPEND_STR("");
1063 RT_ZERO(pSymFile->aShdrs[i]);
1064 i++;
1065
1066 /* Section header: .eh_frame */
1067 pSymFile->aShdrs[i].sh_name = offStrTab;
1068 APPEND_STR(".eh_frame");
1069 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1070 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1071# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1072 pSymFile->aShdrs[i].sh_offset
1073 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1074# else
1075 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1076 pSymFile->aShdrs[i].sh_offset = 0;
1077# endif
1078
1079 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1080 pSymFile->aShdrs[i].sh_link = 0;
1081 pSymFile->aShdrs[i].sh_info = 0;
1082 pSymFile->aShdrs[i].sh_addralign = 1;
1083 pSymFile->aShdrs[i].sh_entsize = 0;
1084 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1085 i++;
1086
1087 /* Section header: .shstrtab */
1088 unsigned const iShStrTab = i;
1089 pSymFile->EHdr.e_shstrndx = iShStrTab;
1090 pSymFile->aShdrs[i].sh_name = offStrTab;
1091 APPEND_STR(".shstrtab");
1092 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1093 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1095 pSymFile->aShdrs[i].sh_offset
1096 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1097# else
1098 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1099 pSymFile->aShdrs[i].sh_offset = 0;
1100# endif
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 i++;
1107
1108 /* Section header: .symtab */
1109 pSymFile->aShdrs[i].sh_name = offStrTab;
1110 APPEND_STR(".symtab");
1111 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1112 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1113 pSymFile->aShdrs[i].sh_offset
1114 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1115 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_link = iShStrTab;
1117 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1118 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1119 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1120 i++;
1121
1122# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1123 /* Section header: .dynsym */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".dynsym");
1126 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1128 pSymFile->aShdrs[i].sh_offset
1129 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1130 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_link = iShStrTab;
1132 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1133 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1134 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1135 i++;
1136# endif
1137
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 /* Section header: .dynamic */
1140 pSymFile->aShdrs[i].sh_name = offStrTab;
1141 APPEND_STR(".dynamic");
1142 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1143 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1144 pSymFile->aShdrs[i].sh_offset
1145 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1146 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1147 pSymFile->aShdrs[i].sh_link = iShStrTab;
1148 pSymFile->aShdrs[i].sh_info = 0;
1149 pSymFile->aShdrs[i].sh_addralign = 1;
1150 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1151 i++;
1152# endif
1153
1154 /* Section header: .text */
1155 unsigned const iShText = i;
1156 pSymFile->aShdrs[i].sh_name = offStrTab;
1157 APPEND_STR(".text");
1158 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1159 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1161 pSymFile->aShdrs[i].sh_offset
1162 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1163# else
1164 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1165 pSymFile->aShdrs[i].sh_offset = 0;
1166# endif
1167 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1168 pSymFile->aShdrs[i].sh_link = 0;
1169 pSymFile->aShdrs[i].sh_info = 0;
1170 pSymFile->aShdrs[i].sh_addralign = 1;
1171 pSymFile->aShdrs[i].sh_entsize = 0;
1172 i++;
1173
1174 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1175
1176# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1177 /*
1178 * The program headers:
1179 */
1180 /* Everything in a single LOAD segment: */
1181 i = 0;
1182 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1183 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = 0;
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1189 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1190 i++;
1191 /* The .dynamic segment. */
1192 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1193 pSymFile->aPhdrs[i].p_flags = PF_R;
1194 pSymFile->aPhdrs[i].p_offset
1195 = pSymFile->aPhdrs[i].p_vaddr
1196 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1197 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1198 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1199 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1200 i++;
1201
1202 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1203
1204 /*
1205 * The dynamic section:
1206 */
1207 i = 0;
1208 pSymFile->aDyn[i].d_tag = DT_SONAME;
1209 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1210 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1219 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1220 i++;
1221 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1222 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1223 i++;
1224 pSymFile->aDyn[i].d_tag = DT_NULL;
1225 i++;
1226 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1227# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1228
1229 /*
1230 * Symbol tables:
1231 */
1232 /** @todo gdb doesn't seem to really like this ... */
1233 i = 0;
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1241 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1242# endif
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = 0;
1246 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1247 pSymFile->aSymbols[i].st_value = 0;
1248 pSymFile->aSymbols[i].st_size = 0;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251 i++;
1252
1253 pSymFile->aSymbols[i].st_name = offStrTab;
1254 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1255# if 0
1256 pSymFile->aSymbols[i].st_shndx = iShText;
1257 pSymFile->aSymbols[i].st_value = 0;
1258# else
1259 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1260 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1261# endif
1262 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1263 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1264 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1265# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1266 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1267 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1268# endif
1269 i++;
1270
1271 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1272 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1273
1274 /*
1275 * The GDB JIT entry and informing GDB.
1276 */
1277 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1278# if 1
1279 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1280# else
1281 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1282# endif
1283
1284 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1285 RTCritSectEnter(&g_IemNativeGdbJitLock);
1286 pEhFrame->GdbJitEntry.pNext = NULL;
1287 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1288 if (__jit_debug_descriptor.pTail)
1289 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1290 else
1291 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1292 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1293 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1294
1295 /* Notify GDB: */
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1297 __jit_debug_register_code();
1298 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1299 RTCritSectLeave(&g_IemNativeGdbJitLock);
1300
1301# else /* !IEMNATIVE_USE_GDB_JIT */
1302 RT_NOREF(pVCpu);
1303# endif /* !IEMNATIVE_USE_GDB_JIT */
1304
1305 return VINF_SUCCESS;
1306}
1307
1308# endif /* !RT_OS_WINDOWS */
1309#endif /* IN_RING3 */
1310
1311
1312/**
1313 * Adds another chunk to the executable memory allocator.
1314 *
1315 * This is used by the init code for the initial allocation and later by the
1316 * regular allocator function when it's out of memory.
1317 */
1318static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1319{
1320 /* Check that we've room for growth. */
1321 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1322 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1323
1324 /* Allocate a chunk. */
1325#ifdef RT_OS_DARWIN
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1327#else
1328 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1329#endif
1330 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 int rc = VINF_SUCCESS;
1334#else
1335 /* Initialize the heap for the chunk. */
1336 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1337 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1338 AssertRC(rc);
1339 if (RT_SUCCESS(rc))
1340 {
1341 /*
1342 * We want the memory to be aligned on 64 byte, so the first time thru
1343 * here we do some exploratory allocations to see how we can achieve this.
1344 * On subsequent runs we only make an initial adjustment allocation, if
1345 * necessary.
1346 *
1347 * Since we own the heap implementation, we know that the internal block
1348 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1349 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1350 * to the size, align up by 64 bytes, and subtract 32 bytes.
1351 *
1352 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1353 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1354 * allocation to force subsequent allocations to return 64 byte aligned
1355 * user areas.
1356 */
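        /* Illustrative numbers (not from the original sources): a 100 byte request becomes
         * RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes, so when the user area starts on a
         * 64 byte boundary, the next block's 32 byte header ends exactly on the following
         * 64 byte boundary and the next user area is 64 byte aligned again. */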
1357 if (!pExecMemAllocator->cbHeapBlockHdr)
1358 {
1359 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1360 pExecMemAllocator->cbHeapAlignTweak = 64;
1361 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1362 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1364
1365 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1372 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1373 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1374 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1375 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1376
1377 RTHeapSimpleFree(hHeap, pvTest2);
1378 RTHeapSimpleFree(hHeap, pvTest1);
1379 }
1380 else
1381 {
1382 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1383 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1384 }
1385 if (RT_SUCCESS(rc))
1386#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1387 {
1388 /*
1389 * Add the chunk.
1390 *
1391 * This must be done before the unwind init so windows can allocate
1392 * memory from the chunk when using the alternative sub-allocator.
1393 */
1394 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1395#ifdef IN_RING3
1396 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1397#endif
1398#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1400#else
1401 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1402 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1403 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1404 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1405#endif
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1411 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1412 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1413#else
1414 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1415 pExecMemAllocator->cbTotal += cbFree;
1416 pExecMemAllocator->cbFree += cbFree;
1417#endif
1418
1419#ifdef IN_RING3
1420 /*
1421 * Initialize the unwind information (this cannot really fail atm).
1422 * (This sets pvUnwindInfo.)
1423 */
1424 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1425 if (RT_SUCCESS(rc))
1426#endif
1427 {
1428 return VINF_SUCCESS;
1429 }
1430
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 /* Just in case the impossible happens, undo the above: */
1433 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1435 pExecMemAllocator->cChunks = idxChunk;
1436 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1437 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1438 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1439 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1440#endif
1441 }
1442#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 }
1444#endif
1445 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1446 RT_NOREF(pVCpu);
1447 return rc;
1448}
1449
1450
1451/**
1452 * Initializes the executable memory allocator for native recompilation on the
1453 * calling EMT.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu The cross context virtual CPU structure of the calling
1457 * thread.
1458 * @param cbMax The max size of the allocator.
1459 * @param cbInitial The initial allocator size.
1460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1461 * dependent).
1462 */
1463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1464{
1465 /*
1466 * Validate input.
1467 */
1468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1470 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1471 || cbChunk == 0
1472 || ( RT_IS_POWER_OF_TWO(cbChunk)
1473 && cbChunk >= _1M
1474 && cbChunk <= _256M
1475 && cbChunk <= cbMax),
1476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1477 VERR_OUT_OF_RANGE);
1478
1479 /*
1480 * Adjust/figure out the chunk size.
1481 */
1482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1483 {
1484 if (cbMax >= _256M)
1485 cbChunk = _64M;
1486 else
1487 {
1488 if (cbMax < _16M)
1489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1490 else
1491 cbChunk = (uint32_t)cbMax / 4;
1492 if (!RT_IS_POWER_OF_TWO(cbChunk))
1493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1494 }
1495 }
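    /* Example (illustrative): for cbMax = 64 MiB this picks cbChunk = 64 MiB / 4 = 16 MiB,
     * which is already a power of two, giving cMaxChunks = 4 below. */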
1496
1497 if (cbChunk > cbMax)
1498 cbMax = cbChunk;
1499 else
1500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1503
1504 /*
1505 * Allocate and initialize the allocator instance.
1506 */
1507 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1508#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1509 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1510 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1511 cbNeeded += cbBitmap * cMaxChunks;
1512 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1513 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1514#endif
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1533 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1534 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1535 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1536#endif
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1539#endif
1540 for (uint32_t i = 0; i < cMaxChunks; i++)
1541 {
1542#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1543 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1544 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1545#else
1546 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1547#endif
1548 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1549#ifdef IN_RING0
1550 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1551#else
1552 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1553#endif
1554 }
1555 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1556
1557 /*
1558 * Do the initial allocations.
1559 */
1560 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1561 {
1562 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1563 AssertLogRelRCReturn(rc, rc);
1564 }
1565
1566 pExecMemAllocator->idxChunkHint = 0;
1567
1568 return VINF_SUCCESS;
1569}
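/*
 * A worked sizing example for the chunk calculations above (an illustrative
 * sketch only; the 64 MB maximum is made up and the unit shift of 8 is an
 * assumption, the real IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT is defined earlier
 * in this file):
 *
 *      uint64_t const cbMax      = _64M;                         // requested maximum
 *      uint32_t       cbChunk    = (uint32_t)cbMax / 4;          // 16 MB, already a power of two
 *      uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);  // 4 chunks
 *      uint32_t const cUnits     = cbChunk >> 8;                 // 65536 sub-allocation units per chunk
 *      uint32_t const cBmElems   = cbChunk >> (8 + 6);           // 1024 uint64_t bitmap words, i.e. 8 KB
 */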
1570
1571
1572/*********************************************************************************************************************************
1573* Native Recompilation *
1574*********************************************************************************************************************************/
1575
1576
1577/**
1578 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1581{
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1584}
1585
1586
1587/**
1588 * Used by TB code when it wants to raise a \#GP(0).
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1591{
1592 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1593#ifndef _MSC_VER
1594 return VINF_IEM_RAISED_XCPT; /* not reached */
1595#endif
1596}
1597
1598
1599/**
1600 * Used by TB code when detecting opcode changes.
1601 * @see iemThreadedFuncWorkerObsoleteTb
1602 */
1603IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1604{
1605 /* We set fSafeToFree to false because we're being called in the context
1606 of a TB callback function, which for native TBs means we cannot release
1607 the executable memory until we've returned our way back to iemTbExec as
1608 that return path goes via the native code generated for the TB. */
1609 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1610 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1611 return VINF_IEM_REEXEC_BREAK;
1612}
1613
1614
1615/**
1616 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1619{
1620 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1621 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1622 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1623 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1624 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1625 return VINF_IEM_REEXEC_BREAK;
1626}
1627
1628
1629/**
1630 * Used by TB code when we missed a PC check after a branch.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1633{
1634 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1635 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1636 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1637 pVCpu->iem.s.pbInstrBuf));
1638 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1639 return VINF_IEM_REEXEC_BREAK;
1640}
1641
1642
1643
1644/*********************************************************************************************************************************
1645* Helpers: Segmented memory fetches and stores. *
1646*********************************************************************************************************************************/
1647
1648/**
1649 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1650 */
1651IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1652{
1653#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1654 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1655#else
1656 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1657#endif
1658}
1659
1660
1661/**
1662 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1663 * to 16 bits.
1664 */
1665IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1666{
1667#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1668 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1669#else
1670 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1671#endif
1672}
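/* A quick illustration of the cast chains used by the sign-extending fetch
   helpers above and below (a sketch, not part of the build): the value is
   first sign-extended to the requested width and the intermediate unsigned
   cast then stops the extension before widening to 64 bits.

       uint8_t const  bFetched = 0x80;                                          // -128 as int8_t
       uint64_t const uResult  = (uint64_t)(uint16_t)(int16_t)(int8_t)bFetched;
       // uResult == UINT64_C(0x000000000000FF80), not UINT64_C(0xFFFFFFFFFFFFFF80)
*/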
1673
1674
1675/**
1676 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1677 * to 32 bits.
1678 */
1679IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1680{
1681#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1682 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1683#else
1684 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1685#endif
1686}
1687
1688/**
1689 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1690 * to 64 bits.
1691 */
1692IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1693{
1694#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1695 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1696#else
1697 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1698#endif
1699}
1700
1701
1702/**
1703 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1704 */
1705IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1706{
1707#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1708 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1709#else
1710 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1711#endif
1712}
1713
1714
1715/**
1716 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1717 * to 32 bits.
1718 */
1719IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1720{
1721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1722 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1723#else
1724 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1725#endif
1726}
1727
1728
1729/**
1730 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1731 * to 64 bits.
1732 */
1733IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1734{
1735#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1736 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1737#else
1738 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1739#endif
1740}
1741
1742
1743/**
1744 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1745 */
1746IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1747{
1748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1749 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1750#else
1751 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1752#endif
1753}
1754
1755
1756/**
1757 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1758 * to 64 bits.
1759 */
1760IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1761{
1762#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1763 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1764#else
1765 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1766#endif
1767}
1768
1769
1770/**
1771 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1772 */
1773IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1774{
1775#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1776 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1777#else
1778 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1779#endif
1780}
1781
1782
1783/**
1784 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1785 */
1786IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1787{
1788#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1789 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1790#else
1791 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1792#endif
1793}
1794
1795
1796/**
1797 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1798 */
1799IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1800{
1801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1802 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1803#else
1804 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1805#endif
1806}
1807
1808
1809/**
1810 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1815 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1816#else
1817 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1828 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1829#else
1830 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1831#endif
1832}
1833
1834
1835
1836/**
1837 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1838 */
1839IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1840{
1841#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1842 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1843#else
1844 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1845#endif
1846}
1847
1848
1849/**
1850 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1851 */
1852IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1853{
1854#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1855 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1856#else
1857 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1858#endif
1859}
1860
1861
1862/**
1863 * Used by TB code to store a 32-bit selector value onto a generic stack.
1864 *
1865 * Intel CPUs don't write a whole dword, hence the special function.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1870 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1871#else
1872 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1873#endif
1874}
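/* A minimal model of the selector push special case (illustrative only; it
   assumes the documented Intel behaviour of updating just the low word of the
   dword-sized stack slot, and the helper name is hypothetical - the real work
   is done by the iemMemStoreStackU32SReg*Jmp workers):

       static void iemExampleStoreSRegIntoDwordSlot(uint32_t *pu32Slot, uint32_t u32Sel)
       {
           *(uint16_t *)pu32Slot = (uint16_t)u32Sel; // low word written, high word
                                                     // keeps its old value (little endian)
       }
*/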
1875
1876
1877/**
1878 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1883 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1884#else
1885 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1886#endif
1887}
1888
1889
1890/**
1891 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1892 */
1893IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1894{
1895#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1896 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1897#else
1898 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1899#endif
1900}
1901
1902
1903/**
1904 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1905 */
1906IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1907{
1908#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1909 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1910#else
1911 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1912#endif
1913}
1914
1915
1916/**
1917 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1922 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1923#else
1924 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1925#endif
1926}
1927
1928
1929
1930/*********************************************************************************************************************************
1931* Helpers: Flat memory fetches and stores. *
1932*********************************************************************************************************************************/
1933
1934/**
1935 * Used by TB code to load unsigned 8-bit data w/ flat address.
1936 * @note Zero extending the value to 64-bit to simplify assembly.
1937 */
1938IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1939{
1940#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1941 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1942#else
1943 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1944#endif
1945}
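/* A note on the flat helpers in this section (a sketch of the convention, on
   the assumption that UINT8_MAX as iSegReg means "no segment register" in the
   underlying iemMemFetchData and iemMemStoreData workers): the two calls below
   are intended to read the same byte, the flat variant simply drops the
   segment register argument.

       uint64_t const u1 = iemNativeHlpMemFetchDataU8(pVCpu, GCPtrMem, UINT8_MAX);
       uint64_t const u2 = iemNativeHlpMemFlatFetchDataU8(pVCpu, GCPtrMem);
*/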
1946
1947
1948/**
1949 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1950 * to 16 bits.
1951 * @note Zero extending the value to 64-bit to simplify assembly.
1952 */
1953IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1954{
1955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1956 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1957#else
1958 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1959#endif
1960}
1961
1962
1963/**
1964 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1965 * to 32 bits.
1966 * @note Zero extending the value to 64-bit to simplify assembly.
1967 */
1968IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1969{
1970#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1971 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1972#else
1973 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1974#endif
1975}
1976
1977
1978/**
1979 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1980 * to 64 bits.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1983{
1984#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1985 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1986#else
1987 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1988#endif
1989}
1990
1991
1992/**
1993 * Used by TB code to load unsigned 16-bit data w/ flat address.
1994 * @note Zero extending the value to 64-bit to simplify assembly.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1999 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2000#else
2001 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2008 * to 32 bits.
2009 * @note Zero extending the value to 64-bit to simplify assembly.
2010 */
2011IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2012{
2013#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2014 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2015#else
2016 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2017#endif
2018}
2019
2020
2021/**
2022 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2023 * to 64 bits.
2024 * @note Zero extending the value to 64-bit to simplify assembly.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2027{
2028#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2029 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2030#else
2031 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2032#endif
2033}
2034
2035
2036/**
2037 * Used by TB code to load unsigned 32-bit data w/ flat address.
2038 * @note Zero extending the value to 64-bit to simplify assembly.
2039 */
2040IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2041{
2042#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2043 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2044#else
2045 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2046#endif
2047}
2048
2049
2050/**
2051 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2052 * to 64 bits.
2053 * @note Zero extending the value to 64-bit to simplify assembly.
2054 */
2055IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2056{
2057#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2058 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2059#else
2060 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2061#endif
2062}
2063
2064
2065/**
2066 * Used by TB code to load unsigned 64-bit data w/ flat address.
2067 */
2068IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2069{
2070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2071 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2072#else
2073 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2074#endif
2075}
2076
2077
2078/**
2079 * Used by TB code to store unsigned 8-bit data w/ flat address.
2080 */
2081IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2082{
2083#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2084 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2085#else
2086 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2087#endif
2088}
2089
2090
2091/**
2092 * Used by TB code to store unsigned 16-bit data w/ flat address.
2093 */
2094IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2095{
2096#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2097 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2098#else
2099 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2100#endif
2101}
2102
2103
2104/**
2105 * Used by TB code to store unsigned 32-bit data w/ flat address.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2110 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2111#else
2112 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to store unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2123 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2124#else
2125 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2126#endif
2127}
2128
2129
2130
2131/**
2132 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2133 */
2134IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2135{
2136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2137 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2138#else
2139 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2140#endif
2141}
2142
2143
2144/**
2145 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2146 */
2147IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2148{
2149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2150 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2151#else
2152 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2153#endif
2154}
2155
2156
2157/**
2158 * Used by TB code to store a segment selector value onto a flat stack.
2159 *
2160 * Intel CPUs don't write a whole dword, hence the special function.
2161 */
2162IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2163{
2164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2165 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2166#else
2167 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2168#endif
2169}
2170
2171
2172/**
2173 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2174 */
2175IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2176{
2177#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2178 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2179#else
2180 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2181#endif
2182}
2183
2184
2185/**
2186 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2187 */
2188IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2189{
2190#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2191 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2192#else
2193 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2194#endif
2195}
2196
2197
2198/**
2199 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2202{
2203#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2204 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2205#else
2206 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2207#endif
2208}
2209
2210
2211/**
2212 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2217 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2218#else
2219 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2220#endif
2221}
2222
2223
2224
2225/*********************************************************************************************************************************
2226* Helpers: Segmented memory mapping. *
2227*********************************************************************************************************************************/
2228
2229/**
2230 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2231 * segmentation.
2232 */
2233IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2234 RTGCPTR GCPtrMem, uint8_t iSegReg))
2235{
2236#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2237 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2238#else
2239 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2240#endif
2241}
2242
2243
2244/**
2245 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2246 */
2247IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2248 RTGCPTR GCPtrMem, uint8_t iSegReg))
2249{
2250#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2251 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2252#else
2253 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2254#endif
2255}
2256
2257
2258/**
2259 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2260 */
2261IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2262 RTGCPTR GCPtrMem, uint8_t iSegReg))
2263{
2264#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2265 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2266#else
2267 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2268#endif
2269}
2270
2271
2272/**
2273 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2274 */
2275IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2276 RTGCPTR GCPtrMem, uint8_t iSegReg))
2277{
2278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2279 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2280#else
2281 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2282#endif
2283}
2284
2285
2286/**
2287 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2288 * segmentation.
2289 */
2290IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2291 RTGCPTR GCPtrMem, uint8_t iSegReg))
2292{
2293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2294 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2295#else
2296 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2297#endif
2298}
2299
2300
2301/**
2302 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2303 */
2304IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2305 RTGCPTR GCPtrMem, uint8_t iSegReg))
2306{
2307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2308 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2309#else
2310 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2311#endif
2312}
2313
2314
2315/**
2316 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2317 */
2318IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2319 RTGCPTR GCPtrMem, uint8_t iSegReg))
2320{
2321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2322 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2323#else
2324 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2325#endif
2326}
2327
2328
2329/**
2330 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2331 */
2332IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2333 RTGCPTR GCPtrMem, uint8_t iSegReg))
2334{
2335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2336 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2337#else
2338 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2339#endif
2340}
2341
2342
2343/**
2344 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2345 * segmentation.
2346 */
2347IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2348 RTGCPTR GCPtrMem, uint8_t iSegReg))
2349{
2350#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2351 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2352#else
2353 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2354#endif
2355}
2356
2357
2358/**
2359 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2360 */
2361IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2362 RTGCPTR GCPtrMem, uint8_t iSegReg))
2363{
2364#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2365 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2366#else
2367 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2368#endif
2369}
2370
2371
2372/**
2373 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2374 */
2375IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2376 RTGCPTR GCPtrMem, uint8_t iSegReg))
2377{
2378#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2379 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2380#else
2381 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2382#endif
2383}
2384
2385
2386/**
2387 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2390 RTGCPTR GCPtrMem, uint8_t iSegReg))
2391{
2392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2393 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2394#else
2395 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2396#endif
2397}
2398
2399
2400/**
2401 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2402 * segmentation.
2403 */
2404IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2405 RTGCPTR GCPtrMem, uint8_t iSegReg))
2406{
2407#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2408 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2409#else
2410 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2411#endif
2412}
2413
2414
2415/**
2416 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2417 */
2418IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2419 RTGCPTR GCPtrMem, uint8_t iSegReg))
2420{
2421#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2422 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2423#else
2424 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2425#endif
2426}
2427
2428
2429/**
2430 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2431 */
2432IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2433 RTGCPTR GCPtrMem, uint8_t iSegReg))
2434{
2435#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2436 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2437#else
2438 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2439#endif
2440}
2441
2442
2443/**
2444 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2445 */
2446IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2447 RTGCPTR GCPtrMem, uint8_t iSegReg))
2448{
2449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2450 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2451#else
2452 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2453#endif
2454}
2455
2456
2457/**
2458 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2459 */
2460IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2461 RTGCPTR GCPtrMem, uint8_t iSegReg))
2462{
2463#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2464 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2465#else
2466 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2467#endif
2468}
2469
2470
2471/**
2472 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2473 */
2474IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2475 RTGCPTR GCPtrMem, uint8_t iSegReg))
2476{
2477#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2478 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2479#else
2480 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2481#endif
2482}
2483
2484
2485/**
2486 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2487 * segmentation.
2488 */
2489IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2490 RTGCPTR GCPtrMem, uint8_t iSegReg))
2491{
2492#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2493 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2494#else
2495 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2496#endif
2497}
2498
2499
2500/**
2501 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2502 */
2503IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2504 RTGCPTR GCPtrMem, uint8_t iSegReg))
2505{
2506#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2507 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2508#else
2509 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2510#endif
2511}
2512
2513
2514/**
2515 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2516 */
2517IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2518 RTGCPTR GCPtrMem, uint8_t iSegReg))
2519{
2520#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2521 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2522#else
2523 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2524#endif
2525}
2526
2527
2528/**
2529 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2530 */
2531IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2532 RTGCPTR GCPtrMem, uint8_t iSegReg))
2533{
2534#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2535 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2536#else
2537 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2538#endif
2539}
2540
2541
2542/*********************************************************************************************************************************
2543* Helpers: Flat memory mapping. *
2544*********************************************************************************************************************************/
2545
2546/**
2547 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2548 * address.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2551{
2552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2553 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2554#else
2555 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2556#endif
2557}
2558
2559
2560/**
2561 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2562 */
2563IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2564{
2565#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2566 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2567#else
2568 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2569#endif
2570}
2571
2572
2573/**
2574 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2575 */
2576IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2577{
2578#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2579 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2580#else
2581 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2582#endif
2583}
2584
2585
2586/**
2587 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2588 */
2589IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2590{
2591#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2592 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2593#else
2594 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2595#endif
2596}
2597
2598
2599/**
2600 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2601 * address.
2602 */
2603IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2604{
2605#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2606 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2607#else
2608 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2609#endif
2610}
2611
2612
2613/**
2614 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2615 */
2616IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2617{
2618#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2619 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2620#else
2621 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2622#endif
2623}
2624
2625
2626/**
2627 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2628 */
2629IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2630{
2631#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2632 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2633#else
2634 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2635#endif
2636}
2637
2638
2639/**
2640 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2641 */
2642IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2643{
2644#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2645 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2646#else
2647 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2648#endif
2649}
2650
2651
2652/**
2653 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2654 * address.
2655 */
2656IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2657{
2658#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2659 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2660#else
2661 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2662#endif
2663}
2664
2665
2666/**
2667 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2668 */
2669IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2670{
2671#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2672 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2673#else
2674 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2675#endif
2676}
2677
2678
2679/**
2680 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2681 */
2682IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2683{
2684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2685 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2686#else
2687 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2688#endif
2689}
2690
2691
2692/**
2693 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2694 */
2695IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2696{
2697#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2698 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2699#else
2700 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2701#endif
2702}
2703
2704
2705/**
2706 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2707 * address.
2708 */
2709IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2710{
2711#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2712 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2713#else
2714 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2715#endif
2716}
2717
2718
2719/**
2720 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2721 */
2722IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2723{
2724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2725 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2726#else
2727 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2728#endif
2729}
2730
2731
2732/**
2733 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2734 */
2735IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2736{
2737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2738 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2739#else
2740 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2741#endif
2742}
2743
2744
2745/**
2746 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2747 */
2748IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2749{
2750#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2751 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2752#else
2753 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2754#endif
2755}
2756
2757
2758/**
2759 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2773 */
2774IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2775{
2776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2777 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2778#else
2779 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2780#endif
2781}
2782
2783
2784/**
2785 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2786 * address.
2787 */
2788IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2789{
2790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2791 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2792#else
2793 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2794#endif
2795}
2796
2797
2798/**
2799 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2800 */
2801IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2802{
2803#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2804 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2805#else
2806 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2807#endif
2808}
2809
2810
2811/**
2812 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2813 */
2814IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2815{
2816#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2817 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2818#else
2819 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2820#endif
2821}
2822
2823
2824/**
2825 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2826 */
2827IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2828{
2829#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2830 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2831#else
2832 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2833#endif
2834}
2835
2836
2837/*********************************************************************************************************************************
2838* Helpers: Commit, rollback & unmap *
2839*********************************************************************************************************************************/
2840
2841/**
2842 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2843 */
2844IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2845{
2846 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2847}
2848
2849
2850/**
2851 * Used by TB code to commit and unmap a read-write memory mapping.
2852 */
2853IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2854{
2855 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2856}
2857
2858
2859/**
2860 * Used by TB code to commit and unmap a write-only memory mapping.
2861 */
2862IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2863{
2864 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2865}
2866
2867
2868/**
2869 * Used by TB code to commit and unmap a read-only memory mapping.
2870 */
2871IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2872{
2873 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2874}
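/* Typical pairing of the mapping and unmapping helpers (an illustrative
   sketch, not generated TB code; pVCpu and GCPtrMem are assumed to be in
   scope): the byte written to pbUnmapInfo by the map helper is handed back to
   the matching commit-and-unmap helper once the guest access is done.

       uint8_t   bUnmapInfo = 0;
       uint32_t *pu32       = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
       *pu32 += 1;                                          // the guest memory access
       iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);  // commit and release the mapping
*/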
2875
2876
2877/**
2878 * Reinitializes the native recompiler state.
2879 *
2880 * Called before starting a new recompile job.
2881 */
2882static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2883{
2884 pReNative->cLabels = 0;
2885 pReNative->bmLabelTypes = 0;
2886 pReNative->cFixups = 0;
2887#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2888 pReNative->pDbgInfo->cEntries = 0;
2889#endif
2890 pReNative->pTbOrg = pTb;
2891 pReNative->cCondDepth = 0;
2892 pReNative->uCondSeqNo = 0;
2893 pReNative->uCheckIrqSeqNo = 0;
2894 pReNative->uTlbSeqNo = 0;
2895
2896 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2897#if IEMNATIVE_HST_GREG_COUNT < 32
2898 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2899#endif
2900 ;
2901 pReNative->Core.bmHstRegsWithGstShadow = 0;
2902 pReNative->Core.bmGstRegShadows = 0;
2903 pReNative->Core.bmVars = 0;
2904 pReNative->Core.bmStack = 0;
2905 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2906 pReNative->Core.u64ArgVars = UINT64_MAX;
2907
2908 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 9);
2909 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2910 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2911 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2912 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2913 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2914 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2915 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2916 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2917 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2918
2919 /* Full host register reinit: */
2920 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2921 {
2922 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2923 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2924 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2925 }
2926
2927 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2928 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2929#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2930 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2931#endif
2932#ifdef IEMNATIVE_REG_FIXED_TMP0
2933 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2934#endif
2935 );
2936 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2937 {
2938 fRegs &= ~RT_BIT_32(idxReg);
2939 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2940 }
2941
2942 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2943#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2944 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2945#endif
2946#ifdef IEMNATIVE_REG_FIXED_TMP0
2947 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2948#endif
2949 return pReNative;
2950}
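/* A small numeric illustration of the bmHstRegs seeding above (a sketch; the
   host register count of 16 is hypothetical): with IEMNATIVE_HST_GREG_COUNT at
   16, the extra term marks host registers 16..31 as permanently allocated so
   the register allocator never hands them out.

       uint32_t const fInitial = IEMNATIVE_REG_FIXED_MASK | ~(RT_BIT(16) - 1U);
       // the second term evaluates to UINT32_C(0xffff0000)
*/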
2951
2952
2953/**
2954 * Allocates and initializes the native recompiler state.
2955 *
2956 * This is called the first time an EMT wants to recompile something.
2957 *
2958 * @returns Pointer to the new recompiler state.
2959 * @param pVCpu The cross context virtual CPU structure of the calling
2960 * thread.
2961 * @param pTb The TB that's about to be recompiled.
2962 * @thread EMT(pVCpu)
2963 */
2964static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2965{
2966 VMCPU_ASSERT_EMT(pVCpu);
2967
2968 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2969 AssertReturn(pReNative, NULL);
2970
2971 /*
2972 * Try allocate all the buffers and stuff we need.
2973 */
2974 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2975 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2976 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2977#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2978 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2979#endif
2980 if (RT_LIKELY( pReNative->pInstrBuf
2981 && pReNative->paLabels
2982 && pReNative->paFixups)
2983#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2984 && pReNative->pDbgInfo
2985#endif
2986 )
2987 {
2988 /*
2989 * Set the buffer & array sizes on success.
2990 */
2991 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2992 pReNative->cLabelsAlloc = _8K;
2993 pReNative->cFixupsAlloc = _16K;
2994#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2995 pReNative->cDbgInfoAlloc = _16K;
2996#endif
2997
2998 /* Other constant stuff: */
2999 pReNative->pVCpu = pVCpu;
3000
3001 /*
3002 * Done, just need to save it and reinit it.
3003 */
3004 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3005 return iemNativeReInit(pReNative, pTb);
3006 }
3007
3008 /*
3009 * Failed. Cleanup and return.
3010 */
3011 AssertFailed();
3012 RTMemFree(pReNative->pInstrBuf);
3013 RTMemFree(pReNative->paLabels);
3014 RTMemFree(pReNative->paFixups);
3015#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3016 RTMemFree(pReNative->pDbgInfo);
3017#endif
3018 RTMemFree(pReNative);
3019 return NULL;
3020}
3021
3022
3023/**
3024 * Creates a label.
3025 *
3026 * If the label does not yet have a defined position,
3027 * call iemNativeLabelDefine() later to set it.
3028 *
3029 * @returns Label ID. Throws VBox status code on failure, so no need to check
3030 * the return value.
3031 * @param pReNative The native recompile state.
3032 * @param enmType The label type.
3033 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3034 * label is not yet defined (default).
3035 * @param uData Data associated with the label. Only applicable to
3036 * certain types of labels. Default is zero.
3037 */
3038DECL_HIDDEN_THROW(uint32_t)
3039iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3040 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3041{
3042 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3043
3044 /*
3045 * Locate existing label definition.
3046 *
3047 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3048 * and uData is zero.
3049 */
3050 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3051 uint32_t const cLabels = pReNative->cLabels;
3052 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3053#ifndef VBOX_STRICT
3054 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3055 && offWhere == UINT32_MAX
3056 && uData == 0
3057#endif
3058 )
3059 {
3060#ifndef VBOX_STRICT
3061 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3062 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3063 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3064 if (idxLabel < pReNative->cLabels)
3065 return idxLabel;
3066#else
3067 for (uint32_t i = 0; i < cLabels; i++)
3068 if ( paLabels[i].enmType == enmType
3069 && paLabels[i].uData == uData)
3070 {
3071 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3072 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3073 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3074 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3075 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3076 return i;
3077 }
3078 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3079 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3080#endif
3081 }
3082
3083 /*
3084 * Make sure we've got room for another label.
3085 */
3086 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3087 { /* likely */ }
3088 else
3089 {
3090 uint32_t cNew = pReNative->cLabelsAlloc;
3091 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3092 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3093 cNew *= 2;
3094 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3095 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3096 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3097 pReNative->paLabels = paLabels;
3098 pReNative->cLabelsAlloc = cNew;
3099 }
3100
3101 /*
3102 * Define a new label.
3103 */
3104 paLabels[cLabels].off = offWhere;
3105 paLabels[cLabels].enmType = enmType;
3106 paLabels[cLabels].uData = uData;
3107 pReNative->cLabels = cLabels + 1;
3108
3109 Assert((unsigned)enmType < 64);
3110 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3111
3112 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3113 {
3114 Assert(uData == 0);
3115 pReNative->aidxUniqueLabels[enmType] = cLabels;
3116 }
3117
3118 if (offWhere != UINT32_MAX)
3119 {
3120#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3121 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3122 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3123#endif
3124 }
3125 return cLabels;
3126}
3127
3128
3129/**
3130 * Defines the location of an existing label.
3131 *
3132 * @param pReNative The native recompile state.
3133 * @param idxLabel The label to define.
3134 * @param offWhere The position.
3135 */
3136DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3137{
3138 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3139 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3140 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3141 pLabel->off = offWhere;
3142#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3143 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3144 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3145#endif
3146}
3147
3148
3149/**
3150 * Looks up a label.
3151 *
3152 * @returns Label ID if found, UINT32_MAX if not.
3153 */
3154static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3155 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3156{
3157 Assert((unsigned)enmType < 64);
3158 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3159 {
3160 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3161 return pReNative->aidxUniqueLabels[enmType];
3162
3163 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3164 uint32_t const cLabels = pReNative->cLabels;
3165 for (uint32_t i = 0; i < cLabels; i++)
3166 if ( paLabels[i].enmType == enmType
3167 && paLabels[i].uData == uData
3168 && ( paLabels[i].off == offWhere
3169 || offWhere == UINT32_MAX
3170 || paLabels[i].off == UINT32_MAX))
3171 return i;
3172 }
3173 return UINT32_MAX;
3174}
3175
3176
3177/**
3178 * Adds a fixup.
3179 *
3180 * @throws VBox status code (int) on failure.
3181 * @param pReNative The native recompile state.
3182 * @param offWhere The instruction offset of the fixup location.
3183 * @param idxLabel The target label ID for the fixup.
3184 * @param enmType The fixup type.
3185 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3186 */
3187DECL_HIDDEN_THROW(void)
3188iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3189 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3190{
3191 Assert(idxLabel <= UINT16_MAX);
3192 Assert((unsigned)enmType <= UINT8_MAX);
3193
3194 /*
3195 * Make sure we've got room for another fixup.
3196 */
3197 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3198 uint32_t const cFixups = pReNative->cFixups;
3199 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3200 { /* likely */ }
3201 else
3202 {
3203 uint32_t cNew = pReNative->cFixupsAlloc;
3204 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3205 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3206 cNew *= 2;
3207 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3208 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3209 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3210 pReNative->paFixups = paFixups;
3211 pReNative->cFixupsAlloc = cNew;
3212 }
3213
3214 /*
3215 * Add the fixup.
3216 */
3217 paFixups[cFixups].off = offWhere;
3218 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3219 paFixups[cFixups].enmType = enmType;
3220 paFixups[cFixups].offAddend = offAddend;
3221 pReNative->cFixups = cFixups + 1;
3222}
3223
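/*
 * Illustrative sketch, not part of the original source: the forward-branch
 * pattern the label and fixup helpers above support.  A label is created
 * without a position, the branch site records a fixup against it, and the
 * label is defined once the target offset is known.  'enmLabelType',
 * 'enmFixupType' and 'offBranchField' are placeholders; real callers pass
 * concrete IEMNATIVELABELTYPE / IEMNATIVEFIXUPTYPE values and the code buffer
 * offset of the instruction field that needs patching.
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *      // ... emit the branch at 'off', leaving its target field blank ...
 *      iemNativeAddFixup(pReNative, offBranchField, idxLabel, enmFixupType);
 *      // ... later, when the target position is reached:
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */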
3224
3225/**
3226 * Slow code path for iemNativeInstrBufEnsure.
3227 */
3228DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3229{
3230 /* Double the buffer size till we meet the request. */
3231 uint32_t cNew = pReNative->cInstrBufAlloc;
3232 AssertReturn(cNew > 0, NULL);
3233 do
3234 cNew *= 2;
3235 while (cNew < off + cInstrReq);
3236
3237 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3238#ifdef RT_ARCH_ARM64
3239 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3240#else
3241 uint32_t const cbMaxInstrBuf = _2M;
3242#endif
3243 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3244
3245 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3246 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3247
3248#ifdef VBOX_STRICT
3249 pReNative->offInstrBufChecked = off + cInstrReq;
3250#endif
3251 pReNative->cInstrBufAlloc = cNew;
3252 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3253}
3254
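/*
 * Illustrative sketch, not part of the original source: how the slow path above
 * is normally reached.  Emitters call the header-defined iemNativeInstrBufEnsure()
 * before writing instructions; only when the current allocation is too small does
 * that fall back to iemNativeInstrBufEnsureSlow(), which keeps doubling the buffer
 * until 'off + cInstrReq' fits or the per-architecture cap above is hit.
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
 *      // up to 4 instruction units may now be written at pCodeBuf[off] and onwards
 */
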
3255#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3256
3257/**
3258 * Grows the static debug info array used during recompilation.
3259 *
3260 * @returns Pointer to the new debug info block; throws VBox status code on
3261 * failure, so no need to check the return value.
3262 */
3263DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3264{
3265 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3266 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3267 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3268 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3269 pReNative->pDbgInfo = pDbgInfo;
3270 pReNative->cDbgInfoAlloc = cNew;
3271 return pDbgInfo;
3272}
3273
3274
3275/**
3276 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3277 */
3278DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3279{
3280 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3281 { /* likely */ }
3282 else
3283 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3284 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3285}
3286
3287
3288/**
3289 * Debug Info: Adds a native offset record, if necessary.
3290 */
3291static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3292{
3293 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3294
3295 /*
3296 * Search backwards to see if we've got a similar record already.
3297 */
3298 uint32_t idx = pDbgInfo->cEntries;
3299 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3300 while (idx-- > idxStop)
3301 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3302 {
3303 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3304 return;
3305 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3307 break;
3308 }
3309
3310 /*
3311 * Add it.
3312 */
3313 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3314 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3315 pEntry->NativeOffset.offNative = off;
3316}
3317
3318
3319/**
3320 * Debug Info: Record info about a label.
3321 */
3322static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3323{
3324 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3325 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3326 pEntry->Label.uUnused = 0;
3327 pEntry->Label.enmLabel = (uint8_t)enmType;
3328 pEntry->Label.uData = uData;
3329}
3330
3331
3332/**
3333 * Debug Info: Record info about a threaded call.
3334 */
3335static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3336{
3337 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3338 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3339 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3340 pEntry->ThreadedCall.uUnused = 0;
3341 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3342}
3343
3344
3345/**
3346 * Debug Info: Record info about a new guest instruction.
3347 */
3348static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3349{
3350 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3351 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3352 pEntry->GuestInstruction.uUnused = 0;
3353 pEntry->GuestInstruction.fExec = fExec;
3354}
3355
3356
3357/**
3358 * Debug Info: Record info about guest register shadowing.
3359 */
3360static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3361 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3362{
3363 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3364 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3365 pEntry->GuestRegShadowing.uUnused = 0;
3366 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3367 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3368 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3369}
3370
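/*
 * Illustrative sketch, not part of the original source: debug info records are
 * appended in pairs, first a native offset record anchoring the current code
 * buffer position, then the detail record, as done by e.g.
 * iemNativeRegMarkAsGstRegShadow() further down:
 *
 *      iemNativeDbgInfoAddNativeOffset(pReNative, off);
 *      iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
 */
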
3371#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3372
3373
3374/*********************************************************************************************************************************
3375* Register Allocator *
3376*********************************************************************************************************************************/
3377
3378/**
3379 * Register parameter indexes (indexed by argument number).
3380 */
3381DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3382{
3383 IEMNATIVE_CALL_ARG0_GREG,
3384 IEMNATIVE_CALL_ARG1_GREG,
3385 IEMNATIVE_CALL_ARG2_GREG,
3386 IEMNATIVE_CALL_ARG3_GREG,
3387#if defined(IEMNATIVE_CALL_ARG4_GREG)
3388 IEMNATIVE_CALL_ARG4_GREG,
3389# if defined(IEMNATIVE_CALL_ARG5_GREG)
3390 IEMNATIVE_CALL_ARG5_GREG,
3391# if defined(IEMNATIVE_CALL_ARG6_GREG)
3392 IEMNATIVE_CALL_ARG6_GREG,
3393# if defined(IEMNATIVE_CALL_ARG7_GREG)
3394 IEMNATIVE_CALL_ARG7_GREG,
3395# endif
3396# endif
3397# endif
3398#endif
3399};
3400
3401/**
3402 * Call register masks indexed by argument count.
3403 */
3404DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3405{
3406 0,
3407 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3408 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3409 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3410 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3411 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3412#if defined(IEMNATIVE_CALL_ARG4_GREG)
3413 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3414 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3415# if defined(IEMNATIVE_CALL_ARG5_GREG)
3416 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3417 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3418# if defined(IEMNATIVE_CALL_ARG6_GREG)
3419 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3420 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3421 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3422# if defined(IEMNATIVE_CALL_ARG7_GREG)
3423 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3424 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3425 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3426# endif
3427# endif
3428# endif
3429#endif
3430};
3431
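/*
 * Illustrative sketch, not part of the original source: how the two tables above
 * are consumed.  For an argument index 'iArg' below IEMNATIVE_CALL_ARG_GREG_COUNT
 * and a call taking 'cArgs' register arguments:
 *
 *      uint8_t  const idxArgReg = g_aidxIemNativeCallRegs[iArg];  // host register holding argument #iArg
 *      uint32_t const fArgsMask = g_afIemNativeCallRegs[cArgs];   // mask of all registers used by the first cArgs arguments
 */
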
3432#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3433/**
3434 * BP offset of the stack argument slots.
3435 *
3436 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3437 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3438 */
3439DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3440{
3441 IEMNATIVE_FP_OFF_STACK_ARG0,
3442# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3443 IEMNATIVE_FP_OFF_STACK_ARG1,
3444# endif
3445# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3446 IEMNATIVE_FP_OFF_STACK_ARG2,
3447# endif
3448# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3449 IEMNATIVE_FP_OFF_STACK_ARG3,
3450# endif
3451};
3452AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3453#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3454
3455/**
3456 * Info about shadowed guest register values.
3457 * @see IEMNATIVEGSTREG
3458 */
3459static struct
3460{
3461 /** Offset in VMCPU. */
3462 uint32_t off;
3463 /** The field size. */
3464 uint8_t cb;
3465 /** Name (for logging). */
3466 const char *pszName;
3467} const g_aGstShadowInfo[] =
3468{
3469#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3470 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3471 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3472 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3473 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3474 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3475 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3476 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3477 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3478 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3479 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3480 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3481 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3482 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3483 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3484 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3485 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3486 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3487 /* [kIemNativeGstReg_LivenessPadding17] = */ { UINT32_MAX / 4, 0, "pad17", },
3488 /* [kIemNativeGstReg_LivenessPadding18] = */ { UINT32_MAX / 4, 0, "pad18", },
3489 /* [kIemNativeGstReg_LivenessPadding19] = */ { UINT32_MAX / 4, 0, "pad19", },
3490 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3491 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3492 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3493 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3494 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3495 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3496 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3497 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3498 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3499 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3500 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3501 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3502 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3503 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3504 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3505 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3506 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3507 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3508 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3509 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3510 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3511 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3512 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3513 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3514 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3515#undef CPUMCTX_OFF_AND_SIZE
3516};
3517AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3518
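/*
 * Illustrative sketch, not part of the original source: g_aGstShadowInfo maps an
 * IEMNATIVEGSTREG value to its backing field in VMCPU, which is how load/store
 * emitters and the logging code locate it:
 *
 *      uint32_t const offVCpu = g_aGstShadowInfo[enmGstReg].off;   // byte offset into VMCPU
 *      uint8_t  const cbField = g_aGstShadowInfo[enmGstReg].cb;    // field size in bytes
 *      Log12(("shadowing %s\n", g_aGstShadowInfo[enmGstReg].pszName));
 */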
3519
3520/** Host CPU general purpose register names. */
3521DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3522{
3523#ifdef RT_ARCH_AMD64
3524 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3525#elif RT_ARCH_ARM64
3526 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3527 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3528#else
3529# error "port me"
3530#endif
3531};
3532
3533
3534DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3535 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3536{
3537 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3538
3539 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3540 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3541 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3542 return (uint8_t)idxReg;
3543}
3544
3545
3546/**
3547 * Tries to locate a suitable register in the given register mask.
3548 *
3549 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3550 * failed.
3551 *
3552 * @returns Host register number on success, returns UINT8_MAX on failure.
3553 */
3554static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3555{
3556 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3557 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3558 if (fRegs)
3559 {
3560 /** @todo pick better here: */
3561 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3562
3563 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3564 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3565 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3566 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3567
3568 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3569 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3570 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3571 return idxReg;
3572 }
3573 return UINT8_MAX;
3574}
3575
3576
3577/**
3578 * Locate a register, possibly freeing one up.
3579 *
3580 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3581 * failed.
3582 *
3583 * @returns Host register number on success. Returns UINT8_MAX if no registers
3584 * found, the caller is supposed to deal with this and raise an
3585 * allocation type specific status code (if desired).
3586 *
3587 * @throws VBox status code if we run into trouble spilling a variable or
3588 * recording debug info. Does NOT throw anything if we're out of
3589 * registers, though.
3590 */
3591static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3592 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3593{
3594 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3595 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3596
3597 /*
3598 * Try a freed register that's shadowing a guest register
3599 */
3600 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3601 if (fRegs)
3602 {
3603 unsigned const idxReg = (fPreferVolatile
3604 ? ASMBitFirstSetU32(fRegs)
3605 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3606 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
3607 - 1;
3608
3609 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3610 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3611 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3612 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3613
3614 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3615 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3616 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3617 return idxReg;
3618 }
3619
3620 /*
3621 * Try free up a variable that's in a register.
3622 *
3623 * We do two rounds here, first evacuating variables we don't need to be
3624 * saved on the stack, then in the second round move things to the stack.
3625 */
3626 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3627 {
3628 uint32_t fVars = pReNative->Core.bmVars;
3629 while (fVars)
3630 {
3631 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3632 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3633 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3634 && (RT_BIT_32(idxReg) & fRegMask)
3635 && ( iLoop == 0
3636 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3637 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3638 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3639 {
3640 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3641 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3642 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3643 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3644 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3645 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3646
3647 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3648 {
3649 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3650 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3651 }
3652
3653 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3654 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3655
3656 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3657 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3658 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3659 return idxReg;
3660 }
3661 fVars &= ~RT_BIT_32(idxVar);
3662 }
3663 }
3664
3665 return UINT8_MAX;
3666}
3667
3668
3669/**
3670 * Reassigns a variable to a different register specified by the caller.
3671 *
3672 * @returns The new code buffer position.
3673 * @param pReNative The native recompile state.
3674 * @param off The current code buffer position.
3675 * @param idxVar The variable index.
3676 * @param idxRegOld The old host register number.
3677 * @param idxRegNew The new host register number.
3678 * @param pszCaller The caller for logging.
3679 */
3680static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3681 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3682{
3683 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3684 RT_NOREF(pszCaller);
3685
3686 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3687
3688 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3689 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3690 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3691 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3692
3693 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3694 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3695 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3696 if (fGstRegShadows)
3697 {
3698 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3699 | RT_BIT_32(idxRegNew);
3700 while (fGstRegShadows)
3701 {
3702 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3703 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3704
3705 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3706 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3707 }
3708 }
3709
3710 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3711 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3712 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3713 return off;
3714}
3715
3716
3717/**
3718 * Moves a variable to a different register or spills it onto the stack.
3719 *
3720 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3721 * kinds can easily be recreated if needed later.
3722 *
3723 * @returns The new code buffer position.
3724 * @param pReNative The native recompile state.
3725 * @param off The current code buffer position.
3726 * @param idxVar The variable index.
3727 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3728 * call-volatile registers.
3729 */
3730static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3731 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3732{
3733 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3734 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3735 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3736
3737 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3738 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3739 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3740 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3741 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3742 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3743 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3744 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3745 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3746
3747
3748 /** @todo Add statistics on this.*/
3749 /** @todo Implement basic variable liveness analysis (python) so variables
3750 * can be freed immediately once no longer used. Without it we risk trashing
3751 * registers and stack for dead variables. */
3752
3753 /*
3754 * First try move it to a different register, as that's cheaper.
3755 */
3756 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3757 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3758 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3759 if (fRegs)
3760 {
3761 /* Avoid using shadow registers, if possible. */
3762 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3763 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3764 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3765 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3766 }
3767
3768 /*
3769 * Otherwise we must spill the register onto the stack.
3770 */
3771 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3772 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3773 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3774 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3775
3776 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3777 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3778 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3779 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3780 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3781 return off;
3782}
3783
3784
3785/**
3786 * Allocates a temporary host general purpose register.
3787 *
3788 * This may emit code to save register content onto the stack in order to free
3789 * up a register.
3790 *
3791 * @returns The host register number; throws VBox status code on failure,
3792 * so no need to check the return value.
3793 * @param pReNative The native recompile state.
3794 * @param poff Pointer to the variable with the code buffer position.
3795 * This will be update if we need to move a variable from
3796 * register to stack in order to satisfy the request.
3797 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3798 * registers (@c true, default) or the other way around
3799 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3800 */
3801DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3802{
3803 /*
3804 * Try find a completely unused register, preferably a call-volatile one.
3805 */
3806 uint8_t idxReg;
3807 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3808 & ~pReNative->Core.bmHstRegsWithGstShadow
3809 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3810 if (fRegs)
3811 {
3812 if (fPreferVolatile)
3813 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3814 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3815 else
3816 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3817 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3818 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3819 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3820 }
3821 else
3822 {
3823 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3824 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3825 }
3826 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3827}
3828
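/*
 * Illustrative sketch, not part of the original source: the typical lifetime of
 * a temporary register.  The emitter in the middle is hypothetical.
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitSomething(pReNative, off, idxTmpReg);   // hypothetical emitter using the register
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */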
3829
3830/**
3831 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3832 * registers.
3833 *
3834 * @returns The host register number; throws VBox status code on failure,
3835 * so no need to check the return value.
3836 * @param pReNative The native recompile state.
3837 * @param poff Pointer to the variable with the code buffer position.
3838 * This will be updated if we need to move a variable from
3839 * register to stack in order to satisfy the request.
3840 * @param fRegMask Mask of acceptable registers.
3841 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3842 * registers (@c true, default) or the other way around
3843 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3844 */
3845DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3846 bool fPreferVolatile /*= true*/)
3847{
3848 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3849 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3850
3851 /*
3852 * Try find a completely unused register, preferably a call-volatile one.
3853 */
3854 uint8_t idxReg;
3855 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3856 & ~pReNative->Core.bmHstRegsWithGstShadow
3857 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3858 & fRegMask;
3859 if (fRegs)
3860 {
3861 if (fPreferVolatile)
3862 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3863 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3864 else
3865 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3866 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3867 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3868 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3869 }
3870 else
3871 {
3872 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3873 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3874 }
3875 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3876}
3877
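/*
 * Illustrative sketch, not part of the original source: requesting a temporary
 * from a restricted set, here any non-fixed, non-volatile host register, as a
 * caller would when the value must survive a helper call (the same mask
 * expression iemNativeRegAllocTmpForGuestReg uses for fNoVolatileRegs below).
 *
 *      uint8_t const idxSafeReg = iemNativeRegAllocTmpEx(pReNative, &off,
 *                                                        IEMNATIVE_HST_GREG_MASK
 *                                                        & ~IEMNATIVE_REG_FIXED_MASK
 *                                                        & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
 *                                                        false /*fPreferVolatile*/);
 */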
3878
3879/**
3880 * Allocates a temporary register for loading an immediate value into.
3881 *
3882 * This will emit code to load the immediate, unless there happens to be an
3883 * unused register with the value already loaded.
3884 *
3885 * The caller will not modify the returned register, it must be considered
3886 * read-only. Free using iemNativeRegFreeTmpImm.
3887 *
3888 * @returns The host register number; throws VBox status code on failure, so no
3889 * need to check the return value.
3890 * @param pReNative The native recompile state.
3891 * @param poff Pointer to the variable with the code buffer position.
3892 * @param uImm The immediate value that the register must hold upon
3893 * return.
3894 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3895 * registers (@c true, default) or the other way around
3896 * (@c false).
3897 *
3898 * @note Reusing immediate values has not been implemented yet.
3899 */
3900DECL_HIDDEN_THROW(uint8_t)
3901iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3902{
3903 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3904 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3905 return idxReg;
3906}
3907
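/*
 * Illustrative sketch, not part of the original source: a read-only immediate
 * register, e.g. as a comparison operand, paired with its dedicated free helper.
 *
 *      uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      // ... emit code that only reads idxImmReg ...
 *      iemNativeRegFreeTmpImm(pReNative, idxImmReg);
 */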
3908
3909/**
3910 * Marks host register @a idxHstReg as containing a shadow copy of guest
3911 * register @a enmGstReg.
3912 *
3913 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
3914 * host register before calling.
3915 */
3916DECL_FORCE_INLINE(void)
3917iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3918{
3919 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
3920 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3921 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3922
3923 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
3924 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
3925 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
3926 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3927#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3928 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3929 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
3930#else
3931 RT_NOREF(off);
3932#endif
3933}
3934
3935
3936/**
3937 * Clear any guest register shadow claims from @a idxHstReg.
3938 *
3939 * The register does not need to be shadowing any guest registers.
3940 */
3941DECL_FORCE_INLINE(void)
3942iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
3943{
3944 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3945 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3946 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3947 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3948 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3949
3950#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3951 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3952 if (fGstRegs)
3953 {
3954 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
3955 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3956 while (fGstRegs)
3957 {
3958 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3959 fGstRegs &= ~RT_BIT_64(iGstReg);
3960 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
3961 }
3962 }
3963#else
3964 RT_NOREF(off);
3965#endif
3966
3967 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3968 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3969 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3970}
3971
3972
3973/**
3974 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
3975 * and global overview flags.
3976 */
3977DECL_FORCE_INLINE(void)
3978iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3979{
3980 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3981 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3982 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3983 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3984 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
3985 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3986 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3987
3988#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3989 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3990 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
3991#else
3992 RT_NOREF(off);
3993#endif
3994
3995 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3996 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
3997 if (!fGstRegShadowsNew)
3998 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3999 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4000}
4001
4002
4003/**
4004 * Clear any guest register shadow claim for @a enmGstReg.
4005 */
4006DECL_FORCE_INLINE(void)
4007iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4008{
4009 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4010 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4011 {
4012 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4013 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4014 }
4015}
4016
4017
4018/**
4019 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4020 * as the new shadow of it.
4021 */
4022DECL_FORCE_INLINE(void)
4023iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4024 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4025{
4026 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4027 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4028 {
4029 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4030 if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
4031 return;
4032 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4033 }
4034 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4035}
4036
4037
4038/**
4039 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4040 * to @a idxRegTo.
4041 */
4042DECL_FORCE_INLINE(void)
4043iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4044 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4045{
4046 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4047 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4048 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4049 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4050 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4051 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4052 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4053 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4054 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4055
4056 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4057 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4058 if (!fGstRegShadowsFrom)
4059 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4060 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4061 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4062 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4063#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4064 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4065 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4066#else
4067 RT_NOREF(off);
4068#endif
4069}
4070
4071
4072/**
4073 * Allocates a temporary host general purpose register for keeping a guest
4074 * register value.
4075 *
4076 * Since we may already have a register holding the guest register value,
4077 * code will be emitted to do the loading if that's not the case. Code may also
4078 * be emitted if we have to free up a register to satisfy the request.
4079 *
4080 * @returns The host register number; throws VBox status code on failure, so no
4081 * need to check the return value.
4082 * @param pReNative The native recompile state.
4083 * @param poff Pointer to the variable with the code buffer
4084 * position. This will be updated if we need to move a
4085 * variable from register to stack in order to satisfy
4086 * the request.
4087 * @param enmGstReg The guest register that is to be updated.
4088 * @param enmIntendedUse How the caller will be using the host register.
4089 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4090 * register is okay (default). The ASSUMPTION here is
4091 * that the caller has already flushed all volatile
4092 * registers, so this is only applied if we allocate a
4093 * new register.
4094 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4095 */
4096DECL_HIDDEN_THROW(uint8_t)
4097iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4098 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4099 bool fNoVolatileRegs /*= false*/)
4100{
4101 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4102#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4103 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4104#endif
4105 uint32_t const fRegMask = !fNoVolatileRegs
4106 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4107 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4108
4109 /*
4110 * First check if the guest register value is already in a host register.
4111 */
4112 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4113 {
4114 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4115 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4116 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4117 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4118
4119 /* It's not supposed to be allocated... */
4120 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4121 {
4122 /*
4123 * If the register will trash the guest shadow copy, try find a
4124 * completely unused register we can use instead. If that fails,
4125 * we need to disassociate the host reg from the guest reg.
4126 */
4127 /** @todo would be nice to know if preserving the register is in any way helpful. */
4128 /* If the purpose is calculations, try duplicate the register value as
4129 we'll be clobbering the shadow. */
4130 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4131 && ( ~pReNative->Core.bmHstRegs
4132 & ~pReNative->Core.bmHstRegsWithGstShadow
4133 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4134 {
4135 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4136
4137 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4138
4139 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4140 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4141 g_apszIemNativeHstRegNames[idxRegNew]));
4142 idxReg = idxRegNew;
4143 }
4144 /* If the current register matches the restrictions, go ahead and allocate
4145 it for the caller. */
4146 else if (fRegMask & RT_BIT_32(idxReg))
4147 {
4148 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4149 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4150 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4151 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4152 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4153 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4154 else
4155 {
4156 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4157 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4158 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4159 }
4160 }
4161 /* Otherwise, allocate a register that satisfies the caller and transfer
4162 the shadowing if compatible with the intended use. (This basically
4163 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4164 else
4165 {
4166 Assert(fNoVolatileRegs);
4167 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4168 !fNoVolatileRegs
4169 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4170 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4171 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4172 {
4173 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4174 Log12(("iemNativeRegAllocTmpForGuestReg: Transfering %s to %s for guest %s %s\n",
4175 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4176 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4177 }
4178 else
4179 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4180 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4181 g_apszIemNativeHstRegNames[idxRegNew]));
4182 idxReg = idxRegNew;
4183 }
4184 }
4185 else
4186 {
4187 /*
4188 * Oops. Shadowed guest register already allocated!
4189 *
4190 * Allocate a new register, copy the value and, if updating, the
4191 * guest shadow copy assignment to the new register.
4192 */
4193 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4194 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4195 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4196 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4197
4198 /** @todo share register for readonly access. */
4199 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4200 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4201
4202 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4203 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4204
4205 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4206 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4207 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4208 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4209 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4210 else
4211 {
4212 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4213 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4214 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4215 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4216 }
4217 idxReg = idxRegNew;
4218 }
4219 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4220
4221#ifdef VBOX_STRICT
4222 /* Strict builds: Check that the value is correct. */
4223 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4224#endif
4225
4226 return idxReg;
4227 }
4228
4229 /*
4230 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4231 */
4232 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4233
4234 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4235 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4236
4237 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4238 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4239 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4240 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4241
4242 return idxRegNew;
4243}
4244
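/*
 * Illustrative sketch, not part of the original source: fetching a guest GPR for
 * update.  The IEMNATIVEGSTREG construction mirrors the g_aGstShadowInfo comments
 * above; writing the updated value back to CPUMCTX is done by separate store
 * emitters and is not shown here.
 *
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                             (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                             kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying idxReg ...
 *      iemNativeRegFreeTmp(pReNative, idxReg);
 */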
4245
4246/**
4247 * Allocates a temporary host general purpose register that already holds the
4248 * given guest register value.
4249 *
4250 * The use case for this function is places where the shadowing state cannot be
4251 * modified due to branching and such. This will fail if we don't have a
4252 * current shadow copy handy or if it's incompatible. The only code that will
4253 * be emitted here is value checking code in strict builds.
4254 *
4255 * The intended use can only be readonly!
4256 *
4257 * @returns The host register number, UINT8_MAX if not present.
4258 * @param pReNative The native recompile state.
4259 * @param poff Pointer to the instruction buffer offset.
4260 * Will be updated in strict builds if a register is
4261 * found.
4262 * @param enmGstReg The guest register that is to be used (read-only).
4263 * @note In strict builds, this may throw instruction buffer growth failures.
4264 * Non-strict builds will not throw anything.
4265 * @sa iemNativeRegAllocTmpForGuestReg
4266 */
4267DECL_HIDDEN_THROW(uint8_t)
4268iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4269{
4270 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4271
4272 /*
4273 * First check if the guest register value is already in a host register.
4274 */
4275 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4276 {
4277 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4278 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4279 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4280 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4281
4282 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4283 {
4284 /*
4285 * We only do readonly use here, so easy compared to the other
4286 * variant of this code.
4287 */
4288 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4289 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4290 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4291 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4292 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4293
4294#ifdef VBOX_STRICT
4295 /* Strict builds: Check that the value is correct. */
4296 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4297#else
4298 RT_NOREF(poff);
4299#endif
4300 return idxReg;
4301 }
4302 }
4303
4304 return UINT8_MAX;
4305}
4306
4307
4308DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
4309
4310
4311/**
4312 * Allocates argument registers for a function call.
4313 *
4314 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4315 * need to check the return value.
4316 * @param pReNative The native recompile state.
4317 * @param off The current code buffer offset.
4318 * @param cArgs The number of arguments the function call takes.
4319 */
4320DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4321{
4322 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4323 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4324 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4325 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4326
4327 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4328 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4329 else if (cArgs == 0)
4330 return true;
4331
4332 /*
4333 * Do we get lucky and all registers are free and not shadowing anything?
4334 */
4335 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4336 for (uint32_t i = 0; i < cArgs; i++)
4337 {
4338 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4339 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4340 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4341 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4342 }
4343 /*
4344 * Okay, not lucky so we have to free up the registers.
4345 */
4346 else
4347 for (uint32_t i = 0; i < cArgs; i++)
4348 {
4349 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4350 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4351 {
4352 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4353 {
4354 case kIemNativeWhat_Var:
4355 {
4356 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4357 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
4358 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4359 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4360 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4361
4362 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4363 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4364 else
4365 {
4366 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4367 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4368 }
4369 break;
4370 }
4371
4372 case kIemNativeWhat_Tmp:
4373 case kIemNativeWhat_Arg:
4374 case kIemNativeWhat_rc:
4375 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4376 default:
4377 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4378 }
4379
4380 }
4381 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4382 {
4383 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4384 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4385 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4386 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4387 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4388 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4389 }
4390 else
4391 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4392 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4393 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4394 }
4395 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4396    return off;
4397}
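
#if 0 /* Illustrative sketch only, not compiled: a minimal example of how argument registers
         could be claimed via iemNativeRegAllocArgs() before loading them and emitting a call.
         The function name and the helper address parameter are made up for illustration; the
         emitters and register constants are the ones used elsewhere in this file.  Note that
         the real call sites below (iemNativeEmitCImplCall, iemNativeEmitThreadedCall) go via
         iemNativeRegMoveAndFreeAndFlushAtCall() instead. */
static uint32_t iemNativeExampleEmitOneArgHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnHelper)
{
    /* Reserve the first argument register; throws (longjmps) on failure, so no status check needed. */
    off = iemNativeRegAllocArgs(pReNative, off, 1);
    /* arg0 = pVCpu. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    /* Emit the call itself. */
    return iemNativeEmitCallImm(pReNative, off, pfnHelper);
}
#endif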
4398
4399
4400DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4401
4402
4403#if 0
4404/**
4405 * Frees a register assignment of any type.
4406 *
4407 * @param pReNative The native recompile state.
4408 * @param idxHstReg The register to free.
4409 *
4410 * @note Does not update variables.
4411 */
4412DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4413{
4414 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4415 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4416 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4417 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4418 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4419 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4420 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4421 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4422 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4423 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4424 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4425 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4426 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4427 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4428
4429 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4430 /* no flushing, right:
4431 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4432 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4433 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4434 */
4435}
4436#endif
4437
4438
4439/**
4440 * Frees a temporary register.
4441 *
4442 * Any shadow copies of guest registers assigned to the host register will not
4443 * be flushed by this operation.
4444 */
4445DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4446{
4447 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4448 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4449 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4450 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4451 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4452}
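
#if 0 /* Illustrative sketch only, not compiled: the usual pairing of a temporary register
         allocation with iemNativeRegFreeTmp().  The function name is made up; the load in
         the middle merely stands in for whatever the temporary is actually used for. */
static uint32_t iemNativeExampleUseTmpReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off); /* may emit spill code, hence &off */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp, 0);   /* ... use the register ... */
    iemNativeRegFreeTmp(pReNative, idxRegTmp);                       /* any guest shadows are left intact */
    return off;
}
#endif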
4453
4454
4455/**
4456 * Frees a temporary immediate register.
4457 *
4458 * It is assumed that the caller has not modified the register, so it still holds
4459 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4460 */
4461DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4462{
4463 iemNativeRegFreeTmp(pReNative, idxHstReg);
4464}
4465
4466
4467/**
4468 * Frees a register assigned to a variable.
4469 *
4470 * The register will be disassociated from the variable.
4471 */
4472DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4473{
4474 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4475 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4476 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4477 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4478 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4479
4480 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4481 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4482 if (!fFlushShadows)
4483 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
4484 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4485 else
4486 {
4487 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4488 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4489 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4490 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4491 uint64_t fGstRegShadows = fGstRegShadowsOld;
4492 while (fGstRegShadows)
4493 {
4494 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4495 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4496
4497 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4498 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4499 }
4500 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
4501 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4502 }
4503}
4504
4505
4506/**
4507 * Called right before emitting a call instruction to move anything important
4508 * out of call-volatile registers, free and flush the call-volatile registers,
4509 * optionally freeing argument variables.
4510 *
4511 * @returns New code buffer offset; throws VBox status code on failure.
4512 * @param pReNative The native recompile state.
4513 * @param off The code buffer offset.
4514 * @param cArgs The number of arguments the function call takes.
4515 *                      It is presumed that the host register part of these has
4516 * been allocated as such already and won't need moving,
4517 * just freeing.
4518 * @param fKeepVars Mask of variables that should keep their register
4519 * assignments. Caller must take care to handle these.
4520 */
4521DECL_HIDDEN_THROW(uint32_t)
4522iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4523{
4524 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4525
4526 /* fKeepVars will reduce this mask. */
4527 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4528
4529 /*
4530 * Move anything important out of volatile registers.
4531 */
4532 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4533 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4534 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4535#ifdef IEMNATIVE_REG_FIXED_TMP0
4536 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4537#endif
4538 & ~g_afIemNativeCallRegs[cArgs];
4539
4540 fRegsToMove &= pReNative->Core.bmHstRegs;
4541 if (!fRegsToMove)
4542 { /* likely */ }
4543 else
4544 {
4545 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4546 while (fRegsToMove != 0)
4547 {
4548 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4549 fRegsToMove &= ~RT_BIT_32(idxReg);
4550
4551 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4552 {
4553 case kIemNativeWhat_Var:
4554 {
4555 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4556 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4557 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4558 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4559 if (!(RT_BIT_32(idxVar) & fKeepVars))
4560 {
4561 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4562 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4563 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4564 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4565 else
4566 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4567 }
4568 else
4569 fRegsToFree &= ~RT_BIT_32(idxReg);
4570 continue;
4571 }
4572
4573 case kIemNativeWhat_Arg:
4574 AssertMsgFailed(("What?!?: %u\n", idxReg));
4575 continue;
4576
4577 case kIemNativeWhat_rc:
4578 case kIemNativeWhat_Tmp:
4579 AssertMsgFailed(("Missing free: %u\n", idxReg));
4580 continue;
4581
4582 case kIemNativeWhat_FixedTmp:
4583 case kIemNativeWhat_pVCpuFixed:
4584 case kIemNativeWhat_pCtxFixed:
4585 case kIemNativeWhat_FixedReserved:
4586 case kIemNativeWhat_Invalid:
4587 case kIemNativeWhat_End:
4588 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4589 }
4590 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4591 }
4592 }
4593
4594 /*
4595 * Do the actual freeing.
4596 */
4597 if (pReNative->Core.bmHstRegs & fRegsToFree)
4598 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4599 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4600 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4601
4602 /* If there are guest register shadows in any call-volatile register, we
4603       have to clear the corresponding guest register masks for each register. */
4604 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4605 if (fHstRegsWithGstShadow)
4606 {
4607 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4608 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4609 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4610 do
4611 {
4612 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4613 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4614
4615 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4616 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4617 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4618 } while (fHstRegsWithGstShadow != 0);
4619 }
4620
4621 return off;
4622}
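
#if 0 /* Illustrative sketch only, not compiled: the typical pre/post-call sequence this
         function is part of.  See iemNativeEmitCImplCall() and iemNativeEmitThreadedCall()
         below for the real thing; the function name and helper address here are made up. */
static uint32_t iemNativeExampleCallSequence(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnHelper)
{
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);                /* 1. drop guest shadow info the call invalidates */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 1);      /* 2. vacate the call-volatile registers */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitCallImm(pReNative, off, pfnHelper);               /* 3. make the call */
    return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr); /* 4. propagate non-zero status / rcPassUp */
}
#endif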
4623
4624
4625/**
4626 * Flushes a set of guest register shadow copies.
4627 *
4628 * This is usually done after calling a threaded function or a C-implementation
4629 * of an instruction.
4630 *
4631 * @param pReNative The native recompile state.
4632 * @param fGstRegs Set of guest registers to flush.
4633 */
4634DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4635{
4636 /*
4637 * Reduce the mask by what's currently shadowed
4638 */
4639 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4640 fGstRegs &= bmGstRegShadowsOld;
4641 if (fGstRegs)
4642 {
4643 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4644 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4645 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4646 if (bmGstRegShadowsNew)
4647 {
4648 /*
4649 * Partial.
4650 */
4651 do
4652 {
4653 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4654 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4655 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4656 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4657 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4658
4659 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4660 fGstRegs &= ~fInThisHstReg;
4661 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4662 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4663 if (!fGstRegShadowsNew)
4664 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4665 } while (fGstRegs != 0);
4666 }
4667 else
4668 {
4669 /*
4670 * Clear all.
4671 */
4672 do
4673 {
4674 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4675 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4676 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4677 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4678 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4679
4680 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4681 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4682 } while (fGstRegs != 0);
4683 pReNative->Core.bmHstRegsWithGstShadow = 0;
4684 }
4685 }
4686}
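
#if 0 /* Illustrative sketch only, not compiled: composing a flush mask from the
         kIemNativeGstReg_XXX bits.  The function name is made up for illustration. */
static void iemNativeExampleFlushPcAndEflagsShadows(PIEMRECOMPILERSTATE pReNative)
{
    /* Drop the shadow copies of the guest PC and EFLAGS, e.g. because generated code is
       about to modify them in CPUMCTX behind the allocator's back. */
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
}
#endif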
4687
4688
4689/**
4690 * Flushes guest register shadow copies held by a set of host registers.
4691 *
4692 * This is used with the TLB lookup code for ensuring that we don't carry on
4693 * with any guest shadows in volatile registers, as these will get corrupted by
4694 * a TLB miss.
4695 *
4696 * @param pReNative The native recompile state.
4697 * @param fHstRegs Set of host registers to flush guest shadows for.
4698 */
4699DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4700{
4701 /*
4702 * Reduce the mask by what's currently shadowed.
4703 */
4704 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4705 fHstRegs &= bmHstRegsWithGstShadowOld;
4706 if (fHstRegs)
4707 {
4708 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4709 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4710 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4711 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4712 if (bmHstRegsWithGstShadowNew)
4713 {
4714 /*
4715 * Partial (likely).
4716 */
4717 uint64_t fGstShadows = 0;
4718 do
4719 {
4720 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4721 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4722 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4723 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4724
4725 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4726 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4727 fHstRegs &= ~RT_BIT_32(idxHstReg);
4728 } while (fHstRegs != 0);
4729 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4730 }
4731 else
4732 {
4733 /*
4734 * Clear all.
4735 */
4736 do
4737 {
4738 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4739 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4740 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4741 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4742
4743 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4744 fHstRegs &= ~RT_BIT_32(idxHstReg);
4745 } while (fHstRegs != 0);
4746 pReNative->Core.bmGstRegShadows = 0;
4747 }
4748 }
4749}
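
#if 0 /* Illustrative sketch only, not compiled: dropping all guest shadows held in
         call-volatile host registers, as described above for the TLB lookup path.
         The function name is made up for illustration. */
static void iemNativeExampleFlushVolatileShadows(PIEMRECOMPILERSTATE pReNative)
{
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
}
#endif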
4750
4751
4752/**
4753 * Restores guest shadow copies in volatile registers.
4754 *
4755 * This is used after calling a helper function (think TLB miss) to restore the
4756 * register state of volatile registers.
4757 *
4758 * @param pReNative The native recompile state.
4759 * @param off The code buffer offset.
4760 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4761 * be active (allocated) w/o asserting. Hack.
4762 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4763 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4764 */
4765DECL_HIDDEN_THROW(uint32_t)
4766iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4767{
4768 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4769 if (fHstRegs)
4770 {
4771 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4772 do
4773 {
4774 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4775
4776 /* It's not fatal if a register is active holding a variable that
4777               shadows a guest register, ASSUMING all pending guest register
4778 writes were flushed prior to the helper call. However, we'll be
4779               emitting duplicate restores, so it wastes code space. */
4780 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4781 RT_NOREF(fHstRegsActiveShadows);
4782
4783 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4784 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4785 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4786 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4787
4788 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4789 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4790
4791 fHstRegs &= ~RT_BIT_32(idxHstReg);
4792 } while (fHstRegs != 0);
4793 }
4794 return off;
4795}
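
#if 0 /* Illustrative sketch only, not compiled: after a TLB-miss helper call has clobbered the
         call-volatile registers, the shadowed guest values are reloaded roughly like this.
         The function name is made up; the real usage sits in the TLB lookup emitters. */
static uint32_t iemNativeExampleRestoreAfterTlbMissCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* No host registers are expected to still be active here, hence the zero hack mask. */
    return iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
}
#endif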
4796
4797
4798/**
4799 * Flushes delayed write of a specific guest register.
4800 *
4801 * This must be called prior to calling CImpl functions and any helpers that use
4802 * the guest state (like raising exceptions) and such.
4803 *
4804 * This optimization has not yet been implemented. The first target would be
4805 * RIP updates, since these are the most common ones.
4806 */
4807DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4808 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
4809{
4810 RT_NOREF(pReNative, enmClass, idxReg);
4811 return off;
4812}
4813
4814
4815/**
4816 * Flushes any delayed guest register writes.
4817 *
4818 * This must be called prior to calling CImpl functions and any helpers that use
4819 * the guest state (like raising exceptions) and such.
4820 *
4821 * This optimization has not yet been implemented. The first target would be
4822 * RIP updates, since these are the most common ones.
4823 */
4824DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4825{
4826 RT_NOREF(pReNative, off);
4827 return off;
4828}
4829
4830
4831#ifdef VBOX_STRICT
4832/**
4833 * Does internal register allocator sanity checks.
4834 */
4835static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
4836{
4837 /*
4838 * Iterate host registers building a guest shadowing set.
4839 */
4840 uint64_t bmGstRegShadows = 0;
4841 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
4842 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
4843 while (bmHstRegsWithGstShadow)
4844 {
4845 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
4846 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4847 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4848
4849 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4850 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
4851 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
4852 bmGstRegShadows |= fThisGstRegShadows;
4853 while (fThisGstRegShadows)
4854 {
4855 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
4856 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
4857 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
4858 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
4859 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
4860 }
4861 }
4862 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
4863 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
4864 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
4865
4866 /*
4867 * Now the other way around, checking the guest to host index array.
4868 */
4869 bmHstRegsWithGstShadow = 0;
4870 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
4871 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4872 while (bmGstRegShadows)
4873 {
4874 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
4875 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4876 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
4877
4878 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4879 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
4880 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
4881 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
4882 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4883 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4884 }
4885 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
4886 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
4887 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
4888}
4889#endif
4890
4891
4892/*********************************************************************************************************************************
4893* Code Emitters (larger snippets) *
4894*********************************************************************************************************************************/
4895
4896/**
4897 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
4898 * extending to 64-bit width.
4899 *
4900 * @returns New code buffer offset on success; throws VBox status code on failure.
4901 * @param   pReNative   The native recompile state.
4902 * @param off The current code buffer position.
4903 * @param idxHstReg The host register to load the guest register value into.
4904 * @param enmGstReg The guest register to load.
4905 *
4906 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
4907 * that is something the caller needs to do if applicable.
4908 */
4909DECL_HIDDEN_THROW(uint32_t)
4910iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
4911{
4912 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
4913 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4914
4915 switch (g_aGstShadowInfo[enmGstReg].cb)
4916 {
4917 case sizeof(uint64_t):
4918 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4919 case sizeof(uint32_t):
4920 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4921 case sizeof(uint16_t):
4922 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4923#if 0 /* not present in the table. */
4924 case sizeof(uint8_t):
4925 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4926#endif
4927 default:
4928 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4929 }
4930}
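
#if 0 /* Illustrative sketch only, not compiled: reloading a guest register value (here RSP)
         into a host register.  The function name is made up; note that, as stated above, this
         does not update the shadow bookkeeping; that remains the caller's job. */
static uint32_t iemNativeExampleReloadGuestRsp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
{
    return iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg,
                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xSP));
}
#endif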
4931
4932
4933#ifdef VBOX_STRICT
4934/**
4935 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
4936 *
4937 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4938 * Trashes EFLAGS on AMD64.
4939 */
4940static uint32_t
4941iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
4942{
4943# ifdef RT_ARCH_AMD64
4944 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4945
4946 /* rol reg64, 32 */
4947 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4948 pbCodeBuf[off++] = 0xc1;
4949 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4950 pbCodeBuf[off++] = 32;
4951
4952 /* test reg32, ffffffffh */
4953 if (idxReg >= 8)
4954 pbCodeBuf[off++] = X86_OP_REX_B;
4955 pbCodeBuf[off++] = 0xf7;
4956 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4957 pbCodeBuf[off++] = 0xff;
4958 pbCodeBuf[off++] = 0xff;
4959 pbCodeBuf[off++] = 0xff;
4960 pbCodeBuf[off++] = 0xff;
4961
4962 /* je/jz +1 */
4963 pbCodeBuf[off++] = 0x74;
4964 pbCodeBuf[off++] = 0x01;
4965
4966 /* int3 */
4967 pbCodeBuf[off++] = 0xcc;
4968
4969 /* rol reg64, 32 */
4970 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4971 pbCodeBuf[off++] = 0xc1;
4972 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4973 pbCodeBuf[off++] = 32;
4974
4975# elif defined(RT_ARCH_ARM64)
4976 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4977 /* lsr tmp0, reg64, #32 */
4978 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
4979 /* cbz tmp0, +1 */
4980 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4981 /* brk #0x1100 */
4982 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
4983
4984# else
4985# error "Port me!"
4986# endif
4987 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4988 return off;
4989}
4990#endif /* VBOX_STRICT */
4991
4992
4993#ifdef VBOX_STRICT
4994/**
4995 * Emitting code that checks that the content of register @a idxReg is the same
4996 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
4997 * instruction if that's not the case.
4998 *
4999 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5000 * Trashes EFLAGS on AMD64.
5001 */
5002static uint32_t
5003iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5004{
5005# ifdef RT_ARCH_AMD64
5006 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5007
5008 /* cmp reg, [mem] */
5009 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5010 {
5011 if (idxReg >= 8)
5012 pbCodeBuf[off++] = X86_OP_REX_R;
5013 pbCodeBuf[off++] = 0x38;
5014 }
5015 else
5016 {
5017 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5018 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5019 else
5020 {
5021 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5022 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5023 else
5024 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5025 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5026 if (idxReg >= 8)
5027 pbCodeBuf[off++] = X86_OP_REX_R;
5028 }
5029 pbCodeBuf[off++] = 0x39;
5030 }
5031 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5032
5033 /* je/jz +1 */
5034 pbCodeBuf[off++] = 0x74;
5035 pbCodeBuf[off++] = 0x01;
5036
5037 /* int3 */
5038 pbCodeBuf[off++] = 0xcc;
5039
5040 /* For values smaller than the register size, we must check that the rest
5041 of the register is all zeros. */
5042 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5043 {
5044 /* test reg64, imm32 */
5045 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5046 pbCodeBuf[off++] = 0xf7;
5047 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5048 pbCodeBuf[off++] = 0;
5049 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5050 pbCodeBuf[off++] = 0xff;
5051 pbCodeBuf[off++] = 0xff;
5052
5053 /* je/jz +1 */
5054 pbCodeBuf[off++] = 0x74;
5055 pbCodeBuf[off++] = 0x01;
5056
5057 /* int3 */
5058 pbCodeBuf[off++] = 0xcc;
5059 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5060 }
5061 else
5062 {
5063 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5064 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5065            off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5066 }
5067
5068# elif defined(RT_ARCH_ARM64)
5069 /* mov TMP0, [gstreg] */
5070 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5071
5072 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5073 /* sub tmp0, tmp0, idxReg */
5074 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5075 /* cbz tmp0, +1 */
5076 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5077 /* brk #0x1000+enmGstReg */
5078 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5079 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5080
5081# else
5082# error "Port me!"
5083# endif
5084 return off;
5085}
5086#endif /* VBOX_STRICT */
5087
5088
5089#ifdef VBOX_STRICT
5090/**
5091 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
5092 * important bits.
5093 *
5094 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5095 * Trashes EFLAGS on AMD64.
5096 */
5097static uint32_t
5098iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5099{
5100 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5101 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5102 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5103 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5104
5105# ifdef RT_ARCH_AMD64
5106 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5107
5108 /* je/jz +1 */
5109 pbCodeBuf[off++] = 0x74;
5110 pbCodeBuf[off++] = 0x01;
5111
5112 /* int3 */
5113 pbCodeBuf[off++] = 0xcc;
5114
5115# elif defined(RT_ARCH_ARM64)
5116 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5117
5118 /* b.eq +1 */
5119 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5120 /* brk #0x2000 */
5121 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5122
5123# else
5124# error "Port me!"
5125# endif
5126 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5127
5128 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5129 return off;
5130}
5131#endif /* VBOX_STRICT */
5132
5133
5134/**
5135 * Emits code for checking the return code of a call and rcPassUp, returning
5136 * from the code if either is non-zero.
5137 */
5138DECL_HIDDEN_THROW(uint32_t)
5139iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5140{
5141#ifdef RT_ARCH_AMD64
5142 /*
5143 * AMD64: eax = call status code.
5144 */
5145
5146 /* edx = rcPassUp */
5147 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5148# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5149 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5150# endif
5151
5152 /* edx = eax | rcPassUp */
5153 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5154 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5155 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5156 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5157
5158 /* Jump to non-zero status return path. */
5159 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5160
5161 /* done. */
5162
5163#elif defined(RT_ARCH_ARM64)
5164 /*
5165 * ARM64: w0 = call status code.
5166 */
5167# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5168 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5169# endif
5170 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5171
5172 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5173
5174 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5175
5176 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5177 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5178 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5179
5180#else
5181# error "port me"
5182#endif
5183 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5184 RT_NOREF_PV(idxInstr);
5185 return off;
5186}
5187
5188
5189/**
5190 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5191 * raising a \#GP(0) if it isn't.
5192 *
5193 * @returns New code buffer offset; throws VBox status code on error.
5194 * @param pReNative The native recompile state.
5195 * @param off The code buffer offset.
5196 * @param idxAddrReg The host register with the address to check.
5197 * @param idxInstr The current instruction.
5198 */
5199DECL_HIDDEN_THROW(uint32_t)
5200iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5201{
5202 /*
5203 * Make sure we don't have any outstanding guest register writes as we may
5204     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
5205 */
5206 off = iemNativeRegFlushPendingWrites(pReNative, off);
5207
5208#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5209 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5210#else
5211 RT_NOREF(idxInstr);
5212#endif
5213
5214#ifdef RT_ARCH_AMD64
5215 /*
5216 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5217 * return raisexcpt();
5218     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5219 */
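    /* Worked example (illustrative): canonical addresses have bits 63:48 equal to bit 47, so
       bits 63:32 lie in [0x00000000, 0x00007fff] or [0xffff8000, 0xffffffff].  Adding 0x8000
       with 32-bit wrap-around maps both ranges into [0x0000, 0xffff], whose upper 16 bits are
       zero.  A non-canonical address such as 0x0000800000000000 gives 0x8000 + 0x8000 = 0x10000,
       so the final shift leaves a non-zero value and we raise #GP(0). */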
5220 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5221
5222 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5223 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5224 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5225 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5226 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5227
5228 iemNativeRegFreeTmp(pReNative, iTmpReg);
5229
5230#elif defined(RT_ARCH_ARM64)
5231 /*
5232 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5233 * return raisexcpt();
5234 * ----
5235 * mov x1, 0x800000000000
5236 * add x1, x0, x1
5237 * cmp xzr, x1, lsr 48
5238 * b.ne .Lraisexcpt
5239 */
5240 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5241
5242 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5243 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5244 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5245 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5246
5247 iemNativeRegFreeTmp(pReNative, iTmpReg);
5248
5249#else
5250# error "Port me"
5251#endif
5252 return off;
5253}
5254
5255
5256/**
5257 * Emits code to check if the content of @a idxAddrReg is within the limit of
5258 * idxSegReg, raising a \#GP(0) if it isn't.
5259 *
5260 * @returns New code buffer offset; throws VBox status code on error.
5261 * @param pReNative The native recompile state.
5262 * @param off The code buffer offset.
5263 * @param idxAddrReg The host register (32-bit) with the address to
5264 * check.
5265 * @param idxSegReg The segment register (X86_SREG_XXX) to check
5266 * against.
5267 * @param idxInstr The current instruction.
5268 */
5269DECL_HIDDEN_THROW(uint32_t)
5270iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5271 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
5272{
5273 /*
5274 * Make sure we don't have any outstanding guest register writes as we may
5275     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
5276 */
5277 off = iemNativeRegFlushPendingWrites(pReNative, off);
5278
5279#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5280 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5281#else
5282 RT_NOREF(idxInstr);
5283#endif
5284
5285 /** @todo implement expand down/whatnot checking */
5286 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
5287
5288 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5289 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
5290 kIemNativeGstRegUse_ForUpdate);
5291
5292 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
5293 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5294
5295 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
5296 return off;
5297}
5298
5299
5300/**
5301 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5302 *
5303 * @returns The flush mask.
5304 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5305 * @param fGstShwFlush The starting flush mask.
5306 */
5307DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5308{
5309 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5310 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5311 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5312 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5313 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5314 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5315 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5316 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5317 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5318 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5319 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5320 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5321 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5322 return fGstShwFlush;
5323}
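
#if 0 /* Illustrative sketch only, not compiled: a far branch that also modifies RFLAGS yields
         a flush mask covering CS selector/base/limit and EFLAGS on top of whatever the caller
         already had in the mask (here the PC shadow).  The function name is made up. */
static uint64_t iemNativeExampleFarBranchFlushMask(void)
{
    return iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_RFLAGS,
                                                     RT_BIT_64(kIemNativeGstReg_Pc));
}
#endif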
5324
5325
5326/**
5327 * Emits a call to a CImpl function or something similar.
5328 */
5329DECL_HIDDEN_THROW(uint32_t)
5330iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5331 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5332{
5333 /*
5334     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5335 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5336 */
5337 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5338 fGstShwFlush
5339 | RT_BIT_64(kIemNativeGstReg_Pc)
5340 | RT_BIT_64(kIemNativeGstReg_EFlags));
5341 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5342
5343 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5344
5345 /*
5346 * Load the parameters.
5347 */
5348#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
5349    /* Special code for the hidden VBOXSTRICTRC pointer. */
5350 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5351 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5352 if (cAddParams > 0)
5353 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5354 if (cAddParams > 1)
5355 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5356 if (cAddParams > 2)
5357 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5358 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5359
5360#else
5361 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5362 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5363 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5364 if (cAddParams > 0)
5365 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5366 if (cAddParams > 1)
5367 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5368 if (cAddParams > 2)
5369# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5370 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5371# else
5372 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5373# endif
5374#endif
5375
5376 /*
5377 * Make the call.
5378 */
5379 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5380
5381#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5382 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5383#endif
5384
5385 /*
5386 * Check the status code.
5387 */
5388 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5389}
5390
5391
5392/**
5393 * Emits a call to a threaded worker function.
5394 */
5395DECL_HIDDEN_THROW(uint32_t)
5396iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5397{
5398 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5399 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5400
5401#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5402 /* The threaded function may throw / long jmp, so set current instruction
5403 number if we're counting. */
5404 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5405#endif
5406
5407 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
5408
5409#ifdef RT_ARCH_AMD64
5410 /* Load the parameters and emit the call. */
5411# ifdef RT_OS_WINDOWS
5412# ifndef VBOXSTRICTRC_STRICT_ENABLED
5413 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5414 if (cParams > 0)
5415 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5416 if (cParams > 1)
5417 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5418 if (cParams > 2)
5419 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5420# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5421 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5422 if (cParams > 0)
5423 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5424 if (cParams > 1)
5425 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5426 if (cParams > 2)
5427 {
5428 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5429 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5430 }
5431 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5432# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5433# else
5434 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5435 if (cParams > 0)
5436 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5437 if (cParams > 1)
5438 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5439 if (cParams > 2)
5440 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5441# endif
5442
5443 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5444
5445# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5446 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5447# endif
5448
5449#elif defined(RT_ARCH_ARM64)
5450 /*
5451 * ARM64:
5452 */
5453 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5454 if (cParams > 0)
5455 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5456 if (cParams > 1)
5457 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5458 if (cParams > 2)
5459 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5460
5461 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5462
5463#else
5464# error "port me"
5465#endif
5466
5467 /*
5468 * Check the status code.
5469 */
5470 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5471
5472 return off;
5473}
5474
5475
5476/**
5477 * Emits the code at the CheckBranchMiss label.
5478 */
5479static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5480{
5481 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5482 if (idxLabel != UINT32_MAX)
5483 {
5484 iemNativeLabelDefine(pReNative, idxLabel, off);
5485
5486 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5487 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5488 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5489
5490 /* jump back to the return sequence. */
5491 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5492 }
5493 return off;
5494}
5495
5496
5497/**
5498 * Emits the code at the NeedCsLimChecking label.
5499 */
5500static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5501{
5502 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5503 if (idxLabel != UINT32_MAX)
5504 {
5505 iemNativeLabelDefine(pReNative, idxLabel, off);
5506
5507 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5508 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5509 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5510
5511 /* jump back to the return sequence. */
5512 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5513 }
5514 return off;
5515}
5516
5517
5518/**
5519 * Emits the code at the ObsoleteTb label.
5520 */
5521static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5522{
5523 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5524 if (idxLabel != UINT32_MAX)
5525 {
5526 iemNativeLabelDefine(pReNative, idxLabel, off);
5527
5528 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5529 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5530 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5531
5532 /* jump back to the return sequence. */
5533 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5534 }
5535 return off;
5536}
5537
5538
5539/**
5540 * Emits the code at the RaiseGP0 label.
5541 */
5542static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5543{
5544 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5545 if (idxLabel != UINT32_MAX)
5546 {
5547 iemNativeLabelDefine(pReNative, idxLabel, off);
5548
5549 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5550 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5551 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5552
5553 /* jump back to the return sequence. */
5554 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5555 }
5556 return off;
5557}
5558
5559
5560/**
5561 * Emits the code at the ReturnWithFlags label (returns
5562 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5563 */
5564static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5565{
5566 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5567 if (idxLabel != UINT32_MAX)
5568 {
5569 iemNativeLabelDefine(pReNative, idxLabel, off);
5570
5571 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5572
5573 /* jump back to the return sequence. */
5574 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5575 }
5576 return off;
5577}
5578
5579
5580/**
5581 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
5582 */
5583static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5584{
5585 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
5586 if (idxLabel != UINT32_MAX)
5587 {
5588 iemNativeLabelDefine(pReNative, idxLabel, off);
5589
5590 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5591
5592 /* jump back to the return sequence. */
5593 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5594 }
5595 return off;
5596}
5597
5598
5599/**
5600 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5601 */
5602static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5603{
5604 /*
5605 * Generate the rc + rcPassUp fiddling code if needed.
5606 */
5607 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5608 if (idxLabel != UINT32_MAX)
5609 {
5610 iemNativeLabelDefine(pReNative, idxLabel, off);
5611
5612 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
5613#ifdef RT_ARCH_AMD64
5614# ifdef RT_OS_WINDOWS
5615# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5616 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5617# endif
5618 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5619 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5620# else
5621 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5622 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5623# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5624 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5625# endif
5626# endif
5627# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5628 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5629# endif
5630
5631#else
5632 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5633 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5634 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5635#endif
5636
5637 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5638 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5639 }
5640 return off;
5641}
5642
5643
5644/**
5645 * Emits a standard epilog.
5646 */
5647static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5648{
5649 *pidxReturnLabel = UINT32_MAX;
5650
5651 /*
5652 * Successful return, so clear the return register (eax, w0).
5653 */
5654 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
5655
5656 /*
5657 * Define label for common return point.
5658 */
5659 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5660 *pidxReturnLabel = idxReturn;
5661
5662 /*
5663 * Restore registers and return.
5664 */
5665#ifdef RT_ARCH_AMD64
5666 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5667
5668 /* Reposition esp at the r15 restore point. */
5669 pbCodeBuf[off++] = X86_OP_REX_W;
5670 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5671 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5672 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5673
5674 /* Pop non-volatile registers and return */
5675 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5676 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5677 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5678 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5679 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5680 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5681 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5682 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5683# ifdef RT_OS_WINDOWS
5684 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5685 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5686# endif
5687 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5688 pbCodeBuf[off++] = 0xc9; /* leave */
5689 pbCodeBuf[off++] = 0xc3; /* ret */
5690 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5691
5692#elif defined(RT_ARCH_ARM64)
5693 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5694
5695    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
5696 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5697 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5698 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5699 IEMNATIVE_FRAME_VAR_SIZE / 8);
5700 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5701 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5702 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5703 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5704 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5705 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5706 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5707 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5708 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5709 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5710 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5711 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5712
5713 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5714 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5715 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5716 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5717
5718 /* retab / ret */
5719# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5720 if (1)
5721 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5722 else
5723# endif
5724 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
5725
5726#else
5727# error "port me"
5728#endif
5729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5730
5731 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
5732}
5733
5734
5735/**
5736 * Emits a standard prolog.
5737 */
5738static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5739{
5740#ifdef RT_ARCH_AMD64
5741 /*
5742 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
5743 * reserving 64 bytes for stack variables plus 4 non-register argument
5744     * slots. Fixed register assignment: xBX = pVCpu;
5745 *
5746 * Since we always do the same register spilling, we can use the same
5747 * unwind description for all the code.
5748 */
5749 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5750 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
5751 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
5752 pbCodeBuf[off++] = 0x8b;
5753 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
5754 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
5755 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
5756# ifdef RT_OS_WINDOWS
5757 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
5758 pbCodeBuf[off++] = 0x8b;
5759 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
5760 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
5761 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
5762# else
5763 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
5764 pbCodeBuf[off++] = 0x8b;
5765 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
5766# endif
5767 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
5768 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
5769 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
5770 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
5771 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
5772 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
5773 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
5774 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
5775
5776 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
5777 X86_GREG_xSP,
5778 IEMNATIVE_FRAME_ALIGN_SIZE
5779 + IEMNATIVE_FRAME_VAR_SIZE
5780 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
5781 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
5782 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
5783 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
5784 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
5785
5786#elif RT_ARCH_ARM64
5787 /*
5788 * We set up a stack frame exactly like on x86, only we have to push the
5789 * return address our selves here. We save all non-volatile registers.
5790     * return address ourselves here.  We save all non-volatile registers.
5791 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5792
5793 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
5794                       * to figure out where the BRK following the AUTHB*+XPACB* stuff in libunwind comes from. It's
5795                       * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
5796                       * in any way conditional, so we just emit this instruction now and hope for the best... */
5797 /* pacibsp */
5798 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
5799# endif
5800
5801 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
5802 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
5803 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5804 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5805 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
5806 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
5807 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5808 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5809 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5810 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5811 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5812 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5813 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5814 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5815 /* Save the BP and LR (ret address) registers at the top of the frame. */
5816 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5817 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5818 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5819 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
5820 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
5821 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
5822
5823 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
5824 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
5825
5826 /* mov r28, r0 */
5827 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
5828 /* mov r27, r1 */
5829 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
5830
5831#else
5832# error "port me"
5833#endif
5834 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5835 return off;
5836}
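
/*
 * Illustration only: the AMD64 prolog emitted above corresponds roughly to
 * the following assembly (frame sizes depend on the IEMNATIVE_FRAME_XXX
 * constants; the non-windows variant skips the rsi/rdi pushes):
 *
 *      push    rbp
 *      mov     rbp, rsp
 *      push    rbx
 *      mov     rbx, rcx/rdi            ; rbx = pVCpu (fixed register)
 *      push    rsi                     ; windows only
 *      push    rdi                     ; windows only
 *      push    r12
 *      push    r13
 *      push    r14
 *      push    r15
 *      sub     rsp, ALIGN + VAR + (STACK_ARGS + SHADOW_ARGS) * 8
 *
 * The ARM64 variant instead saves x19-x28 plus BP/LR with stp, points BP at
 * the saved BP/LR pair, allocates the variable area, and copies the pVCpu
 * and pCpumCtx call arguments into their fixed registers.
 */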
5837
5838
5839
5840
5841/*********************************************************************************************************************************
5842* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
5843*********************************************************************************************************************************/
5844
5845#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
5846 { \
5847 Assert(pReNative->Core.bmVars == 0); \
5848 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
5849 Assert(pReNative->Core.bmStack == 0); \
5850 pReNative->fMc = (a_fMcFlags); \
5851 pReNative->fCImpl = (a_fCImplFlags); \
5852 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
5853
5854/** We have to get to the end in recompilation mode, as otherwise we won't
5855 * generate code for all the IEM_MC_IF_XXX branches. */
5856#define IEM_MC_END() \
5857 iemNativeVarFreeAll(pReNative); \
5858 } return off
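
/*
 * Illustration only (hypothetical MC block): with the macros above, a
 * recompiled threaded function body of the shape
 *
 *      IEM_MC_BEGIN(0, 0, a_fMcFlags, 0);
 *      ... other IEM_MC_XXX statements ...
 *      IEM_MC_END();
 *
 * expands into a block that records the MC/CImpl flags and the argument
 * count on pReNative, emits native code for each statement by advancing
 * 'off', frees all variables via iemNativeVarFreeAll() and finally does
 * 'return off'.
 */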
5859
5860
5861
5862/*********************************************************************************************************************************
5863* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
5864*********************************************************************************************************************************/
5865
5866#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
5867 pReNative->fMc = 0; \
5868 pReNative->fCImpl = (a_fFlags); \
5869 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
5870
5871
5872#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
5873 pReNative->fMc = 0; \
5874 pReNative->fCImpl = (a_fFlags); \
5875 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
5876
5877DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5878 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5879 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
5880{
5881 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
5882}
5883
5884
5885#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
5886 pReNative->fMc = 0; \
5887 pReNative->fCImpl = (a_fFlags); \
5888 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5889 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
5890
5891DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5892 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5893 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
5894{
5895 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
5896}
5897
5898
5899#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
5900 pReNative->fMc = 0; \
5901 pReNative->fCImpl = (a_fFlags); \
5902 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5903 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
5904
5905DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5906 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5907 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
5908 uint64_t uArg2)
5909{
5910 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
5911}
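
/*
 * Illustration only: a deferral like IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED
 * just records the CImpl flags and forwards everything to
 * iemNativeEmitCImplCall() with the argument count, i.e. roughly:
 *
 *      pReNative->fMc    = 0;
 *      pReNative->fCImpl = a_fFlags;
 *      return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush,
 *                                    (uintptr_t)a_pfnCImpl, a_cbInstr, 2 /*cArgs*/, a0, a1, 0 /*uArg2*/);
 */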
5912
5913
5914
5915/*********************************************************************************************************************************
5916* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
5917*********************************************************************************************************************************/
5918
5919/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
5920 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
5921DECL_INLINE_THROW(uint32_t)
5922iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5923{
5924 /*
5925     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
5926     * return with a special status code and make the execution loop deal with
5927     * this.  If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
5928     * exception and won't continue execution.  While CPUMCTX_DBG_DBGF_MASK
5929     * could continue w/o interruption, it will probably drop into the
5930     * debugger, so it's not worth the effort of trying to service it here; we
5931     * just lump it in with the handling of the others.
5932     *
5933     * To simplify the code and the register state management even more (wrt the
5934     * immediate in the AND operation), we always update the flags and skip the
5935     * extra check and its associated conditional jump.
5936 */
5937 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
5938 <= UINT32_MAX);
5939 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5940 kIemNativeGstRegUse_ForUpdate);
5941 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
5942 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
5943 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
5944 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
5945 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5946
5947 /* Free but don't flush the EFLAGS register. */
5948 iemNativeRegFreeTmp(pReNative, idxEflReg);
5949
5950 return off;
5951}
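
/*
 * Illustration only: the check above boils down to something like this
 * pseudo assembly, with 'efl' being the host register shadowing guest
 * EFLAGS:
 *
 *      test    efl, X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK
 *      jnz     ReturnWithFlags                 ; leave the TB with a special status
 *      and     efl, ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW)
 *      mov     [pVCpu + cpum.GstCtx.eflags], efl
 */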
5952
5953
5954/** Handles a_rcNormal: a no-op for VINF_SUCCESS, otherwise emits a jump to the ReturnBreak label. */
5955template<int const a_rcNormal>
5956DECL_FORCE_INLINE(uint32_t)
5957iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5958{
5959 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
5960 if (a_rcNormal != VINF_SUCCESS)
5961 {
5962#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5963 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5964#else
5965 RT_NOREF_PV(idxInstr);
5966#endif
5967 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
5968 }
5969 return off;
5970}
5971
5972
5973#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
5974 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
5975 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5976
5977#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
5978 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
5979 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
5980 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5981
5982/** Same as iemRegAddToRip64AndFinishingNoFlags. */
5983DECL_INLINE_THROW(uint32_t)
5984iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5985{
5986 /* Allocate a temporary PC register. */
5987 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5988
5989 /* Perform the addition and store the result. */
5990 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
5991 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5992
5993 /* Free but don't flush the PC register. */
5994 iemNativeRegFreeTmp(pReNative, idxPcReg);
5995
5996 return off;
5997}
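
/*
 * Illustration only: for a hypothetical 3 byte instruction the helper above
 * emits the equivalent of
 *
 *      add     pc_reg, 3
 *      mov     [pVCpu + cpum.GstCtx.rip], pc_reg
 *
 * where pc_reg is whatever host register currently shadows the guest RIP.
 */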
5998
5999
6000#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
6001 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6002 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6003
6004#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6005 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6006 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6007 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6008
6009/** Same as iemRegAddToEip32AndFinishingNoFlags. */
6010DECL_INLINE_THROW(uint32_t)
6011iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6012{
6013 /* Allocate a temporary PC register. */
6014 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6015
6016 /* Perform the addition and store the result. */
6017 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6018 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6019
6020 /* Free but don't flush the PC register. */
6021 iemNativeRegFreeTmp(pReNative, idxPcReg);
6022
6023 return off;
6024}
6025
6026
6027#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
6028 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6029 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6030
6031#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6032 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6033 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6034 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6035
6036/** Same as iemRegAddToIp16AndFinishingNoFlags. */
6037DECL_INLINE_THROW(uint32_t)
6038iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6039{
6040 /* Allocate a temporary PC register. */
6041 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6042
6043 /* Perform the addition and store the result. */
6044 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6045 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6046 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6047
6048 /* Free but don't flush the PC register. */
6049 iemNativeRegFreeTmp(pReNative, idxPcReg);
6050
6051 return off;
6052}
6053
6054
6055
6056/*********************************************************************************************************************************
6057* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
6058*********************************************************************************************************************************/
6059
6060#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6061 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6062 (a_enmEffOpSize), pCallEntry->idxInstr); \
6063 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6064
6065#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6066 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6067 (a_enmEffOpSize), pCallEntry->idxInstr); \
6068 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6069 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6070
6071#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
6072 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6073 IEMMODE_16BIT, pCallEntry->idxInstr); \
6074 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6075
6076#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6077 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6078 IEMMODE_16BIT, pCallEntry->idxInstr); \
6079 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6080 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6081
6082#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
6083 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6084 IEMMODE_64BIT, pCallEntry->idxInstr); \
6085 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6086
6087#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6088 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6089 IEMMODE_64BIT, pCallEntry->idxInstr); \
6090 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6091 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6092
6093/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
6094 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
6095 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
6096DECL_INLINE_THROW(uint32_t)
6097iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6098 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6099{
6100 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
6101
6102 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6103 off = iemNativeRegFlushPendingWrites(pReNative, off);
6104
6105 /* Allocate a temporary PC register. */
6106 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6107
6108 /* Perform the addition. */
6109 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
6110
6111 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
6112 {
6113 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6114 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6115 }
6116 else
6117 {
6118 /* Just truncate the result to 16-bit IP. */
6119 Assert(enmEffOpSize == IEMMODE_16BIT);
6120 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6121 }
6122 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6123
6124 /* Free but don't flush the PC register. */
6125 iemNativeRegFreeTmp(pReNative, idxPcReg);
6126
6127 return off;
6128}
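
/*
 * Illustration only: for a 64-bit operand size rel8 jump the sequence
 * emitted above is roughly
 *
 *      (flush dirty guest register shadows to CPUMCTX)
 *      add     pc_reg, disp + cbInstr
 *      (raise #GP(0) and exit the TB if pc_reg isn't canonical)
 *      mov     [pVCpu + cpum.GstCtx.rip], pc_reg
 *
 * while the 16-bit operand size variant truncates pc_reg to 16 bits instead
 * of performing the canonical check.
 */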
6129
6130
6131#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6132 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6133 (a_enmEffOpSize), pCallEntry->idxInstr); \
6134 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6135
6136#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6137 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6138 (a_enmEffOpSize), pCallEntry->idxInstr); \
6139 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6140 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6141
6142#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
6143 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6144 IEMMODE_16BIT, pCallEntry->idxInstr); \
6145 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6146
6147#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6148 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6149 IEMMODE_16BIT, pCallEntry->idxInstr); \
6150 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6151 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6152
6153#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
6154 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6155 IEMMODE_32BIT, pCallEntry->idxInstr); \
6156 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6157
6158#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6159 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6160 IEMMODE_32BIT, pCallEntry->idxInstr); \
6161 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6162 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6163
6164/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
6165 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
6166 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
6167DECL_INLINE_THROW(uint32_t)
6168iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6169 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6170{
6171 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
6172
6173 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6174 off = iemNativeRegFlushPendingWrites(pReNative, off);
6175
6176 /* Allocate a temporary PC register. */
6177 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6178
6179 /* Perform the addition. */
6180 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6181
6182 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6183 if (enmEffOpSize == IEMMODE_16BIT)
6184 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6185
6186 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
6187 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6188
6189 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6190
6191 /* Free but don't flush the PC register. */
6192 iemNativeRegFreeTmp(pReNative, idxPcReg);
6193
6194 return off;
6195}
6196
6197
6198#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
6199 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6200 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6201
6202#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
6203 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6204 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6205 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6206
6207#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
6208 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6209 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6210
6211#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6212 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6213 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6214 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6215
6216#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
6217 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6218 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6219
6220#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6221 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6222 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6223 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6224
6225/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6226DECL_INLINE_THROW(uint32_t)
6227iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6228 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6229{
6230 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6231 off = iemNativeRegFlushPendingWrites(pReNative, off);
6232
6233 /* Allocate a temporary PC register. */
6234 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6235
6236 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6237 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6238 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6239 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6240 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6241
6242 /* Free but don't flush the PC register. */
6243 iemNativeRegFreeTmp(pReNative, idxPcReg);
6244
6245 return off;
6246}
6247
6248
6249
6250/*********************************************************************************************************************************
6251* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                 *
6252*********************************************************************************************************************************/
6253
6254/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6255#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6256 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6257
6258/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6259#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6260 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6261
6262/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6263#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6264 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6265
6266/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6267 * clears flags. */
6268#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6269 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6270 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6271
6272/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6273 * clears flags. */
6274#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6275 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6276 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6277
6278/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6279 * clears flags. */
6280#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6281 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6282 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6283
6284#undef IEM_MC_SET_RIP_U16_AND_FINISH
6285
6286
6287/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6288#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6289 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6290
6291/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6292#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6293 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6294
6295/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6296 * clears flags. */
6297#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6298 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6299 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6300
6301/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6302 * and clears flags. */
6303#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6304 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6305 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6306
6307#undef IEM_MC_SET_RIP_U32_AND_FINISH
6308
6309
6310/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6311#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6312 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6313
6314/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6315 * and clears flags. */
6316#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6317 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6318 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6319
6320#undef IEM_MC_SET_RIP_U64_AND_FINISH
6321
6322
6323/** Same as iemRegRipJumpU16AndFinishNoFlags,
6324 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6325DECL_INLINE_THROW(uint32_t)
6326iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6327 uint8_t idxInstr, uint8_t cbVar)
6328{
6329 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6330 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
6331
6332 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6333 off = iemNativeRegFlushPendingWrites(pReNative, off);
6334
6335 /* Get a register with the new PC loaded from idxVarPc.
6336       Note! This ASSUMES that the high bits of the GPR are zeroed. */
6337 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6338
6339 /* Check limit (may #GP(0) + exit TB). */
6340 if (!f64Bit)
6341 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6342 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6343 else if (cbVar > sizeof(uint32_t))
6344 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6345
6346 /* Store the result. */
6347 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6348
6349 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6350    /** @todo implicitly free the variable? */
6351
6352 return off;
6353}
6354
6355
6356
6357/*********************************************************************************************************************************
6358* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
6359*********************************************************************************************************************************/
6360
6361/**
6362 * Pushes an IEM_MC_IF_XXX onto the condition stack.
6363 *
6364 * @returns Pointer to the condition stack entry.  Longjmps with
6365 *          VERR_IEM_COND_TOO_DEEPLY_NESTED if nested too deeply.
6366 */
6367DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
6368{
6369 uint32_t const idxStack = pReNative->cCondDepth;
6370 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
6371
6372 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
6373 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
6374
6375 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
6376 pEntry->fInElse = false;
6377 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
6378 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
6379
6380 return pEntry;
6381}
6382
6383
6384/**
6385 * Start of the if-block, snapshotting the register and variable state.
6386 */
6387DECL_INLINE_THROW(void)
6388iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
6389{
6390 Assert(offIfBlock != UINT32_MAX);
6391 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6392 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6393 Assert(!pEntry->fInElse);
6394
6395    /* Define the start of the IF block if requested or for disassembly purposes. */
6396 if (idxLabelIf != UINT32_MAX)
6397 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
6398#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6399 else
6400 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
6401#else
6402 RT_NOREF(offIfBlock);
6403#endif
6404
6405 /* Copy the initial state so we can restore it in the 'else' block. */
6406 pEntry->InitialState = pReNative->Core;
6407}
6408
6409
6410#define IEM_MC_ELSE() } while (0); \
6411 off = iemNativeEmitElse(pReNative, off); \
6412 do {
6413
6414/** Emits code related to IEM_MC_ELSE. */
6415DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6416{
6417 /* Check sanity and get the conditional stack entry. */
6418 Assert(off != UINT32_MAX);
6419 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6420 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6421 Assert(!pEntry->fInElse);
6422
6423 /* Jump to the endif */
6424 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
6425
6426 /* Define the else label and enter the else part of the condition. */
6427 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6428 pEntry->fInElse = true;
6429
6430 /* Snapshot the core state so we can do a merge at the endif and restore
6431 the snapshot we took at the start of the if-block. */
6432 pEntry->IfFinalState = pReNative->Core;
6433 pReNative->Core = pEntry->InitialState;
6434
6435 return off;
6436}
6437
6438
6439#define IEM_MC_ENDIF() } while (0); \
6440 off = iemNativeEmitEndIf(pReNative, off)
6441
6442/** Emits code related to IEM_MC_ENDIF. */
6443DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6444{
6445 /* Check sanity and get the conditional stack entry. */
6446 Assert(off != UINT32_MAX);
6447 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6448 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6449
6450 /*
6451     * Now we have to find common ground with the other core state (end of the
6452     * if-block, or the initial state if there's no else-block).  Use the smallest
6453     * common denominator and just drop anything that isn't the same in both.
6454 */
6455 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
6456 * which is why we're doing this at the end of the else-block.
6457     *        But we'd need more info about the code that follows for that to be worth the effort. */
6458 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
6459 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
6460 {
6461 /* shadow guest stuff first. */
6462 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
6463 if (fGstRegs)
6464 {
6465 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
6466 do
6467 {
6468 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
6469 fGstRegs &= ~RT_BIT_64(idxGstReg);
6470
6471 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6472 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
6473 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
6474 {
6475 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
6476 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
6477 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
6478 }
6479 } while (fGstRegs);
6480 }
6481 else
6482 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
6483
6484 /* Check variables next. For now we must require them to be identical
6485 or stuff we can recreate. */
6486 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
6487 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
6488 if (fVars)
6489 {
6490 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
6491 do
6492 {
6493 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
6494 fVars &= ~RT_BIT_32(idxVar);
6495
6496 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
6497 {
6498 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
6499 continue;
6500 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6501 {
6502 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6503 if (idxHstReg != UINT8_MAX)
6504 {
6505 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6506 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6507 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
6508 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6509 }
6510 continue;
6511 }
6512 }
6513 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
6514 continue;
6515
6516 /* Irreconcilable, so drop it. */
6517 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6518 if (idxHstReg != UINT8_MAX)
6519 {
6520 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6521 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6522 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
6523 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6524 }
6525 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
6526 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6527 } while (fVars);
6528 }
6529
6530        /* Finally, check that the host register allocations match. */
6531 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
6532 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
6533 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
6534 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
6535 }
6536
6537 /*
6538 * Define the endif label and maybe the else one if we're still in the 'if' part.
6539 */
6540 if (!pEntry->fInElse)
6541 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6542 else
6543 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
6544 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
6545
6546    /* Pop the conditional stack. */
6547 pReNative->cCondDepth -= 1;
6548
6549 return off;
6550}
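
/*
 * Illustration only: an IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF sequence
 * produces native code laid out like this (labels are per condition
 * sequence number):
 *
 *      test + conditional jump to Else_N       ; emitted by the IF emitter
 *      ... if-block code ...
 *      jmp     Endif_N                         ; emitted by iemNativeEmitElse
 *  Else_N:
 *      ... else-block code ...
 *  Endif_N:
 *      ; register/variable state reconciled here by iemNativeEmitEndIf
 */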
6551
6552
6553#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
6554 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
6555 do {
6556
6557/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
6558DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6559{
6560 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6561
6562 /* Get the eflags. */
6563 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6564 kIemNativeGstRegUse_ReadOnly);
6565
6566 /* Test and jump. */
6567 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6568
6569 /* Free but don't flush the EFlags register. */
6570 iemNativeRegFreeTmp(pReNative, idxEflReg);
6571
6572 /* Make a copy of the core state now as we start the if-block. */
6573 iemNativeCondStartIfBlock(pReNative, off);
6574
6575 return off;
6576}
6577
6578
6579#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
6580 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
6581 do {
6582
6583/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
6584DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6585{
6586 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6587
6588 /* Get the eflags. */
6589 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6590 kIemNativeGstRegUse_ReadOnly);
6591
6592 /* Test and jump. */
6593 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6594
6595 /* Free but don't flush the EFlags register. */
6596 iemNativeRegFreeTmp(pReNative, idxEflReg);
6597
6598 /* Make a copy of the core state now as we start the if-block. */
6599 iemNativeCondStartIfBlock(pReNative, off);
6600
6601 return off;
6602}
6603
6604
6605#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
6606 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
6607 do {
6608
6609/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
6610DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6611{
6612 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6613
6614 /* Get the eflags. */
6615 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6616 kIemNativeGstRegUse_ReadOnly);
6617
6618 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6619 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6620
6621 /* Test and jump. */
6622 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6623
6624 /* Free but don't flush the EFlags register. */
6625 iemNativeRegFreeTmp(pReNative, idxEflReg);
6626
6627 /* Make a copy of the core state now as we start the if-block. */
6628 iemNativeCondStartIfBlock(pReNative, off);
6629
6630 return off;
6631}
6632
6633
6634#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
6635 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
6636 do {
6637
6638/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
6639DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6640{
6641 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6642
6643 /* Get the eflags. */
6644 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6645 kIemNativeGstRegUse_ReadOnly);
6646
6647 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6648 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6649
6650 /* Test and jump. */
6651 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6652
6653 /* Free but don't flush the EFlags register. */
6654 iemNativeRegFreeTmp(pReNative, idxEflReg);
6655
6656 /* Make a copy of the core state now as we start the if-block. */
6657 iemNativeCondStartIfBlock(pReNative, off);
6658
6659 return off;
6660}
6661
6662
6663#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
6664 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
6665 do {
6666
6667#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
6668 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
6669 do {
6670
6671/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
6672DECL_INLINE_THROW(uint32_t)
6673iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6674 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6675{
6676 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6677
6678 /* Get the eflags. */
6679 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6680 kIemNativeGstRegUse_ReadOnly);
6681
6682 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6683 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6684
6685 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6686 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6687 Assert(iBitNo1 != iBitNo2);
6688
6689#ifdef RT_ARCH_AMD64
6690 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
6691
6692 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6693 if (iBitNo1 > iBitNo2)
6694 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6695 else
6696 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6697 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6698
6699#elif defined(RT_ARCH_ARM64)
6700 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6701 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6702
6703 /* and tmpreg, eflreg, #1<<iBitNo1 */
6704 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6705
6706 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6707 if (iBitNo1 > iBitNo2)
6708 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6709 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6710 else
6711 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6712 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6713
6714 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6715
6716#else
6717# error "Port me"
6718#endif
6719
6720 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6721 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6722 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6723
6724 /* Free but don't flush the EFlags and tmp registers. */
6725 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6726 iemNativeRegFreeTmp(pReNative, idxEflReg);
6727
6728 /* Make a copy of the core state now as we start the if-block. */
6729 iemNativeCondStartIfBlock(pReNative, off);
6730
6731 return off;
6732}
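
/*
 * Illustration only, assuming a caller like IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF,
 * X86_EFL_OF): with iBitNo1=7 (SF) and iBitNo2=11 (OF) the code above
 * computes
 *
 *      tmp   = efl & X86_EFL_SF            ; isolate bit 7
 *      tmp <<= 11 - 7                      ; align it with bit 11
 *      tmp  ^= efl                         ; bit 11 of tmp = SF ^ OF
 *
 * so bit iBitNo2 of tmp is set exactly when the two flags differ, and the
 * final test-and-jump sends the equal (EQ) or unequal (NE) case to the else
 * label as appropriate.
 */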
6733
6734
6735#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
6736 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
6737 do {
6738
6739#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
6740 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
6741 do {
6742
6743/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
6744 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
6745DECL_INLINE_THROW(uint32_t)
6746iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
6747 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6748{
6749 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6750
6751    /* We need an if-block label for the inverted variant. */
6752 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
6753 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
6754
6755 /* Get the eflags. */
6756 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6757 kIemNativeGstRegUse_ReadOnly);
6758
6759 /* Translate the flag masks to bit numbers. */
6760 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6761 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6762
6763 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6764 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6765 Assert(iBitNo1 != iBitNo);
6766
6767 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6768 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6769 Assert(iBitNo2 != iBitNo);
6770 Assert(iBitNo2 != iBitNo1);
6771
6772#ifdef RT_ARCH_AMD64
6773 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
6774#elif defined(RT_ARCH_ARM64)
6775 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6776#endif
6777
6778 /* Check for the lone bit first. */
6779 if (!fInverted)
6780 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6781 else
6782 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
6783
6784 /* Then extract and compare the other two bits. */
6785#ifdef RT_ARCH_AMD64
6786 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6787 if (iBitNo1 > iBitNo2)
6788 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6789 else
6790 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6791 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6792
6793#elif defined(RT_ARCH_ARM64)
6794 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6795
6796 /* and tmpreg, eflreg, #1<<iBitNo1 */
6797 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6798
6799 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6800 if (iBitNo1 > iBitNo2)
6801 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6802 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6803 else
6804 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6805 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6806
6807 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6808
6809#else
6810# error "Port me"
6811#endif
6812
6813 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6814 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6815 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6816
6817 /* Free but don't flush the EFlags and tmp registers. */
6818 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6819 iemNativeRegFreeTmp(pReNative, idxEflReg);
6820
6821 /* Make a copy of the core state now as we start the if-block. */
6822 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
6823
6824 return off;
6825}
6826
6827
6828#define IEM_MC_IF_CX_IS_NZ() \
6829 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
6830 do {
6831
6832/** Emits code for IEM_MC_IF_CX_IS_NZ. */
6833DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6834{
6835 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6836
6837 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6838 kIemNativeGstRegUse_ReadOnly);
6839 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6840 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6841
6842 iemNativeCondStartIfBlock(pReNative, off);
6843 return off;
6844}
6845
6846
6847#define IEM_MC_IF_ECX_IS_NZ() \
6848 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
6849 do {
6850
6851#define IEM_MC_IF_RCX_IS_NZ() \
6852 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
6853 do {
6854
6855/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
6856DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
6857{
6858 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6859
6860 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6861 kIemNativeGstRegUse_ReadOnly);
6862 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6863 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6864
6865 iemNativeCondStartIfBlock(pReNative, off);
6866 return off;
6867}
6868
6869
6870#define IEM_MC_IF_CX_IS_NOT_ONE() \
6871 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
6872 do {
6873
6874/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
6875DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6876{
6877 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6878
6879 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6880 kIemNativeGstRegUse_ReadOnly);
6881#ifdef RT_ARCH_AMD64
6882 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
6883#else
6884 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6885 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
6886 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6887#endif
6888 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6889
6890 iemNativeCondStartIfBlock(pReNative, off);
6891 return off;
6892}
6893
6894
6895#define IEM_MC_IF_ECX_IS_NOT_ONE() \
6896 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
6897 do {
6898
6899#define IEM_MC_IF_RCX_IS_NOT_ONE() \
6900 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
6901 do {
6902
6903/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
6904DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
6905{
6906 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6907
6908 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6909 kIemNativeGstRegUse_ReadOnly);
6910 if (f64Bit)
6911 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
6912 else
6913 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
6914 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6915
6916 iemNativeCondStartIfBlock(pReNative, off);
6917 return off;
6918}
6919
6920
6921#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
6922 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
6923 do {
6924
6925#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
6926 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
6927 do {
6928
6929/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
6930 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
6931DECL_INLINE_THROW(uint32_t)
6932iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
6933{
6934 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6935
6936 /* We have to load both RCX and EFLAGS before we can start branching,
6937 otherwise we'll end up in the else-block with an inconsistent
6938 register allocator state.
6939 Doing EFLAGS first as it's more likely to be loaded, right? */
6940 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6941 kIemNativeGstRegUse_ReadOnly);
6942 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6943 kIemNativeGstRegUse_ReadOnly);
6944
6945 /** @todo we could reduce this to a single branch instruction by spending a
6946 * temporary register and some setnz stuff. Not sure if loops are
6947 * worth it. */
6948 /* Check CX. */
6949#ifdef RT_ARCH_AMD64
6950 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
6951#else
6952 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6953 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
6954 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6955#endif
6956
6957 /* Check the EFlags bit. */
6958 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6959 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6960 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6961 !fCheckIfSet /*fJmpIfSet*/);
6962
6963 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6964 iemNativeRegFreeTmp(pReNative, idxEflReg);
6965
6966 iemNativeCondStartIfBlock(pReNative, off);
6967 return off;
6968}
6969
6970
6971#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
6972 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
6973 do {
6974
6975#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
6976 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
6977 do {
6978
6979#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
6980 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
6981 do {
6982
6983#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
6984 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
6985 do {
6986
6987/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
6988 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
6989 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
6990 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
6991DECL_INLINE_THROW(uint32_t)
6992iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6993 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
6994{
6995 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6996
6997 /* We have to load both RCX and EFLAGS before we can start branching,
6998 otherwise we'll end up in the else-block with an inconsistent
6999 register allocator state.
7000 Doing EFLAGS first as it's more likely to be loaded, right? */
7001 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7002 kIemNativeGstRegUse_ReadOnly);
7003 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7004 kIemNativeGstRegUse_ReadOnly);
7005
7006 /** @todo we could reduce this to a single branch instruction by spending a
7007 * temporary register and some setnz stuff. Not sure if loops are
7008 * worth it. */
7009 /* Check RCX/ECX. */
7010 if (f64Bit)
7011 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7012 else
7013 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7014
7015 /* Check the EFlags bit. */
7016 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7017 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7018 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7019 !fCheckIfSet /*fJmpIfSet*/);
7020
7021 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7022 iemNativeRegFreeTmp(pReNative, idxEflReg);
7023
7024 iemNativeCondStartIfBlock(pReNative, off);
7025 return off;
7026}
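/* Rough shape of the native code the "is not one and EFLAGS bit" emitters above
   produce for the LOOPcc style conditions (illustrative sketch only, not from the
   original source; the actual instruction selection differs between AMD64 and
   ARM64 hosts):
       cmp     <rcx/ecx/cx>, 1         ; counter equals one?
       je      .Lelse                  ;   -> skip to the else/endif block
       test    <eflags shadow>, <bit>  ; bit-test the relevant EFLAGS bit
       jcc     .Lelse                  ; jump when the condition is NOT met
     .Lif:                             ; the if-block body follows here
   The else label is defined later when the else/endif statement is recompiled. */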
7027
7028
7029
7030/*********************************************************************************************************************************
7031* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7032*********************************************************************************************************************************/
7033/** Number of hidden arguments for CIMPL calls.
7034 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
7035#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7036# define IEM_CIMPL_HIDDEN_ARGS 3
7037#else
7038# define IEM_CIMPL_HIDDEN_ARGS 2
7039#endif
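/* Illustrative note (not from the original source): the hidden arguments are the
   parameters every C-impl worker takes in front of the MC-level arguments,
   roughly along the lines of
       VBOXSTRICTRC iemCImpl_Something(PVMCPUCC pVCpu, uint8_t cbInstr, ...);
   With VBOXSTRICTRC_STRICT_ENABLED on Windows/AMD64 the strict status code is
   additionally returned via a hidden return buffer, which is what bumps the
   count from 2 to 3 above. (This is an assumption based on the usual VBOXSTRICTRC
   calling convention quirk, not something stated in this file.) */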
7040
7041#define IEM_MC_NOREF(a_Name) \
7042 RT_NOREF_PV(a_Name)
7043
7044#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
7045 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
7046
7047#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
7048 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
7049
7050#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
7051 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
7052
7053#define IEM_MC_LOCAL(a_Type, a_Name) \
7054 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
7055
7056#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
7057 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
7058
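#if 0 /* Illustrative sketch of how the IEM_MC_ARG/IEM_MC_LOCAL macros above are
         used inside a (hypothetical) MC block; each expands to a variable index
         allocation in the recompiler state rather than a real C variable: */
        IEM_MC_LOCAL(uint16_t,      u16Tmp);                    /* iemNativeVarAlloc(pReNative, 2) */
        IEM_MC_ARG(uint16_t *,      pu16Dst,            0);     /* argument slot 0 (after hidden args) */
        IEM_MC_ARG_CONST(uint16_t,  u16Src, /*value*/1, 1);     /* constant argument in slot 1 */
        IEM_MC_ARG_LOCAL_REF(uint16_t *, pu16Tmp, u16Tmp, 2);   /* argument slot 2 referencing the local */
#endif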
7059
7060/**
7061 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
7062 */
7063DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
7064{
7065 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
7066 return IEM_CIMPL_HIDDEN_ARGS;
7067 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
7068 return 1;
7069 return 0;
7070}
7071
7072
7073/**
7074 * Internal work that allocates a variable with kind set to
7075 * kIemNativeVarKind_Invalid and no current stack allocation.
7076 *
7077 * The kind will either be set by the caller or later when the variable is first
7078 * assigned a value.
7079 */
7080static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7081{
7082 Assert(cbType > 0 && cbType <= 64);
7083 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7084 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7085 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7086 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7087 pReNative->Core.aVars[idxVar].cbVar = cbType;
7088 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7089 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7090 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7091 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7092 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7093 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7094 pReNative->Core.aVars[idxVar].u.uValue = 0;
7095 return idxVar;
7096}
7097
7098
7099/**
7100 * Internal work that allocates an argument variable w/o setting enmKind.
7101 */
7102static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7103{
7104 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7105 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7106 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7107
7108 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7109 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
7110 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7111 return idxVar;
7112}
7113
7114
7115/**
7116 * Gets the stack slot for a stack variable, allocating one if necessary.
7117 *
7118 * Calling this function implies that the stack slot will contain a valid
7119 * variable value. The caller deals with any register currently assigned to the
7120 * variable, typically by spilling it into the stack slot.
7121 *
7122 * @returns The stack slot number.
7123 * @param pReNative The recompiler state.
7124 * @param idxVar The variable.
7125 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7126 */
7127DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7128{
7129 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7130 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
7131
7132 /* Already got a slot? */
7133 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7134 if (idxStackSlot != UINT8_MAX)
7135 {
7136 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7137 return idxStackSlot;
7138 }
7139
7140 /*
7141 * A single slot is easy to allocate.
7142 * Allocate them from the top end, closest to BP, to reduce the displacement.
7143 */
7144 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
7145 {
7146 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7147 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7148 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7149 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
7150 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
7151 return (uint8_t)iSlot;
7152 }
7153
7154 /*
7155 * We need more than one stack slot.
7156 *
7157 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7158 */
7159 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7160 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
7161 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
7162 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
7163 uint32_t bmStack = ~pReNative->Core.bmStack;
7164 while (bmStack != UINT32_MAX)
7165 {
7166/** @todo allocate from the top to reduce BP displacement. */
7167 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7168 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7169 if (!(iSlot & fBitAlignMask))
7170 {
7171 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7172 {
7173 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7174 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
7175 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
7176 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
7177 return (uint8_t)iSlot;
7178 }
7179 }
7180 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7181 }
7182 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7183}
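/* Worked example (sketch) of the multi-slot masks computed above, assuming the
   usual 8-byte stack slots:
       cbVar = 16  ->  fBitAlignMask = 0x1,  fBitAllocMask = 0x03  (2 slots, 16-byte aligned)
       cbVar = 32  ->  fBitAlignMask = 0x3,  fBitAllocMask = 0x0f  (4 slots, 32-byte aligned)
       cbVar = 64  ->  fBitAlignMask = 0x7,  fBitAllocMask = 0xff  (8 slots, 64-byte aligned)
   i.e. RT_BIT_32(ASMBitLastSetU32(cbVar) - 4) - 1 for the alignment mask and
   RT_BIT_32((cbVar + 7) >> 3) - 1 for the allocation mask. */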
7184
7185
7186/**
7187 * Changes the variable to a stack variable.
7188 *
7189 * Currently this is only possible to do the first time the variable is used;
7190 * switching later can be implemented but hasn't been done.
7191 *
7192 * @param pReNative The recompiler state.
7193 * @param idxVar The variable.
7194 * @throws VERR_IEM_VAR_IPE_2
7195 */
7196static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7197{
7198 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7199 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
7200 {
7201 /* We could in theory transition from immediate to stack as well, but it
7202 would involve the caller doing work storing the value on the stack. So,
7203 till that's required we only allow transition from invalid. */
7204 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7205 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7206 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7207 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
7208
7209 /* Note! We don't allocate a stack slot here, that's only done when a
7210 slot is actually needed to hold a variable value. */
7211 }
7212}
7213
7214
7215/**
7216 * Sets the variable to a constant (immediate) value.
7217 *
7218 * This does not require stack storage as we know the value and can always
7219 * reload it, unless of course it's referenced.
7220 *
7221 * @param pReNative The recompiler state.
7222 * @param idxVar The variable.
7223 * @param uValue The immediate value.
7224 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7225 */
7226static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7227{
7228 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7229 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
7230 {
7231 /* Only simple transitions for now. */
7232 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7233 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7234 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
7235 }
7236 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7237
7238 pReNative->Core.aVars[idxVar].u.uValue = uValue;
7239 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
7240 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
7241 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
7242}
7243
7244
7245/**
7246 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7247 *
7248 * This does not require stack storage as we know the value and can always
7249 * reload it. Loading is postponed till needed.
7250 *
7251 * @param pReNative The recompiler state.
7252 * @param idxVar The variable.
7253 * @param idxOtherVar The variable to take the (stack) address of.
7254 *
7255 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7256 */
7257static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7258{
7259 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7260 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7261
7262 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7263 {
7264 /* Only simple transitions for now. */
7265 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7266 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7267 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7268 }
7269 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7270
7271 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
7272
7273 /* Update the other variable, ensure it's a stack variable. */
7274 /** @todo handle variables with const values... that'll go boom now. */
7275 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7276 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
7277}
7278
7279
7280/**
7281 * Sets the variable to a reference (pointer) to a guest register reference.
7282 *
7283 * This does not require stack storage as we know the value and can always
7284 * reload it. Loading is postponed till needed.
7285 *
7286 * @param pReNative The recompiler state.
7287 * @param idxVar The variable.
7288 * @param enmRegClass The class of guest register to reference.
7289 * @param idxReg The register within @a enmRegClass to reference.
7290 *
7291 * @throws VERR_IEM_VAR_IPE_2
7292 */
7293static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7294 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7295{
7296 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7297
7298 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
7299 {
7300 /* Only simple transitions for now. */
7301 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7302 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7303 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
7304 }
7305 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7306
7307 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
7308 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
7309}
7310
7311
7312DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7313{
7314 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7315}
7316
7317
7318DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7319{
7320 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7321
7322 /* Since we're using a generic uint64_t value type, we must truncate it if
7323 the variable is smaller, otherwise we may end up with a too large value when
7324 scaling up an imm8 w/ sign-extension.
7325
7326 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7327 in the bios, bx=1) when running on arm, because clang expects 16-bit
7328 register parameters to have bits 16 and up set to zero. Instead of
7329 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7330 CF value in the result. */
7331 switch (cbType)
7332 {
7333 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7334 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7335 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7336 }
7337 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7338 return idxVar;
7339}
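#if 0 /* Usage sketch (hypothetical values): the truncation above means that a
         sign-extended 16-bit immediate passed as a generic uint64_t ends up as
         0x000000000000ffff in the stored constant, so the host argument register
         really does get bits 16..63 cleared as narrow parameters require: */
        uint8_t const idxArgSrc = iemNativeArgAllocConst(pReNative, 1 /*iArgNo*/, sizeof(uint16_t),
                                                         UINT64_C(0xffffffffffffffff));
#endif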
7340
7341
7342DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7343{
7344 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7345 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7346 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7347 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7348
7349 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7350 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
7351 return idxArgVar;
7352}
7353
7354
7355DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7356{
7357 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7358 /* Don't set to stack now; leave that to the first use since, for instance,
7359 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7360 return idxVar;
7361}
7362
7363
7364DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7365{
7366 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7367
7368 /* Since we're using a generic uint64_t value type, we must truncate it if
7369 the variable is smaller, otherwise we may end up with a too large value when
7370 scaling up an imm8 w/ sign-extension. */
7371 switch (cbType)
7372 {
7373 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7374 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7375 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7376 }
7377 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7378 return idxVar;
7379}
7380
7381
7382/**
7383 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7384 * fixed till we call iemNativeVarRegisterRelease.
7385 *
7386 * @returns The host register number.
7387 * @param pReNative The recompiler state.
7388 * @param idxVar The variable.
7389 * @param poff Pointer to the instruction buffer offset.
7390 * In case a register needs to be freed up or the value
7391 * loaded off the stack.
7392 * @param fInitialized Set if the variable must already have been initialized.
7393 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7394 * the case.
7395 * @param idxRegPref Preferred register number or UINT8_MAX.
7396 */
7397DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7398 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7399{
7400 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7401 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
7402 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7403
7404 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7405 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7406 {
7407 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
7408 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7409 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7410 return idxReg;
7411 }
7412
7413 /*
7414 * If the kind of variable has not yet been set, default to 'stack'.
7415 */
7416 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
7417 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7418 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
7419 iemNativeVarSetKindToStack(pReNative, idxVar);
7420
7421 /*
7422 * We have to allocate a register for the variable, even if it's a stack one,
7423 * as we don't know whether there are modifications being made to it before it's
7424 * finalized (todo: analyze and insert hints about that?).
7425 *
7426 * If we can, we try to get the correct register for argument variables. This
7427 * is assuming that most argument variables are fetched as close as possible
7428 * to the actual call, so that there aren't any interfering hidden calls
7429 * (memory accesses, etc) in between.
7430 *
7431 * If we cannot, or it's an ordinary (non-argument) variable, we make sure no
7432 * argument registers that will be used by this MC block are allocated here,
7433 * and we always prefer non-volatile registers to avoid needing to spill stuff
7434 * for internal calls.
7435 */
7436 /** @todo Have the python script detect too early argument value fetches and
7437 * warn about hidden calls causing less optimal code to be generated. */
7438
7439 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7440 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7441 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7442 {
7443 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7444 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7445 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7446 }
7447 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7448 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7449 {
7450 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7451 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7452 & ~pReNative->Core.bmHstRegsWithGstShadow
7453 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7454 & fNotArgsMask;
7455 if (fRegs)
7456 {
7457 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7458 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7459 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7460 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7461 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7462 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7463 }
7464 else
7465 {
7466 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7467 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7468 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7469 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7470 }
7471 }
7472 else
7473 {
7474 idxReg = idxRegPref;
7475 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7476 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
7477 }
7478 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7479 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7480
7481 /*
7482 * Load it off the stack if we've got a stack slot.
7483 */
7484 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7485 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7486 {
7487 Assert(fInitialized);
7488 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7489 switch (pReNative->Core.aVars[idxVar].cbVar)
7490 {
7491 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7492 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7493 case 3: AssertFailed(); RT_FALL_THRU();
7494 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7495 default: AssertFailed(); RT_FALL_THRU();
7496 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7497 }
7498 }
7499 else
7500 {
7501 Assert(idxStackSlot == UINT8_MAX);
7502 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7503 }
7504 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7505 return idxReg;
7506}
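/* Summary of the register selection order in iemNativeVarRegisterAcquire above
   (descriptive sketch, not additional behaviour):
     1. The variable already has a host register -> just mark it acquired.
     2. It's an argument variable and its designated call register is free
        -> take that register so no shuffling is needed at call time.
     3. No usable preferred register was given (none, or it is already in use)
        -> pick a free register that is neither fixed, shadowing a guest register,
        nor one of this MC block's argument registers, preferring non-volatile
        ones (picked from the top); if nothing is free,
        iemNativeRegAllocFindFree() spills one.
     4. Otherwise use the caller supplied idxRegPref.
   Finally the value is loaded from its stack slot if it already has one. */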
7507
7508
7509/**
7510 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7511 * guest register.
7512 *
7513 * This function makes sure there is a register for it and sets it to be the
7514 * current shadow copy of @a enmGstReg.
7515 *
7516 * @returns The host register number.
7517 * @param pReNative The recompiler state.
7518 * @param idxVar The variable.
7519 * @param enmGstReg The guest register this variable will be written to
7520 * after this call.
7521 * @param poff Pointer to the instruction buffer offset.
7522 * In case a register needs to be freed up or if the
7523 * variable content needs to be loaded off the stack.
7524 *
7525 * @note We DO NOT expect @a idxVar to be an argument variable,
7526 * because this function is only used in the commit stage of an
7527 * instruction.
7528 */
7529DECL_HIDDEN_THROW(uint8_t)
7530iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7531{
7532 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7533 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7534 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
7535 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
7536 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
7537 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
7538 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7539 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7540
7541 /*
7542 * This shouldn't ever be used for arguments, unless it's in a weird else
7543 * branch that doesn't do any calling and even then it's questionable.
7544 *
7545 * However, in case someone writes crazy wrong MC code and does register
7546 * updates before making calls, just use the regular register allocator to
7547 * ensure we get a register suitable for the intended argument number.
7548 */
7549 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7550
7551 /*
7552 * If there is already a register for the variable, we transfer/set the
7553 * guest shadow copy assignment to it.
7554 */
7555 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7556 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7557 {
7558 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7559 {
7560 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7561 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7562 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7563 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7564 }
7565 else
7566 {
7567 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7568 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7569 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7570 }
7571 /** @todo figure this one out. We need some way of making sure the register isn't
7572 * modified after this point, just in case we start writing crappy MC code. */
7573 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
7574 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7575 return idxReg;
7576 }
7577 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7578
7579 /*
7580 * Because this is supposed to be the commit stage, we just tag along with the
7581 * temporary register allocator and upgrade it to a variable register.
7582 */
7583 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7584 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7585 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7586 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7587 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7588 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7589
7590 /*
7591 * Now we need to load the register value.
7592 */
7593 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
7594 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
7595 else
7596 {
7597 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7598 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7599 switch (pReNative->Core.aVars[idxVar].cbVar)
7600 {
7601 case sizeof(uint64_t):
7602 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7603 break;
7604 case sizeof(uint32_t):
7605 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7606 break;
7607 case sizeof(uint16_t):
7608 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7609 break;
7610 case sizeof(uint8_t):
7611 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7612 break;
7613 default:
7614 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7615 }
7616 }
7617
7618 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7619 return idxReg;
7620}
7621
7622
7623/**
7624 * Sets the host register for @a idxVarRc to @a idxReg.
7625 *
7626 * The register must not be allocated. Any guest register shadowing will be
7627 * implicitly dropped by this call.
7628 *
7629 * The variable must not have any register associated with it (causes
7630 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
7631 * implied.
7632 *
7633 * @returns idxReg
7634 * @param pReNative The recompiler state.
7635 * @param idxVar The variable.
7636 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
7637 * @param off For recording in debug info.
7638 *
7639 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
7640 */
7641DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
7642{
7643 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7644 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7645 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
7646 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
7647 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
7648
7649 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
7650 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7651
7652 iemNativeVarSetKindToStack(pReNative, idxVar);
7653 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7654
7655 return idxReg;
7656}
7657
7658
7659/**
7660 * A convenient helper function.
7661 */
7662DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7663 uint8_t idxReg, uint32_t *poff)
7664{
7665 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
7666 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7667 return idxReg;
7668}
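#if 0 /* Usage sketch (hypothetical variable index idxVarRc): binding a helper
         call's return value to a variable right after emitting the call, using
         the two helpers above, and releasing it again when done: */
        iemNativeVarRegisterSetAndAcquire(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, &off);
        /* ... emit code that consumes the value now held in IEMNATIVE_CALL_RET_GREG ... */
        iemNativeVarRegisterRelease(pReNative, idxVarRc);
#endif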
7669
7670
7671/**
7672 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7673 *
7674 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7675 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7676 * requirement of flushing anything in volatile host registers when making a
7677 * call.
7678 *
7679 * @returns New @a off value.
7680 * @param pReNative The recompiler state.
7681 * @param off The code buffer position.
7682 * @param fHstRegsNotToSave Set of registers not to save & restore.
7683 */
7684DECL_HIDDEN_THROW(uint32_t)
7685iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7686{
7687 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7688 if (fHstRegs)
7689 {
7690 do
7691 {
7692 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7693 fHstRegs &= ~RT_BIT_32(idxHstReg);
7694
7695 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7696 {
7697 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7698 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7699 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7700 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7701 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7702 switch (pReNative->Core.aVars[idxVar].enmKind)
7703 {
7704 case kIemNativeVarKind_Stack:
7705 {
7706 /* Temporarily spill the variable register. */
7707 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7708 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7709 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7710 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7711 continue;
7712 }
7713
7714 case kIemNativeVarKind_Immediate:
7715 case kIemNativeVarKind_VarRef:
7716 case kIemNativeVarKind_GstRegRef:
7717 /* It is weird to have any of these loaded at this point. */
7718 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7719 continue;
7720
7721 case kIemNativeVarKind_End:
7722 case kIemNativeVarKind_Invalid:
7723 break;
7724 }
7725 AssertFailed();
7726 }
7727 else
7728 {
7729 /*
7730 * Allocate a temporary stack slot and spill the register to it.
7731 */
7732 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7733 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7734 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7735 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7736 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7737 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7738 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7739 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7740 }
7741 } while (fHstRegs);
7742 }
7743 return off;
7744}
7745
7746
7747/**
7748 * Emit code to restore volatile registers after a call to a helper.
7749 *
7750 * @returns New @a off value.
7751 * @param pReNative The recompiler state.
7752 * @param off The code buffer position.
7753 * @param fHstRegsNotToSave Set of registers not to save & restore.
7754 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7755 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7756 */
7757DECL_HIDDEN_THROW(uint32_t)
7758iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7759{
7760 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7761 if (fHstRegs)
7762 {
7763 do
7764 {
7765 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7766 fHstRegs &= ~RT_BIT_32(idxHstReg);
7767
7768 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7769 {
7770 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7771 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7772 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7773 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7774 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7775 switch (pReNative->Core.aVars[idxVar].enmKind)
7776 {
7777 case kIemNativeVarKind_Stack:
7778 {
7779 /* Unspill the variable register. */
7780 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7781 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%d/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7782 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7783 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7784 continue;
7785 }
7786
7787 case kIemNativeVarKind_Immediate:
7788 case kIemNativeVarKind_VarRef:
7789 case kIemNativeVarKind_GstRegRef:
7790 /* It is weird to have any of these loaded at this point. */
7791 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7792 continue;
7793
7794 case kIemNativeVarKind_End:
7795 case kIemNativeVarKind_Invalid:
7796 break;
7797 }
7798 AssertFailed();
7799 }
7800 else
7801 {
7802 /*
7803 * Restore from temporary stack slot.
7804 */
7805 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7806 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7807 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7808 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7809
7810 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7811 }
7812 } while (fHstRegs);
7813 }
7814 return off;
7815}
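#if 0 /* Typical pairing of the two helpers above around a TLB-miss helper call
         (sketch; fHstRegsNotToSave, idxRegMemResult and the call emission are
         hypothetical and normally supplied by the caller): */
        uint32_t const fHstRegsNotToSave = RT_BIT_32(idxRegMemResult); /* managed separately by the caller */
        off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
        /* ... load the argument registers and emit the actual helper call here ... */
        off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
#endif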
7816
7817
7818/**
7819 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7820 *
7821 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7822 */
7823DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7824{
7825 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7826 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7827 {
7828 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7829 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7830 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7831 Assert(cSlots > 0);
7832 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7833 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
7834 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7835 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7836 }
7837 else
7838 Assert(idxStackSlot == UINT8_MAX);
7839}
7840
7841
7842/**
7843 * Worker that frees a single variable.
7844 *
7845 * ASSUMES that @a idxVar is valid.
7846 */
7847DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7848{
7849 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7850 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7851 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7852
7853 /* Free the host register first if any assigned. */
7854 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7855 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7856 {
7857 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7858 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7859 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7860 }
7861
7862 /* Free argument mapping. */
7863 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7864 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7865 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7866
7867 /* Free the stack slots. */
7868 iemNativeVarFreeStackSlots(pReNative, idxVar);
7869
7870 /* Free the actual variable. */
7871 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7872 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7873}
7874
7875
7876/**
7877 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7878 */
7879DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7880{
7881 while (bmVars != 0)
7882 {
7883 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7884 bmVars &= ~RT_BIT_32(idxVar);
7885
7886#if 1 /** @todo optimize by simplifying this later... */
7887 iemNativeVarFreeOneWorker(pReNative, idxVar);
7888#else
7889 /* Only need to free the host register, the rest is done as bulk updates below. */
7890 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7891 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7892 {
7893 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7894 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7895 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7896 }
7897#endif
7898 }
7899#if 0 /** @todo optimize by simplifying this later... */
7900 pReNative->Core.bmVars = 0;
7901 pReNative->Core.bmStack = 0;
7902 pReNative->Core.u64ArgVars = UINT64_MAX;
7903#endif
7904}
7905
7906
7907/**
7908 * This is called by IEM_MC_END() to clean up all variables.
7909 */
7910DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
7911{
7912 uint32_t const bmVars = pReNative->Core.bmVars;
7913 if (bmVars != 0)
7914 iemNativeVarFreeAllSlow(pReNative, bmVars);
7915 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7916 Assert(pReNative->Core.bmStack == 0);
7917}
7918
7919
7920#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
7921
7922/**
7923 * This is called by IEM_MC_FREE_LOCAL.
7924 */
7925DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7926{
7927 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7928 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7929 iemNativeVarFreeOneWorker(pReNative, idxVar);
7930}
7931
7932
7933#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
7934
7935/**
7936 * This is called by IEM_MC_FREE_ARG.
7937 */
7938DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7939{
7940 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7941 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
7942 iemNativeVarFreeOneWorker(pReNative, idxVar);
7943}
7944
7945
7946#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
7947
7948/**
7949 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
7950 */
7951DECL_INLINE_THROW(uint32_t)
7952iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
7953{
7954 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
7955 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
7956 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7957 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
7958 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
7959
7960 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
7961 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
7962 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
7963 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7964
7965 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
7966
7967 /*
7968 * Special case for immediates.
7969 */
7970 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
7971 {
7972 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7973 {
7974 case sizeof(uint16_t):
7975 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7976 break;
7977 case sizeof(uint32_t):
7978 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7979 break;
7980 default: AssertFailed(); break;
7981 }
7982 }
7983 else
7984 {
7985 /*
7986 * The generic solution for now.
7987 */
7988 /** @todo optimize this by having the python script make sure the source
7989 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
7990 * statement. Then we could just transfer the register assignments. */
7991 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
7992 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
7993 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7994 {
7995 case sizeof(uint16_t):
7996 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
7997 break;
7998 case sizeof(uint32_t):
7999 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
8000 break;
8001 default: AssertFailed(); break;
8002 }
8003 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
8004 iemNativeVarRegisterRelease(pReNative, idxVarDst);
8005 }
8006 return off;
8007}
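#if 0 /* Usage sketch (hypothetical MC fragment): narrowing a 32-bit local into
         a 16-bit one. Immediates are narrowed at recompile time; anything else
         goes through a pair of host registers as in the function above. */
        IEM_MC_LOCAL(uint32_t, u32EffAddr);
        /* ... u32EffAddr assigned by earlier statements (e.g. an effective address calculation) ... */
        IEM_MC_LOCAL(uint16_t, u16EffAddr);
        IEM_MC_ASSIGN_TO_SMALLER(u16EffAddr, u32EffAddr);
#endif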
8008
8009
8010
8011/*********************************************************************************************************************************
8012* Emitters for IEM_MC_CALL_CIMPL_XXX *
8013*********************************************************************************************************************************/
8014
8015/**
8016 * Emits code to load a reference to the given guest register into @a idxGprDst.
8017 */
8018DECL_INLINE_THROW(uint32_t)
8019iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8020 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8021{
8022 /*
8023 * Get the offset relative to the CPUMCTX structure.
8024 */
8025 uint32_t offCpumCtx;
8026 switch (enmClass)
8027 {
8028 case kIemNativeGstRegRef_Gpr:
8029 Assert(idxRegInClass < 16);
8030 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8031 break;
8032
8033 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8034 Assert(idxRegInClass < 4);
8035 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8036 break;
8037
8038 case kIemNativeGstRegRef_EFlags:
8039 Assert(idxRegInClass == 0);
8040 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8041 break;
8042
8043 case kIemNativeGstRegRef_MxCsr:
8044 Assert(idxRegInClass == 0);
8045 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8046 break;
8047
8048 case kIemNativeGstRegRef_FpuReg:
8049 Assert(idxRegInClass < 8);
8050 AssertFailed(); /** @todo what kind of indexing? */
8051 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8052 break;
8053
8054 case kIemNativeGstRegRef_MReg:
8055 Assert(idxRegInClass < 8);
8056 AssertFailed(); /** @todo what kind of indexing? */
8057 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8058 break;
8059
8060 case kIemNativeGstRegRef_XReg:
8061 Assert(idxRegInClass < 16);
8062 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8063 break;
8064
8065 default:
8066 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8067 }
8068
8069 /*
8070 * Load the value into the destination register.
8071 */
8072#ifdef RT_ARCH_AMD64
8073 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8074
8075#elif defined(RT_ARCH_ARM64)
8076 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8077 Assert(offCpumCtx < 4096);
8078 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8079
8080#else
8081# error "Port me!"
8082#endif
8083
8084 return off;
8085}
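/* Worked example (sketch) for the offset calculation above: a reference to CH
   (class kIemNativeGstRegRef_GprHighByte, index 1) resolves to
       offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + 1 * sizeof(CPUMCTXGREG),
   which is then materialized as pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) +
   offCpumCtx on AMD64, or as IEMNATIVE_REG_FIXED_PCPUMCTX + offCpumCtx on ARM64. */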
8086
8087
8088/**
8089 * Common code for CIMPL and AIMPL calls.
8090 *
8091 * These are calls that use argument variables and such. They should not be
8092 * confused with internal calls required to implement an MC operation,
8093 * like a TLB load and similar.
8094 *
8095 * Upon return all that is left to do is to load any hidden arguments and
8096 * perform the call. All argument variables are freed.
8097 *
8098 * @returns New code buffer offset; throws VBox status code on error.
8099 * @param pReNative The native recompile state.
8100 * @param off The code buffer offset.
8101 * @param cArgs The total number of arguments (includes hidden
8102 * count).
8103 * @param cHiddenArgs The number of hidden arguments. The hidden
8104 * arguments must not have any variable declared for
8105 * them, whereas all the regular arguments must
8106 * (tstIEMCheckMc ensures this).
8107 */
8108DECL_HIDDEN_THROW(uint32_t)
8109iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8110{
8111#ifdef VBOX_STRICT
8112 /*
8113 * Assert sanity.
8114 */
8115 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8116 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8117 for (unsigned i = 0; i < cHiddenArgs; i++)
8118 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8119 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8120 {
8121 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8122 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8123 }
8124 iemNativeRegAssertSanity(pReNative);
8125#endif
8126
8127 /*
8128 * Before we do anything else, go over variables that are referenced and
8129 * make sure they are not in a register.
8130 */
8131 uint32_t bmVars = pReNative->Core.bmVars;
8132 if (bmVars)
8133 {
8134 do
8135 {
8136 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8137 bmVars &= ~RT_BIT_32(idxVar);
8138
8139 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8140 {
8141 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8142 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8143 {
8144 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8145 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8146 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8147 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8148 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8149
8150 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8151 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8152 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8153 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8154 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8155 }
8156 }
8157 } while (bmVars != 0);
8158#if 0 //def VBOX_STRICT
8159 iemNativeRegAssertSanity(pReNative);
8160#endif
8161 }
8162
8163 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8164
8165 /*
8166 * First, go over the host registers that will be used for arguments and make
8167 * sure they either hold the desired argument or are free.
8168 */
8169 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8170 {
8171 for (uint32_t i = 0; i < cRegArgs; i++)
8172 {
8173 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8174 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8175 {
8176 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8177 {
8178 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8179 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8180 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
8181 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8182 if (uArgNo == i)
8183 { /* perfect */ }
8184 /* The variable allocator logic should make sure this is impossible,
8185 except for when the return register is used as a parameter (ARM,
8186 but not x86). */
8187#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8188 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8189 {
8190# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8191# error "Implement this"
8192# endif
8193 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8194 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8195 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8196 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8197 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8198 }
8199#endif
8200 else
8201 {
8202 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8203
8204 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
8205 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8206 else
8207 {
8208 /* just free it, can be reloaded if used again */
8209 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8210 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8211 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8212 }
8213 }
8214 }
8215 else
8216 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8217 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8218 }
8219 }
8220#if 0 //def VBOX_STRICT
8221 iemNativeRegAssertSanity(pReNative);
8222#endif
8223 }
8224
8225 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8226
8227#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8228 /*
8229 * If there are any stack arguments, make sure they are in their place as well.
8230 *
8231 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8232 * the caller) will be loading it later and it must be free (see the first loop).
8233 */
8234 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8235 {
8236 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8237 {
8238 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8239 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8240 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8241 {
8242 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8243 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
8244 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
8245 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8246 }
8247 else
8248 {
8249 /* Use ARG0 as temp for stuff we need registers for. */
8250 switch (pReNative->Core.aVars[idxVar].enmKind)
8251 {
8252 case kIemNativeVarKind_Stack:
8253 {
8254 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8255 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8256 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8257 iemNativeStackCalcBpDisp(idxStackSlot));
8258 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8259 continue;
8260 }
8261
8262 case kIemNativeVarKind_Immediate:
8263 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
8264 continue;
8265
8266 case kIemNativeVarKind_VarRef:
8267 {
8268 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8269 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8270 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8271 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8272 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8273 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8274 {
8275 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8276 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8277 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8278 }
8279 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8280 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8281 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8282 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8283 continue;
8284 }
8285
8286 case kIemNativeVarKind_GstRegRef:
8287 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8288 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8289 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8290 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8291 continue;
8292
8293 case kIemNativeVarKind_Invalid:
8294 case kIemNativeVarKind_End:
8295 break;
8296 }
8297 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8298 }
8299 }
8300# if 0 //def VBOX_STRICT
8301 iemNativeRegAssertSanity(pReNative);
8302# endif
8303 }
8304#else
8305 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8306#endif
8307
8308 /*
8309 * Make sure the argument variables are loaded into their respective registers.
8310 *
8311 * We can optimize this by ASSUMING that any register allocations are for
8312 * registers that have already been loaded and are ready. The previous step
8313 * saw to that.
8314 */
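/* A hedged reading of the mask check below, assuming g_afIemNativeCallRegs[n] is the
 * cumulative bitmask of the first n call argument registers:
 *
 *      uint32_t const fArgRegs = g_afIemNativeCallRegs[cRegArgs]
 *                              & ~g_afIemNativeCallRegs[cHiddenArgs]; // non-hidden arg regs
 *      bool     const fWork    = (~pReNative->Core.bmHstRegs & fArgRegs) != 0;
 *                                // true if at least one argument register is still unoccupied
 */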
8315 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8316 {
8317 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8318 {
8319 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8320 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8321 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
8322 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8323 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8324 else
8325 {
8326 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8327 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8328 {
8329 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
8330 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
8331 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
8332 | RT_BIT_32(idxArgReg);
8333 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
8334 }
8335 else
8336 {
8337 /* Use ARG0 as temp for stuff we need registers for. */
8338 switch (pReNative->Core.aVars[idxVar].enmKind)
8339 {
8340 case kIemNativeVarKind_Stack:
8341 {
8342 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8343 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8344 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8345 continue;
8346 }
8347
8348 case kIemNativeVarKind_Immediate:
8349 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
8350 continue;
8351
8352 case kIemNativeVarKind_VarRef:
8353 {
8354 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8355 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8356 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8357 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8358 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8359 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8360 {
8361 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8362 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8363 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8364 }
8365 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8366 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8367 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8368 continue;
8369 }
8370
8371 case kIemNativeVarKind_GstRegRef:
8372 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8373 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8374 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8375 continue;
8376
8377 case kIemNativeVarKind_Invalid:
8378 case kIemNativeVarKind_End:
8379 break;
8380 }
8381 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8382 }
8383 }
8384 }
8385#if 0 //def VBOX_STRICT
8386 iemNativeRegAssertSanity(pReNative);
8387#endif
8388 }
8389#ifdef VBOX_STRICT
8390 else
8391 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8392 {
8393 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8394 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8395 }
8396#endif
8397
8398 /*
8399 * Free all argument variables (simplified).
8400 * Their lifetime always expires with the call they are for.
8401 */
8402 /** @todo Make the python script check that arguments aren't used after
8403 * IEM_MC_CALL_XXXX. */
8404 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8405 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8406 * an argument value. There is also some FPU stuff. */
8407 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8408 {
8409 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8410 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8411
8412 /* no need to free registers: */
8413 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8414 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8415 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8416 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8417 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8418 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8419
8420 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8421 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8422 iemNativeVarFreeStackSlots(pReNative, idxVar);
8423 }
8424 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8425
8426 /*
8427 * Flush volatile registers as we make the call.
8428 */
8429 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8430
8431 return off;
8432}
8433
8434
8435/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
8436DECL_HIDDEN_THROW(uint32_t)
8437iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
8438 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
8439
8440{
8441 /*
8442 * Do all the call setup and cleanup.
8443 */
8444 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
8445
8446 /*
8447 * Load the two or three hidden arguments.
8448 */
8449#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8450 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
8451 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8452 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
8453#else
8454 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8455 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
8456#endif
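/* Sketch of the effective call being set up (hedged; the exact signatures live with the
 * CIMPL workers, not here).  On most hosts this amounts to
 *
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, ...);
 *
 * while in the strict-VBOXSTRICTRC Windows/AMD64 case above the return value travels
 * through a hidden pointer (ARG0) to the shadow slot and is reloaded after the call. */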
8457
8458 /*
8459 * Make the call and check the return code.
8460 *
8461 * Shadow PC copies are always flushed here, other stuff depends on flags.
8462 * Segment and general purpose registers are explicitly flushed via the
8463 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
8464 * macros.
8465 */
8466 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
8467#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8468 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
8469#endif
8470 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
8471 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
8472 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
8473 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
8474
8475 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
8476}
8477
8478
8479#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
8480 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
8481
8482/** Emits code for IEM_MC_CALL_CIMPL_1. */
8483DECL_INLINE_THROW(uint32_t)
8484iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8485 uintptr_t pfnCImpl, uint8_t idxArg0)
8486{
8487 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8488 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
8489}
8490
8491
8492#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
8493 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
8494
8495/** Emits code for IEM_MC_CALL_CIMPL_2. */
8496DECL_INLINE_THROW(uint32_t)
8497iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8498 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
8499{
8500 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8501 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8502 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
8503}
8504
8505
8506#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
8507 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8508 (uintptr_t)a_pfnCImpl, a0, a1, a2)
8509
8510/** Emits code for IEM_MC_CALL_CIMPL_3. */
8511DECL_INLINE_THROW(uint32_t)
8512iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8513 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8514{
8515 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8516 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8517 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8518 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
8519}
8520
8521
8522#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
8523 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8524 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
8525
8526/** Emits code for IEM_MC_CALL_CIMPL_4. */
8527DECL_INLINE_THROW(uint32_t)
8528iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8529 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8530{
8531 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8532 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8533 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8534 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8535 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
8536}
8537
8538
8539#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
8540 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8541 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
8542
8543/** Emits code for IEM_MC_CALL_CIMPL_5. */
8544DECL_INLINE_THROW(uint32_t)
8545iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8546 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
8547{
8548 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8549 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8550 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8551 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8552 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
8553 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
8554}
8555
8556
8557/** Recompiler debugging: Flush guest register shadow copies. */
8558#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
8559
8560
8561
8562/*********************************************************************************************************************************
8563* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
8564*********************************************************************************************************************************/
8565
8566/**
8567 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
8568 */
8569DECL_INLINE_THROW(uint32_t)
8570iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8571 uintptr_t pfnAImpl, uint8_t cArgs)
8572{
8573 if (idxVarRc != UINT8_MAX)
8574 {
8575 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
8576 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
8577 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
8578 }
8579
8580 /*
8581 * Do all the call setup and cleanup.
8582 */
8583 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
8584
8585 /*
8586 * Make the call and update the return code variable if we've got one.
8587 */
8588 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8589 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
8590 {
8591        pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
8592 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
8593 }
8594
8595 return off;
8596}
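/* A rough sketch of what the emitted code performs, assuming the usual host calling
 * convention (the cast is purely illustrative):
 *
 *      uint64_t uRet = ((uint64_t (*)(...))pfnAImpl)(arg0, arg1, ...);
 *      if (idxVarRc != UINT8_MAX)
 *          rcVar = uRet;   // the variable simply adopts IEMNATIVE_CALL_RET_GREG
 *
 * i.e. no marshalling beyond what iemNativeEmitCallCommon has already arranged. */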
8597
8598
8599
8600#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
8601 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
8602
8603#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
8604 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
8605
8606/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
8607DECL_INLINE_THROW(uint32_t)
8608iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
8609{
8610 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
8611}
8612
8613
8614#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
8615 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
8616
8617#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
8618 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
8619
8620/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
8621DECL_INLINE_THROW(uint32_t)
8622iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
8623{
8624 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8625 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
8626}
8627
8628
8629#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
8630 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
8631
8632#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
8633 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
8634
8635/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
8636DECL_INLINE_THROW(uint32_t)
8637iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8638 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8639{
8640 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8641 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8642 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
8643}
8644
8645
8646#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
8647 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
8648
8649#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
8650 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
8651
8652/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
8653DECL_INLINE_THROW(uint32_t)
8654iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8655 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8656{
8657 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8658 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8659 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8660 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
8661}
8662
8663
8664#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
8665 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8666
8667#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
8668 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8669
8670/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
8671DECL_INLINE_THROW(uint32_t)
8672iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8673 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8674{
8675 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8676 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8677 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8678 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
8679 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
8680}
8681
8682
8683
8684/*********************************************************************************************************************************
8685* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
8686*********************************************************************************************************************************/
8687
8688#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
8689 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
8690
8691#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8692 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
8693
8694#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8695 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
8696
8697#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8698 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
8699
8700
8701/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
8702 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
8703DECL_INLINE_THROW(uint32_t)
8704iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
8705{
8706 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8707 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8708 Assert(iGRegEx < 20);
8709
8710 /* Same discussion as in iemNativeEmitFetchGregU16 */
8711 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8712 kIemNativeGstRegUse_ReadOnly);
8713
8714 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8715 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8716
8717 /* The value is zero-extended to the full 64-bit host register width. */
8718 if (iGRegEx < 16)
8719 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8720 else
8721 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8722
8723 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8724 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8725 return off;
8726}
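/* Hedged note on the threaded register encoding assumed here: iGRegEx values 0..15
 * select the low byte of a GPR, 16..19 the classic high-byte registers (AH/CH/DH/BH).
 * As plain C the fetch is roughly:
 *
 *      uint64_t const uSrc = pVCpu->cpum.GstCtx.aGRegs[iGRegEx & 15].u;
 *      uDst = iGRegEx < 16 ? (uint8_t)uSrc : (uint8_t)(uSrc >> 8);
 */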
8727
8728
8729#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8730 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
8731
8732#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8733 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
8734
8735#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8736 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
8737
8738/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
8739DECL_INLINE_THROW(uint32_t)
8740iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
8741{
8742 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8743 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8744 Assert(iGRegEx < 20);
8745
8746 /* Same discussion as in iemNativeEmitFetchGregU16 */
8747 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8748 kIemNativeGstRegUse_ReadOnly);
8749
8750 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8751 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8752
8753 if (iGRegEx < 16)
8754 {
8755 switch (cbSignExtended)
8756 {
8757 case sizeof(uint16_t):
8758 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8759 break;
8760 case sizeof(uint32_t):
8761 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8762 break;
8763 case sizeof(uint64_t):
8764 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8765 break;
8766 default: AssertFailed(); break;
8767 }
8768 }
8769 else
8770 {
8771 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8772 switch (cbSignExtended)
8773 {
8774 case sizeof(uint16_t):
8775 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8776 break;
8777 case sizeof(uint32_t):
8778 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8779 break;
8780 case sizeof(uint64_t):
8781 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8782 break;
8783 default: AssertFailed(); break;
8784 }
8785 }
8786
8787 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8788 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8789 return off;
8790}
8791
8792
8793
8794#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
8795 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
8796
8797#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
8798 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
8799
8800#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
8801 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
8802
8803/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
8804DECL_INLINE_THROW(uint32_t)
8805iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8806{
8807 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8808 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8809 Assert(iGReg < 16);
8810
8811 /*
8812 * We can either just load the low 16-bit of the GPR into a host register
8813 * for the variable, or we can do so via a shadow copy host register. The
8814 * latter will avoid having to reload it if it's being stored later, but
8815 * will waste a host register if it isn't touched again. Since we don't
8816 * know what's going to happen, we choose the latter for now.
8817 */
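/* Sketch of the trade-off just described (hedged): with the shadow copy the guest GPR
 * stays associated with a host register, so a later access can reuse it:
 *
 *      hstRegGst = load(pVCpu->cpum.GstCtx.aGRegs[iGReg]);   // loaded once, shadowed
 *      hstRegVar = (uint16_t)hstRegGst;                      // this fetch
 *      // a later IEM_MC_* touching the same GPR reuses hstRegGst without reloading
 *
 * at the cost of keeping hstRegGst allocated even if nothing ever reuses it. */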
8818 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8819 kIemNativeGstRegUse_ReadOnly);
8820
8821 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8822 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8823 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8824 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8825
8826 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8827 return off;
8828}
8829
8830
8831#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
8832 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
8833
8834#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
8835 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
8836
8837/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
8838DECL_INLINE_THROW(uint32_t)
8839iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
8840{
8841 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8842 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8843 Assert(iGReg < 16);
8844
8845 /*
8846 * We can either just load the low 16-bit of the GPR into a host register
8847 * for the variable, or we can do so via a shadow copy host register. The
8848 * latter will avoid having to reload it if it's being stored later, but
8849 * will waste a host register if it isn't touched again. Since we don't
8850 * know what's going to happen, we choose the latter for now.
8851 */
8852 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8853 kIemNativeGstRegUse_ReadOnly);
8854
8855 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8856 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8857 if (cbSignExtended == sizeof(uint32_t))
8858 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8859 else
8860 {
8861 Assert(cbSignExtended == sizeof(uint64_t));
8862 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8863 }
8864 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8865
8866 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8867 return off;
8868}
8869
8870
8871#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
8872 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
8873
8874#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
8875 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
8876
8877/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
8878DECL_INLINE_THROW(uint32_t)
8879iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8880{
8881 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8882 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
8883 Assert(iGReg < 16);
8884
8885 /*
8886 * We can either just load the low 32-bit of the GPR into a host register
8887 * for the variable, or we can do so via a shadow copy host register. The
8888 * latter will avoid having to reload it if it's being stored later, but
8889 * will waste a host register if it isn't touched again. Since we don't
8890 * know what's going to happen, we choose the latter for now.
8891 */
8892 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8893 kIemNativeGstRegUse_ReadOnly);
8894
8895 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8896 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8897 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8898 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8899
8900 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8901 return off;
8902}
8903
8904
8905#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
8906 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
8907
8908/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
8909DECL_INLINE_THROW(uint32_t)
8910iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8911{
8912 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8913 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8914 Assert(iGReg < 16);
8915
8916 /*
8917 * We can either just load the low 32-bit of the GPR into a host register
8918 * for the variable, or we can do so via a shadow copy host register. The
8919 * latter will avoid having to reload it if it's being stored later, but
8920 * will waste a host register if it isn't touched again. Since we don't
8921 * know what's going to happen, we choose the latter for now.
8922 */
8923 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8924 kIemNativeGstRegUse_ReadOnly);
8925
8926 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8927 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8928 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8929 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8930
8931 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8932 return off;
8933}
8934
8935
8936#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
8937 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8938
8939#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
8940 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8941
8942/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
8943 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
8944DECL_INLINE_THROW(uint32_t)
8945iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8946{
8947 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8948 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8949 Assert(iGReg < 16);
8950
8951 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8952 kIemNativeGstRegUse_ReadOnly);
8953
8954 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8955 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8956 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
8957 /** @todo name the register a shadow one already? */
8958 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8959
8960 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8961 return off;
8962}
8963
8964
8965
8966/*********************************************************************************************************************************
8967* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
8968*********************************************************************************************************************************/
8969
8970#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
8971 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
8972
8973/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
8974DECL_INLINE_THROW(uint32_t)
8975iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
8976{
8977 Assert(iGRegEx < 20);
8978 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8979 kIemNativeGstRegUse_ForUpdate);
8980#ifdef RT_ARCH_AMD64
8981 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8982
8983 /* To the lowest byte of the register: mov r8, imm8 */
8984 if (iGRegEx < 16)
8985 {
8986 if (idxGstTmpReg >= 8)
8987 pbCodeBuf[off++] = X86_OP_REX_B;
8988 else if (idxGstTmpReg >= 4)
8989 pbCodeBuf[off++] = X86_OP_REX;
8990 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8991 pbCodeBuf[off++] = u8Value;
8992 }
8993 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
8994 else if (idxGstTmpReg < 4)
8995 {
8996 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
8997 pbCodeBuf[off++] = u8Value;
8998 }
8999 else
9000 {
9001 /* ror reg64, 8 */
9002 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9003 pbCodeBuf[off++] = 0xc1;
9004 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9005 pbCodeBuf[off++] = 8;
9006
9007 /* mov reg8, imm8 */
9008 if (idxGstTmpReg >= 8)
9009 pbCodeBuf[off++] = X86_OP_REX_B;
9010 else if (idxGstTmpReg >= 4)
9011 pbCodeBuf[off++] = X86_OP_REX;
9012 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9013 pbCodeBuf[off++] = u8Value;
9014
9015 /* rol reg64, 8 */
9016 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9017 pbCodeBuf[off++] = 0xc1;
9018 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9019 pbCodeBuf[off++] = 8;
9020 }
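    /* Illustration of the ror/mov/rol trick above (a sketch, not from the original
     * source); operating on the 64-bit host register value it is equivalent to:
     *
     *      uGst = (uGst >> 8) | (uGst << 56);              // ror reg64, 8
     *      uGst = (uGst & ~(uint64_t)0xff) | u8Value;      // mov reg8, imm8
     *      uGst = (uGst << 8) | (uGst >> 56);              // rol reg64, 8
     *
     * which replaces bits 15:8 (AH/CH/DH/BH) with u8Value and leaves all other bits
     * unchanged. */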
9021
9022#elif defined(RT_ARCH_ARM64)
9023 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
9024 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9025 if (iGRegEx < 16)
9026 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
9027 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
9028 else
9029 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
9030 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
9031 iemNativeRegFreeTmp(pReNative, idxImmReg);
9032
9033#else
9034# error "Port me!"
9035#endif
9036
9037 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9038
9039 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9040
9041 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9042 return off;
9043}
9044
9045
9046#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
9047 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
9048
9049/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
9050DECL_INLINE_THROW(uint32_t)
9051iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
9052{
9053 Assert(iGRegEx < 20);
9054 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9055
9056 /*
9057 * If it's a constant value (unlikely) we treat this as an
9058 * IEM_MC_STORE_GREG_U8_CONST statement.
9059 */
9060 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9061 { /* likely */ }
9062 else
9063 {
9064 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9065 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9066 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9067 }
9068
9069 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9070 kIemNativeGstRegUse_ForUpdate);
9071 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9072
9073#ifdef RT_ARCH_AMD64
9074 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
9075 if (iGRegEx < 16)
9076 {
9077 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
9078 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9079 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9080 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9081 pbCodeBuf[off++] = X86_OP_REX;
9082 pbCodeBuf[off++] = 0x8a;
9083 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9084 }
9085 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
9086 else if (idxGstTmpReg < 4 && idxVarReg < 4)
9087 {
9088 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
9089 pbCodeBuf[off++] = 0x8a;
9090 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
9091 }
9092 else
9093 {
9094 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
9095
9096 /* ror reg64, 8 */
9097 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9098 pbCodeBuf[off++] = 0xc1;
9099 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9100 pbCodeBuf[off++] = 8;
9101
9102 /* mov reg8, reg8(r/m) */
9103 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9104 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9105 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9106 pbCodeBuf[off++] = X86_OP_REX;
9107 pbCodeBuf[off++] = 0x8a;
9108 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9109
9110 /* rol reg64, 8 */
9111 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9112 pbCodeBuf[off++] = 0xc1;
9113 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9114 pbCodeBuf[off++] = 8;
9115 }
9116
9117#elif defined(RT_ARCH_ARM64)
9118 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
9119 or
9120 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
9121 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9122 if (iGRegEx < 16)
9123 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
9124 else
9125 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
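    /* Hedged summary of the BFI semantics relied on here: BFI <dst>, <src>, #lsb, #width
     * inserts the low #width bits of <src> into <dst> at bit position #lsb, leaving the
     * other bits of <dst> untouched.  For width 8 that is roughly:
     *
     *      uDst = (uDst & ~(UINT64_C(0xff) << lsb)) | ((uint64_t)(uint8_t)uSrc << lsb);
     *
     * with lsb = 0 for the low byte and lsb = 8 for AH/CH/DH/BH. */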
9126
9127#else
9128# error "Port me!"
9129#endif
9130 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9131
9132 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9133
9134 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9135 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9136 return off;
9137}
9138
9139
9140
9141#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
9142 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
9143
9144/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
9145DECL_INLINE_THROW(uint32_t)
9146iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
9147{
9148 Assert(iGReg < 16);
9149 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9150 kIemNativeGstRegUse_ForUpdate);
9151#ifdef RT_ARCH_AMD64
9152 /* mov reg16, imm16 */
9153 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9154 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9155 if (idxGstTmpReg >= 8)
9156 pbCodeBuf[off++] = X86_OP_REX_B;
9157 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
9158 pbCodeBuf[off++] = RT_BYTE1(uValue);
9159 pbCodeBuf[off++] = RT_BYTE2(uValue);
9160
9161#elif defined(RT_ARCH_ARM64)
9162 /* movk xdst, #uValue, lsl #0 */
9163 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9164 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
9165
9166#else
9167# error "Port me!"
9168#endif
9169
9170 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9171
9172 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9173 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9174 return off;
9175}
9176
9177
9178#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
9179 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
9180
9181/** Emits code for IEM_MC_STORE_GREG_U16. */
9182DECL_INLINE_THROW(uint32_t)
9183iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9184{
9185 Assert(iGReg < 16);
9186 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9187
9188 /*
9189 * If it's a constant value (unlikely) we treat this as an
9190 * IEM_MC_STORE_GREG_U16_CONST statement.
9191 */
9192 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9193 { /* likely */ }
9194 else
9195 {
9196 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9197 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9198 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9199 }
9200
9201 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9202 kIemNativeGstRegUse_ForUpdate);
9203
9204#ifdef RT_ARCH_AMD64
9205 /* mov reg16, reg16 or [mem16] */
9206 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9207 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9208 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9209 {
9210 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
9211 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
9212 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
9213 pbCodeBuf[off++] = 0x8b;
9214 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
9215 }
9216 else
9217 {
9218 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
9219 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9220 if (idxGstTmpReg >= 8)
9221 pbCodeBuf[off++] = X86_OP_REX_R;
9222 pbCodeBuf[off++] = 0x8b;
9223 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9224 }
9225
9226#elif defined(RT_ARCH_ARM64)
9227 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
9228 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9229 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9230 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
9231 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9232
9233#else
9234# error "Port me!"
9235#endif
9236
9237 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9238
9239 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9240 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9241 return off;
9242}
9243
9244
9245#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
9246 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
9247
9248/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
9249DECL_INLINE_THROW(uint32_t)
9250iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
9251{
9252 Assert(iGReg < 16);
9253 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9254 kIemNativeGstRegUse_ForFullWrite);
9255 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9256 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9257 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9258 return off;
9259}
9260
9261
9262#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
9263 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
9264
9265/** Emits code for IEM_MC_STORE_GREG_U32. */
9266DECL_INLINE_THROW(uint32_t)
9267iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9268{
9269 Assert(iGReg < 16);
9270 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9271
9272 /*
9273 * If it's a constant value (unlikely) we treat this as an
9274 * IEM_MC_STORE_GREG_U32_CONST statement.
9275 */
9276 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9277 { /* likely */ }
9278 else
9279 {
9280 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9281 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9282 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9283 }
9284
9285 /*
9286 * For the rest we allocate a guest register for the variable and write
9287 * it to the CPUMCTX structure.
9288 */
9289 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9290 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9291#ifdef VBOX_STRICT
9292 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
9293#endif
9294 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9295 return off;
9296}
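/* Remark (an assumption which the VBOX_STRICT check above verifies): a 32-bit guest
 * GPR store clears bits 63:32, matching x86-64 semantics, i.e. conceptually
 *
 *      pVCpu->cpum.GstCtx.aGRegs[iGReg].u = (uint32_t)uValue;
 *
 * so the acquired host register is expected to already hold a zero-extended value. */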
9297
9298
9299#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
9300 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
9301
9302/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
9303DECL_INLINE_THROW(uint32_t)
9304iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
9305{
9306 Assert(iGReg < 16);
9307 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9308 kIemNativeGstRegUse_ForFullWrite);
9309 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9310 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9311 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9312 return off;
9313}
9314
9315
9316#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
9317 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
9318
9319/** Emits code for IEM_MC_STORE_GREG_U64. */
9320DECL_INLINE_THROW(uint32_t)
9321iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9322{
9323 Assert(iGReg < 16);
9324 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9325
9326 /*
9327 * If it's a constant value (unlikely) we treat this as an
9328 * IEM_MC_STORE_GREG_U64_CONST statement.
9329 */
9330 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9331 { /* likely */ }
9332 else
9333 {
9334 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9335 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9336 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
9337 }
9338
9339 /*
9340 * For the rest we allocate a guest register for the variable and write
9341 * it to the CPUMCTX structure.
9342 */
9343 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9344 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9345 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9346 return off;
9347}
9348
9349
9350#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
9351 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
9352
9353/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
9354DECL_INLINE_THROW(uint32_t)
9355iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
9356{
9357 Assert(iGReg < 16);
9358 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9359 kIemNativeGstRegUse_ForUpdate);
9360 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
9361 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9362 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9363 return off;
9364}
9365
9366
9367/*********************************************************************************************************************************
9368* General purpose register manipulation (add, sub). *
9369*********************************************************************************************************************************/
9370
9371#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
9372 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
9373
9374/** Emits code for IEM_MC_ADD_GREG_U16. */
9375DECL_INLINE_THROW(uint32_t)
9376iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
9377{
9378 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9379 kIemNativeGstRegUse_ForUpdate);
9380
9381#ifdef RT_ARCH_AMD64
9382 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9383 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9384 if (idxGstTmpReg >= 8)
9385 pbCodeBuf[off++] = X86_OP_REX_B;
9386 if (uAddend == 1)
9387 {
9388 pbCodeBuf[off++] = 0xff; /* inc */
9389 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9390 }
9391 else
9392 {
9393 pbCodeBuf[off++] = 0x81;
9394 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9395 pbCodeBuf[off++] = uAddend;
9396 pbCodeBuf[off++] = 0;
9397 }
9398
9399#else
9400 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9401 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9402
9403 /* add tmp, gstgrp, uAddend */
9404 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
9405
9406 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
9407 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9408
9409 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9410#endif
9411
9412 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9413
9414 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9415
9416 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9417 return off;
9418}
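/* Sketch of what IEM_MC_ADD_GREG_U16 amounts to, as derived from the emitters above
 * (the operand-size-prefixed add/inc on AMD64, the add+bfi pair on ARM64):
 *
 *      pVCpu->cpum.GstCtx.aGRegs[iGReg].u16 += uAddend;   // bits 63:16 untouched
 */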
9419
9420
9421#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
9422 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9423
9424#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
9425 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9426
9427/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
9428DECL_INLINE_THROW(uint32_t)
9429iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
9430{
9431 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9432 kIemNativeGstRegUse_ForUpdate);
9433
9434#ifdef RT_ARCH_AMD64
9435 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9436 if (f64Bit)
9437 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9438 else if (idxGstTmpReg >= 8)
9439 pbCodeBuf[off++] = X86_OP_REX_B;
9440 if (uAddend == 1)
9441 {
9442 pbCodeBuf[off++] = 0xff; /* inc */
9443 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9444 }
9445 else if (uAddend < 128)
9446 {
9447 pbCodeBuf[off++] = 0x83; /* add */
9448 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9449 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9450 }
9451 else
9452 {
9453 pbCodeBuf[off++] = 0x81; /* add */
9454 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9455 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9456 pbCodeBuf[off++] = 0;
9457 pbCodeBuf[off++] = 0;
9458 pbCodeBuf[off++] = 0;
9459 }
9460
9461#else
9462 /* add gstgrp, gstgrp, uAddend */
9463 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9464 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
9465
9466#endif
9467
9468 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9469
9470 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9471
9472 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9473 return off;
9474}
9475
9476
9477
9478#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
9479 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
9480
9481/** Emits code for IEM_MC_SUB_GREG_U16. */
9482DECL_INLINE_THROW(uint32_t)
9483iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
9484{
9485 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9486 kIemNativeGstRegUse_ForUpdate);
9487
9488#ifdef RT_ARCH_AMD64
9489 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9490 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9491 if (idxGstTmpReg >= 8)
9492 pbCodeBuf[off++] = X86_OP_REX_B;
9493 if (uSubtrahend == 1)
9494 {
9495 pbCodeBuf[off++] = 0xff; /* dec */
9496 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9497 }
9498 else
9499 {
9500 pbCodeBuf[off++] = 0x81;
9501 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9502 pbCodeBuf[off++] = uSubtrahend;
9503 pbCodeBuf[off++] = 0;
9504 }
9505
9506#else
9507 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9508 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9509
9510 /* sub tmp, gstgrp, uSubtrahend */
9511 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
9512
9513 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
9514 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9515
9516 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9517#endif
9518
9519 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9520
9521 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9522
9523 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9524 return off;
9525}
9526
9527
9528#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
9529 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9530
9531#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
9532 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9533
9534/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
9535DECL_INLINE_THROW(uint32_t)
9536iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
9537{
9538 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9539 kIemNativeGstRegUse_ForUpdate);
9540
9541#ifdef RT_ARCH_AMD64
9542 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9543 if (f64Bit)
9544 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9545 else if (idxGstTmpReg >= 8)
9546 pbCodeBuf[off++] = X86_OP_REX_B;
9547 if (uSubtrahend == 1)
9548 {
9549 pbCodeBuf[off++] = 0xff; /* dec */
9550 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9551 }
9552 else if (uSubtrahend < 128)
9553 {
9554 pbCodeBuf[off++] = 0x83; /* sub */
9555 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9556 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9557 }
9558 else
9559 {
9560 pbCodeBuf[off++] = 0x81; /* sub */
9561 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9562 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9563 pbCodeBuf[off++] = 0;
9564 pbCodeBuf[off++] = 0;
9565 pbCodeBuf[off++] = 0;
9566 }
9567
9568#else
9569 /* sub gstgrp, gstgrp, uSubtrahend */
9570 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9571 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
9572
9573#endif
9574
9575 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9576
9577 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9578
9579 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9580 return off;
9581}
9582
9583
9584
9585/*********************************************************************************************************************************
9586* EFLAGS *
9587*********************************************************************************************************************************/
9588
9589#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9590# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
9591#else
9592# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
9593 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
9594
9595DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
9596{
9597 if (fEflOutput)
9598 {
9599 IEMLIVENESSPART2 const LivenessInfo2 = pReNative->paLivenessEntries[pReNative->idxCurCall].s2;
9600 PVMCPUCC const pVCpu = pReNative->pVCpu;
9601# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_u2LivenessMember, a_CoreStatName) \
9602 if (fEflOutput & (a_fEfl)) \
9603 { \
9604 if (LivenessInfo2.a_u2LivenessMember != IEMLIVENESS_STATE_CLOBBERED) \
9605 STAM_COUNTER_INC(&pVCpu->iem.s. a_CoreStatName ## Required); \
9606 else \
9607 STAM_COUNTER_INC(&pVCpu->iem.s. a_CoreStatName ## Skippable); \
9608 } else do { } while (0)
9609 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, u2EflCf, StatNativeLivenessEflCf);
9610 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, u2EflPf, StatNativeLivenessEflPf);
9611 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, u2EflAf, StatNativeLivenessEflAf);
9612 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, u2EflZf, StatNativeLivenessEflZf);
9613 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, u2EflSf, StatNativeLivenessEflSf);
9614 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, u2EflOf, StatNativeLivenessEflOf);
9615 CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, u2EflOther, StatNativeLivenessEflOther);
9616# undef CHECK_FLAG_AND_UPDATE_STATS
9617 }
9618 RT_NOREF(fEflInput);
9619}
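
/* To illustrate, CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, u2EflZf, StatNativeLivenessEflZf)
   above expands to roughly the following (dangling-else guard omitted for readability):

       if (fEflOutput & X86_EFL_ZF)
       {
           if (LivenessInfo2.u2EflZf != IEMLIVENESS_STATE_CLOBBERED)
               STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeLivenessEflZfRequired);
           else
               STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeLivenessEflZfSkippable);
       }

   I.e. an output flag whose liveness state says it is clobbered before being read again
   counts as skippable, everything else as required. */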
9620#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
9621
9622#undef IEM_MC_FETCH_EFLAGS /* should not be used */
9623#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
9624 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
9625
9626/** Handles IEM_MC_FETCH_EFLAGS_EX. */
9627DECL_INLINE_THROW(uint32_t)
9628iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
9629{
9630 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9631 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9632
9633 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
9634 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
9635 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
9636 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
9637 return off;
9638}
9639
9640
9641
9642/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
9643 * start using it with custom native code emission (inlining assembly
9644 * instruction helpers). */
9645#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
9646#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
9647 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
9648 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
9649
9650/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
9651DECL_INLINE_THROW(uint32_t)
9652iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
9653{
9654 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9655 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9656
9657 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
9658
9659#ifdef VBOX_STRICT
9660 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
9661 uint32_t offFixup = off;
9662 off = iemNativeEmitJnzToFixed(pReNative, off, off);
9663 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
9664 iemNativeFixupFixedJump(pReNative, offFixup, off);
9665
9666 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
9667 offFixup = off;
9668 off = iemNativeEmitJzToFixed(pReNative, off, off);
9669 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
9670 iemNativeFixupFixedJump(pReNative, offFixup, off);
9671#endif
9672
9673 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
9674 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
9675 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
9676 return off;
9677}
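
/* The VBOX_STRICT block above is the emitted-code counterpart of the following host-side
   sanity check on the committed value (a sketch; uEFlags stands for the value in idxReg,
   and the masks are assumed to have their usual meaning: RA1 = reserved-as-one bits,
   RAZ = reserved-as-zero bits):

       Assert(  uEFlags & X86_EFL_RA1_MASK);                                 // breakpoint 0x2001 otherwise
       Assert(!(uEFlags & (X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32)));   // breakpoint 0x2002 otherwise
*/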
9678
9679
9680
9681/*********************************************************************************************************************************
9682* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
9683*********************************************************************************************************************************/
9684
9685#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
9686 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
9687
9688#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
9689 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
9690
9691#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
9692 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
9693
9694
9695/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
9696 * IEM_MC_FETCH_SREG_ZX_U64. */
9697DECL_INLINE_THROW(uint32_t)
9698iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
9699{
9700 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9701 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
9702 Assert(iSReg < X86_SREG_COUNT);
9703
9704 /*
9705 * For now, we will not create a shadow copy of a selector. The rationale
9706 * is that since we do not recompile the popping and loading of segment
9707 * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
9708 * pushing and moving to registers, there is only a small chance that the
9709 * shadow copy will be accessed again before the register is reloaded. One
9710 * scenario would be nested calls in 16-bit code, but I doubt it's worth
9711 * the extra register pressure atm.
9712 *
9713 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
9714 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
9715 * store scenario covered at present (r160730).
9716 */
9717 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9718 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9719 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
9720 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9721 return off;
9722}
9723
9724
9725
9726/*********************************************************************************************************************************
9727* Register references. *
9728*********************************************************************************************************************************/
9729
9730#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
9731 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
9732
9733#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
9734 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
9735
9736/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
9737DECL_INLINE_THROW(uint32_t)
9738iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
9739{
9740 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9741 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9742 Assert(iGRegEx < 20);
9743
9744 if (iGRegEx < 16)
9745 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9746 else
9747 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
9748
9749 /* If we've delayed writing back the register value, flush it now. */
9750 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9751
9752 /* If it's not a const reference we need to flush the shadow copy of the register now. */
9753 if (!fConst)
9754 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
9755
9756 return off;
9757}
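
/* Note: iGRegEx values 0..15 appear to refer to the ordinary (low) byte of GPR 0..15,
   while 16..19 appear to denote the high-byte registers (AH, CH, DH, BH) of GPR 0..3,
   which is why the function above switches to kIemNativeGstRegRef_GprHighByte and
   recovers the GPR index with (iGRegEx & 15). */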
9758
9759#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
9760 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
9761
9762#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
9763 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
9764
9765#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
9766 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
9767
9768#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
9769 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
9770
9771#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
9772 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
9773
9774#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
9775 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
9776
9777#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
9778 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
9779
9780#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
9781 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
9782
9783#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
9784 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
9785
9786#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
9787 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
9788
9789/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
9790DECL_INLINE_THROW(uint32_t)
9791iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
9792{
9793 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9794 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9795 Assert(iGReg < 16);
9796
9797 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
9798
9799 /* If we've delayed writing back the register value, flush it now. */
9800 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
9801
9802 /* If it's not a const reference we need to flush the shadow copy of the register now. */
9803 if (!fConst)
9804 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
9805
9806 return off;
9807}
9808
9809
9810#undef IEM_MC_REF_EFLAGS /* should not be used. */
9811#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
9812 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
9813 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
9814
9815/** Handles IEM_MC_REF_EFLAGS. */
9816DECL_INLINE_THROW(uint32_t)
9817iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
9818{
9819 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9820 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9821
9822 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
9823
9824 /* If we've delayed writing back the register value, flush it now. */
9825 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
9826
9827 /* If there is a shadow copy of guest EFLAGS, flush it now. */
9828 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
9829
9830 return off;
9831}
9832
9833
9834/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
9835 * different code from threaded recompiler, maybe it would be helpful. For now
9836 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
9837#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
9838
9839
9840
9841/*********************************************************************************************************************************
9842* Effective Address Calculation *
9843*********************************************************************************************************************************/
9844#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
9845 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
9846
9847/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
9848 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
9849DECL_INLINE_THROW(uint32_t)
9850iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9851 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
9852{
9853 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9854
9855 /*
9856 * Handle the disp16 form with no registers first.
9857 *
9858 * Convert to an immediate value, as that'll delay the register allocation
9859 * and assignment till the memory access / call / whatever and we can use
9860 * a more appropriate register (or none at all).
9861 */
9862 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
9863 {
9864 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
9865 return off;
9866 }
9867
9868 /* Determine the displacement. */
9869 uint16_t u16EffAddr;
9870 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9871 {
9872 case 0: u16EffAddr = 0; break;
9873 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
9874 case 2: u16EffAddr = u16Disp; break;
9875 default: AssertFailedStmt(u16EffAddr = 0);
9876 }
9877
9878 /* Determine the registers involved. */
9879 uint8_t idxGstRegBase;
9880 uint8_t idxGstRegIndex;
9881 switch (bRm & X86_MODRM_RM_MASK)
9882 {
9883 case 0:
9884 idxGstRegBase = X86_GREG_xBX;
9885 idxGstRegIndex = X86_GREG_xSI;
9886 break;
9887 case 1:
9888 idxGstRegBase = X86_GREG_xBX;
9889 idxGstRegIndex = X86_GREG_xDI;
9890 break;
9891 case 2:
9892 idxGstRegBase = X86_GREG_xBP;
9893 idxGstRegIndex = X86_GREG_xSI;
9894 break;
9895 case 3:
9896 idxGstRegBase = X86_GREG_xBP;
9897 idxGstRegIndex = X86_GREG_xDI;
9898 break;
9899 case 4:
9900 idxGstRegBase = X86_GREG_xSI;
9901 idxGstRegIndex = UINT8_MAX;
9902 break;
9903 case 5:
9904 idxGstRegBase = X86_GREG_xDI;
9905 idxGstRegIndex = UINT8_MAX;
9906 break;
9907 case 6:
9908 idxGstRegBase = X86_GREG_xBP;
9909 idxGstRegIndex = UINT8_MAX;
9910 break;
9911#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
9912 default:
9913#endif
9914 case 7:
9915 idxGstRegBase = X86_GREG_xBX;
9916 idxGstRegIndex = UINT8_MAX;
9917 break;
9918 }
9919
9920 /*
9921 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
9922 */
9923 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9924 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9925 kIemNativeGstRegUse_ReadOnly);
9926 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
9927 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9928 kIemNativeGstRegUse_ReadOnly)
9929 : UINT8_MAX;
9930#ifdef RT_ARCH_AMD64
9931 if (idxRegIndex == UINT8_MAX)
9932 {
9933 if (u16EffAddr == 0)
9934 {
9935 /* movxz ret, base */
9936 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
9937 }
9938 else
9939 {
9940 /* lea ret32, [base64 + disp32] */
9941 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9942 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9943 if (idxRegRet >= 8 || idxRegBase >= 8)
9944 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9945 pbCodeBuf[off++] = 0x8d;
9946 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9947 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
9948 else
9949 {
9950 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
9951 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9952 }
9953 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9954 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9955 pbCodeBuf[off++] = 0;
9956 pbCodeBuf[off++] = 0;
9957 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9958
9959 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9960 }
9961 }
9962 else
9963 {
9964 /* lea ret32, [index64 + base64 (+ disp32)] */
9965 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9966 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9967 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9968 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9969 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9970 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9971 pbCodeBuf[off++] = 0x8d;
9972 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
9973 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9974 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
9975 if (bMod == X86_MOD_MEM4)
9976 {
9977 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9978 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9979 pbCodeBuf[off++] = 0;
9980 pbCodeBuf[off++] = 0;
9981 }
9982 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9983 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9984 }
9985
9986#elif defined(RT_ARCH_ARM64)
9987 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9988 if (u16EffAddr == 0)
9989 {
9990 if (idxRegIndex == UINT8_MAX)
9991 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
9992 else
9993 {
9994 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
9995 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9996 }
9997 }
9998 else
9999 {
10000 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
10001 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
10002 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
10003 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10004 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
10005 else
10006 {
10007 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
10008 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10009 }
10010 if (idxRegIndex != UINT8_MAX)
10011 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
10012 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10013 }
10014
10015#else
10016# error "port me"
10017#endif
10018
10019 if (idxRegIndex != UINT8_MAX)
10020 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10021 iemNativeRegFreeTmp(pReNative, idxRegBase);
10022 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10023 return off;
10024}
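
/*
 * For reference, a compact interpreter-style sketch of the 16-bit effective address
 * calculation the emitter above generates code for. The helper is hypothetical (not
 * part of the recompiler); register values are passed in directly and the same
 * X86_MODRM_* constants as above are assumed.
 *
 *     static uint16_t iemExampleCalcEa16(uint8_t bRm, uint16_t u16Disp,
 *                                        uint16_t bx, uint16_t bp, uint16_t si, uint16_t di)
 *     {
 *         if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
 *             return u16Disp; // mod=0, r/m=6: disp16 only, no registers
 *         uint16_t uDisp = 0;
 *         switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
 *         {
 *             case 1: uDisp = (uint16_t)(int16_t)(int8_t)u16Disp; break; // sign-extended disp8
 *             case 2: uDisp = u16Disp; break;                            // disp16
 *             default: break;                                            // no displacement
 *         }
 *         switch (bRm & X86_MODRM_RM_MASK)
 *         {
 *             case 0:  return bx + si + uDisp;
 *             case 1:  return bx + di + uDisp;
 *             case 2:  return bp + si + uDisp;
 *             case 3:  return bp + di + uDisp;
 *             case 4:  return si + uDisp;
 *             case 5:  return di + uDisp;
 *             case 6:  return bp + uDisp;
 *             default: return bx + uDisp;
 *         }
 *     }
 */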
10025
10026
10027#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
10028 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
10029
10030/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
10031 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
10032DECL_INLINE_THROW(uint32_t)
10033iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10034 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
10035{
10036 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10037
10038 /*
10039 * Handle the disp32 form with no registers first.
10040 *
10041 * Convert to an immediate value, as that'll delay the register allocation
10042 * and assignment till the memory access / call / whatever and we can use
10043 * a more appropriate register (or none at all).
10044 */
10045 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10046 {
10047 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
10048 return off;
10049 }
10050
10051 /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
10052 uint32_t u32EffAddr = 0;
10053 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10054 {
10055 case 0: break;
10056 case 1: u32EffAddr = (int8_t)u32Disp; break;
10057 case 2: u32EffAddr = u32Disp; break;
10058 default: AssertFailed();
10059 }
10060
10061 /* Get the register (or SIB) value. */
10062 uint8_t idxGstRegBase = UINT8_MAX;
10063 uint8_t idxGstRegIndex = UINT8_MAX;
10064 uint8_t cShiftIndex = 0;
10065 switch (bRm & X86_MODRM_RM_MASK)
10066 {
10067 case 0: idxGstRegBase = X86_GREG_xAX; break;
10068 case 1: idxGstRegBase = X86_GREG_xCX; break;
10069 case 2: idxGstRegBase = X86_GREG_xDX; break;
10070 case 3: idxGstRegBase = X86_GREG_xBX; break;
10071 case 4: /* SIB */
10072 {
10073 /* index w/ scaling. */
10074 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10075 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10076 {
10077 case 0: idxGstRegIndex = X86_GREG_xAX; break;
10078 case 1: idxGstRegIndex = X86_GREG_xCX; break;
10079 case 2: idxGstRegIndex = X86_GREG_xDX; break;
10080 case 3: idxGstRegIndex = X86_GREG_xBX; break;
10081 case 4: cShiftIndex = 0; /*no index*/ break;
10082 case 5: idxGstRegIndex = X86_GREG_xBP; break;
10083 case 6: idxGstRegIndex = X86_GREG_xSI; break;
10084 case 7: idxGstRegIndex = X86_GREG_xDI; break;
10085 }
10086
10087 /* base */
10088 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
10089 {
10090 case 0: idxGstRegBase = X86_GREG_xAX; break;
10091 case 1: idxGstRegBase = X86_GREG_xCX; break;
10092 case 2: idxGstRegBase = X86_GREG_xDX; break;
10093 case 3: idxGstRegBase = X86_GREG_xBX; break;
10094 case 4:
10095 idxGstRegBase = X86_GREG_xSP;
10096 u32EffAddr += uSibAndRspOffset >> 8;
10097 break;
10098 case 5:
10099 if ((bRm & X86_MODRM_MOD_MASK) != 0)
10100 idxGstRegBase = X86_GREG_xBP;
10101 else
10102 {
10103 Assert(u32EffAddr == 0);
10104 u32EffAddr = u32Disp;
10105 }
10106 break;
10107 case 6: idxGstRegBase = X86_GREG_xSI; break;
10108 case 7: idxGstRegBase = X86_GREG_xDI; break;
10109 }
10110 break;
10111 }
10112 case 5: idxGstRegBase = X86_GREG_xBP; break;
10113 case 6: idxGstRegBase = X86_GREG_xSI; break;
10114 case 7: idxGstRegBase = X86_GREG_xDI; break;
10115 }
10116
10117 /*
10118 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
10119 * the start of the function.
10120 */
10121 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10122 {
10123 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
10124 return off;
10125 }
10126
10127 /*
10128 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10129 */
10130 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10131 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10132 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10133 kIemNativeGstRegUse_ReadOnly);
10134 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10135 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10136 kIemNativeGstRegUse_ReadOnly);
10137
10138 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10139 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10140 {
10141 idxRegBase = idxRegIndex;
10142 idxRegIndex = UINT8_MAX;
10143 }
10144
10145#ifdef RT_ARCH_AMD64
10146 if (idxRegIndex == UINT8_MAX)
10147 {
10148 if (u32EffAddr == 0)
10149 {
10150 /* mov ret, base */
10151 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10152 }
10153 else
10154 {
10155 /* lea ret32, [base64 + disp32] */
10156 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10157 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10158 if (idxRegRet >= 8 || idxRegBase >= 8)
10159 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10160 pbCodeBuf[off++] = 0x8d;
10161 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10162 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10163 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10164 else
10165 {
10166 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10167 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10168 }
10169 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10170 if (bMod == X86_MOD_MEM4)
10171 {
10172 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10173 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10174 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10175 }
10176 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10177 }
10178 }
10179 else
10180 {
10181 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10182 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10183 if (idxRegBase == UINT8_MAX)
10184 {
10185 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
10186 if (idxRegRet >= 8 || idxRegIndex >= 8)
10187 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10188 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10189 pbCodeBuf[off++] = 0x8d;
10190 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10191 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10192 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10193 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10194 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10195 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10196 }
10197 else
10198 {
10199 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10200 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10201 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10202 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10203 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10204 pbCodeBuf[off++] = 0x8d;
10205 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10206 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10207 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10208 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10209 if (bMod != X86_MOD_MEM0)
10210 {
10211 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10212 if (bMod == X86_MOD_MEM4)
10213 {
10214 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10215 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10216 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10217 }
10218 }
10219 }
10220 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10221 }
10222
10223#elif defined(RT_ARCH_ARM64)
10224 if (u32EffAddr == 0)
10225 {
10226 if (idxRegIndex == UINT8_MAX)
10227 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10228 else if (idxRegBase == UINT8_MAX)
10229 {
10230 if (cShiftIndex == 0)
10231 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
10232 else
10233 {
10234 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10235 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
10236 }
10237 }
10238 else
10239 {
10240 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10241 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10242 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10243 }
10244 }
10245 else
10246 {
10247 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
10248 {
10249 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10250 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
10251 }
10252 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
10253 {
10254 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10255 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10256 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
10257 }
10258 else
10259 {
10260 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
10261 if (idxRegBase != UINT8_MAX)
10262 {
10263 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10264 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10265 }
10266 }
10267 if (idxRegIndex != UINT8_MAX)
10268 {
10269 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10270 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10271 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10272 }
10273 }
10274
10275#else
10276# error "port me"
10277#endif
10278
10279 if (idxRegIndex != UINT8_MAX)
10280 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10281 if (idxRegBase != UINT8_MAX)
10282 iemNativeRegFreeTmp(pReNative, idxRegBase);
10283 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10284 return off;
10285}
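
/* Example of the uSibAndRspOffset packing consumed above (purely illustrative): for a
   32-bit "pop [esp]" the SIB byte is 0x24 (scale=0, index=none, base=ESP) and the fixed
   stack-pointer adjustment is typically the operand size, i.e. 4, so the threaded code
   would pass uSibAndRspOffset = (4 << 8) | 0x24 = 0x0424, and the "u32EffAddr +=
   uSibAndRspOffset >> 8" line above adds those 4 bytes on top of ESP. */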
10286
10287
10288#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10289 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10290 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10291
10292#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10293 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10294 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10295
10296#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10297 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10298 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
10299
10300/**
10301 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
10302 *
10303 * @returns New off.
10304 * @param pReNative The native recompiler state.
10305 * @param off The current offset into the instruction buffer.
10306 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
10307 * bit 4 to REX.X. The two bits are part of the
10308 * REG sub-field, which isn't needed in this
10309 * function.
10310 * @param uSibAndRspOffset Two parts:
10311 * - The first 8 bits make up the SIB byte.
10312 * - The next 8 bits are the fixed RSP/ESP offset
10313 * in case of a pop [xSP].
10314 * @param u32Disp The displacement byte/word/dword, if any.
10315 * @param cbInstr The size of the fully decoded instruction. Used
10316 * for RIP relative addressing.
10317 * @param idxVarRet The result variable number.
10318 * @param f64Bit Whether to use a 64-bit or 32-bit address size
10319 * when calculating the address.
10320 *
10321 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
10322 */
10323DECL_INLINE_THROW(uint32_t)
10324iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
10325 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
10326{
10327 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10328
10329 /*
10330 * Special case the rip + disp32 form first.
10331 */
10332 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10333 {
10334 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10335 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
10336 kIemNativeGstRegUse_ReadOnly);
10337#ifdef RT_ARCH_AMD64
10338 if (f64Bit)
10339 {
10340 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
10341 if ((int32_t)offFinalDisp == offFinalDisp)
10342 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
10343 else
10344 {
10345 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
10346 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
10347 }
10348 }
10349 else
10350 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
10351
10352#elif defined(RT_ARCH_ARM64)
10353 if (f64Bit)
10354 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10355 (int64_t)(int32_t)u32Disp + cbInstr);
10356 else
10357 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10358 (int32_t)u32Disp + cbInstr);
10359
10360#else
10361# error "Port me!"
10362#endif
10363 iemNativeRegFreeTmp(pReNative, idxRegPc);
10364 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10365 return off;
10366 }
10367
10368 /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
10369 int64_t i64EffAddr = 0;
10370 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10371 {
10372 case 0: break;
10373 case 1: i64EffAddr = (int8_t)u32Disp; break;
10374 case 2: i64EffAddr = (int32_t)u32Disp; break;
10375 default: AssertFailed();
10376 }
10377
10378 /* Get the register (or SIB) value. */
10379 uint8_t idxGstRegBase = UINT8_MAX;
10380 uint8_t idxGstRegIndex = UINT8_MAX;
10381 uint8_t cShiftIndex = 0;
10382 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
10383 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
10384 else /* SIB: */
10385 {
10386 /* index w/ scaling. */
10387 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10388 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10389 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
10390 if (idxGstRegIndex == 4)
10391 {
10392 /* no index */
10393 cShiftIndex = 0;
10394 idxGstRegIndex = UINT8_MAX;
10395 }
10396
10397 /* base */
10398 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
10399 if (idxGstRegBase == 4)
10400 {
10401 /* pop [rsp] hack */
10402 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
10403 }
10404 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
10405 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
10406 {
10407 /* mod=0 and base=5 -> disp32, no base reg. */
10408 Assert(i64EffAddr == 0);
10409 i64EffAddr = (int32_t)u32Disp;
10410 idxGstRegBase = UINT8_MAX;
10411 }
10412 }
10413
10414 /*
10415 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
10416 * the start of the function.
10417 */
10418 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10419 {
10420 if (f64Bit)
10421 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
10422 else
10423 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
10424 return off;
10425 }
10426
10427 /*
10428 * Now emit code that calculates:
10429 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10430 * or if !f64Bit:
10431 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10432 */
10433 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10434 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10435 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10436 kIemNativeGstRegUse_ReadOnly);
10437 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10438 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10439 kIemNativeGstRegUse_ReadOnly);
10440
10441 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10442 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10443 {
10444 idxRegBase = idxRegIndex;
10445 idxRegIndex = UINT8_MAX;
10446 }
10447
10448#ifdef RT_ARCH_AMD64
10449 uint8_t bFinalAdj;
10450 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
10451 bFinalAdj = 0; /* likely */
10452 else
10453 {
10454 /* pop [rsp] with a problematic disp32 value. Split out the
10455 RSP offset and add it separately afterwards (bFinalAdj). */
10456 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
10457 Assert(idxGstRegBase == X86_GREG_xSP);
10458 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
10459 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
10460 Assert(bFinalAdj != 0);
10461 i64EffAddr -= bFinalAdj;
10462 Assert((int32_t)i64EffAddr == i64EffAddr);
10463 }
10464 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
10465//pReNative->pInstrBuf[off++] = 0xcc;
10466
10467 if (idxRegIndex == UINT8_MAX)
10468 {
10469 if (u32EffAddr == 0)
10470 {
10471 /* mov ret, base */
10472 if (f64Bit)
10473 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
10474 else
10475 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10476 }
10477 else
10478 {
10479 /* lea ret, [base + disp32] */
10480 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10481 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10482 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
10483 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10484 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10485 | (f64Bit ? X86_OP_REX_W : 0);
10486 pbCodeBuf[off++] = 0x8d;
10487 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10488 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10489 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10490 else
10491 {
10492 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10493 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10494 }
10495 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10496 if (bMod == X86_MOD_MEM4)
10497 {
10498 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10499 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10500 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10501 }
10502 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10503 }
10504 }
10505 else
10506 {
10507 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10508 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10509 if (idxRegBase == UINT8_MAX)
10510 {
10511 /* lea ret, [(index64 << cShiftIndex) + disp32] */
10512 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
10513 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10514 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10515 | (f64Bit ? X86_OP_REX_W : 0);
10516 pbCodeBuf[off++] = 0x8d;
10517 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10518 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10519 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10520 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10521 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10522 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10523 }
10524 else
10525 {
10526 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10527 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10528 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10529 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10530 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10531 | (f64Bit ? X86_OP_REX_W : 0);
10532 pbCodeBuf[off++] = 0x8d;
10533 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10534 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10535 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10536 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10537 if (bMod != X86_MOD_MEM0)
10538 {
10539 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10540 if (bMod == X86_MOD_MEM4)
10541 {
10542 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10543 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10544 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10545 }
10546 }
10547 }
10548 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10549 }
10550
10551 if (!bFinalAdj)
10552 { /* likely */ }
10553 else
10554 {
10555 Assert(f64Bit);
10556 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
10557 }
10558
10559#elif defined(RT_ARCH_ARM64)
10560 if (i64EffAddr == 0)
10561 {
10562 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10563 if (idxRegIndex == UINT8_MAX)
10564 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
10565 else if (idxRegBase != UINT8_MAX)
10566 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10567 f64Bit, false /*fSetFlags*/, cShiftIndex);
10568 else
10569 {
10570 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
10571 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
10572 }
10573 }
10574 else
10575 {
10576 if (f64Bit)
10577 { /* likely */ }
10578 else
10579 i64EffAddr = (int32_t)i64EffAddr;
10580
10581 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
10582 {
10583 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10584 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
10585 }
10586 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
10587 {
10588 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10589 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
10590 }
10591 else
10592 {
10593 if (f64Bit)
10594 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
10595 else
10596 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
10597 if (idxRegBase != UINT8_MAX)
10598 {
10599 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10600 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
10601 }
10602 }
10603 if (idxRegIndex != UINT8_MAX)
10604 {
10605 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10606 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10607 f64Bit, false /*fSetFlags*/, cShiftIndex);
10608 }
10609 }
10610
10611#else
10612# error "port me"
10613#endif
10614
10615 if (idxRegIndex != UINT8_MAX)
10616 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10617 if (idxRegBase != UINT8_MAX)
10618 iemNativeRegFreeTmp(pReNative, idxRegBase);
10619 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10620 return off;
10621}
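
/* Example of the bRmEx packing documented above (illustrative): for an operand such as
   [r13 + 0x10], encoded with REX.B=1 and ModRM mod=01 reg=000 rm=101 (0x45), the caller
   passes bRmEx = 0x45 | 0x08 = 0x4D. "bRmEx & (X86_MODRM_RM_MASK | 0x8)" then yields 13
   (R13) as the base register and the mod=1 path sets i64EffAddr = (int8_t)0x10. */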
10622
10623
10624/*********************************************************************************************************************************
10625* TLB Lookup. *
10626*********************************************************************************************************************************/
10627
10628/**
10629 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
10630 */
10631DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
10632{
10633 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
10634 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
10635 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
10636 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
10637
10638 /* Do the lookup manually. */
10639 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
10640 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
10641 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
10642 if (RT_LIKELY(pTlbe->uTag == uTag))
10643 {
10644 /*
10645 * Check TLB page table level access flags.
10646 */
10647 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
10648 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
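 /* Note: with IEMTLBE_F_PT_NO_USER being 4, (CPL + 1) & 4 is non-zero only for CPL 3,
    so only user-mode accesses demand that the page isn't marked no-user; CPL 0..2
    accesses ignore that bit. */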
10649 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
10650 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
10651 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
10652 | IEMTLBE_F_PG_UNASSIGNED
10653 | IEMTLBE_F_PT_NO_ACCESSED
10654 | fNoWriteNoDirty | fNoUser);
10655 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
10656 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
10657 {
10658 /*
10659 * Return the address.
10660 */
10661 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
10662 if ((uintptr_t)pbAddr == uResult)
10663 return;
10664 RT_NOREF(cbMem);
10665 AssertFailed();
10666 }
10667 else
10668 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
10669 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
10670 }
10671 else
10672 AssertFailed();
10673 RT_BREAKPOINT();
10674}
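
/* Example of the uSegAndSizeAndAccess packing decoded at the top of the helper above
   (illustrative): a 4-byte data read through DS would arrive as
   (IEM_ACCESS_TYPE_READ << 16) | (4 << 8) | X86_SREG_DS, while an already flattened
   address uses UINT8_MAX (0xff) in the segment byte so no segment base is added. */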
10675
10676/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
10677
10678
10679/*********************************************************************************************************************************
10680* Memory fetches and stores common *
10681*********************************************************************************************************************************/
10682
10683typedef enum IEMNATIVEMITMEMOP
10684{
10685 kIemNativeEmitMemOp_Store = 0,
10686 kIemNativeEmitMemOp_Fetch,
10687 kIemNativeEmitMemOp_Fetch_Zx_U16,
10688 kIemNativeEmitMemOp_Fetch_Zx_U32,
10689 kIemNativeEmitMemOp_Fetch_Zx_U64,
10690 kIemNativeEmitMemOp_Fetch_Sx_U16,
10691 kIemNativeEmitMemOp_Fetch_Sx_U32,
10692 kIemNativeEmitMemOp_Fetch_Sx_U64
10693} IEMNATIVEMITMEMOP;
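
/* For orientation: kIemNativeEmitMemOp_Fetch covers same-size fetches, while the _Zx_
   and _Sx_ values presumably back the zero- and sign-extending IEM_MC_FETCH_MEM_*_ZX/SX_*
   forms; cbMem always gives the size of the memory operand itself. */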
10694
10695/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
10696 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
10697 * (with iSegReg = UINT8_MAX). */
10698DECL_INLINE_THROW(uint32_t)
10699iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
10700 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
10701 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
10702{
10703 /*
10704 * Assert sanity.
10705 */
10706 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
10707 Assert( enmOp != kIemNativeEmitMemOp_Store
10708 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
10709 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
10710 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
10711 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
10712 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
10713 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10714 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
10715 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
10716 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
10717#ifdef VBOX_STRICT
10718 if (iSegReg == UINT8_MAX)
10719 {
10720 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10721 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10722 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10723 switch (cbMem)
10724 {
10725 case 1:
10726 Assert( pfnFunction
10727 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
10728 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10729 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10730 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10731 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10732 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
10733 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
10734 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
10735 : UINT64_C(0xc000b000a0009000) ));
10736 break;
10737 case 2:
10738 Assert( pfnFunction
10739 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
10740 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10741 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10742 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10743 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
10744 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
10745 : UINT64_C(0xc000b000a0009000) ));
10746 break;
10747 case 4:
10748 Assert( pfnFunction
10749 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
10750 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10751 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10752 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
10753 : UINT64_C(0xc000b000a0009000) ));
10754 break;
10755 case 8:
10756 Assert( pfnFunction
10757 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
10758 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
10759 : UINT64_C(0xc000b000a0009000) ));
10760 break;
10761 }
10762 }
10763 else
10764 {
10765 Assert(iSegReg < 6);
10766 switch (cbMem)
10767 {
10768 case 1:
10769 Assert( pfnFunction
10770 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
10771 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
10772 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10773 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10774 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10775 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
10776 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
10777 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
10778 : UINT64_C(0xc000b000a0009000) ));
10779 break;
10780 case 2:
10781 Assert( pfnFunction
10782 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
10783 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
10784 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
10785 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
10786 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
10787 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
10788 : UINT64_C(0xc000b000a0009000) ));
10789 break;
10790 case 4:
10791 Assert( pfnFunction
10792 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
10793 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
10794 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
10795 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
10796 : UINT64_C(0xc000b000a0009000) ));
10797 break;
10798 case 8:
10799 Assert( pfnFunction
10800 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
10801 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
10802 : UINT64_C(0xc000b000a0009000) ));
10803 break;
10804 }
10805 }
10806#endif
10807
10808#ifdef VBOX_STRICT
10809 /*
10810 * Check that the fExec flags we've got make sense.
10811 */
10812 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10813#endif
10814
10815 /*
10816 * To keep things simple we have to commit any pending writes first as we
10817 * may end up making calls.
10818 */
10819 /** @todo we could postpone this till we make the call and reload the
10820 * registers after returning from the call. Not sure if that's sensible or
10821 * not, though. */
10822 off = iemNativeRegFlushPendingWrites(pReNative, off);
10823
10824#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
10825 /*
10826 * Move/spill/flush stuff out of call-volatile registers.
10827 * This is the easy way out. We could contain this to the tlb-miss branch
10828 * by saving and restoring active stuff here.
10829 */
10830 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
10831#endif
10832
10833 /*
10834 * Define labels and allocate the result register (trying for the return
10835 * register if we can).
10836 */
10837 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10838 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
10839 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
10840 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
10841 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
10842 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
10843 uint8_t const idxRegValueStore = !TlbState.fSkip
10844 && enmOp == kIemNativeEmitMemOp_Store
10845 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
10846 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
10847 : UINT8_MAX;
10848 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
10849 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
10850 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
10851 : UINT32_MAX;
10852
10853 /*
10854 * Jump to the TLB lookup code.
10855 */
10856 if (!TlbState.fSkip)
10857 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
10858
10859 /*
10860 * TlbMiss:
10861 *
10862 * Call helper to do the fetching.
10863 * We flush all guest register shadow copies here.
10864 */
10865 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
10866
10867#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10868 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10869#else
10870 RT_NOREF(idxInstr);
10871#endif
10872
10873#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
10874 /* Save variables in volatile registers. */
10875 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
10876 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
10877 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
10878 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
10879#endif
10880
10881 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
10882 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
10883 if (enmOp == kIemNativeEmitMemOp_Store)
10884 {
10885 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
10886 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
10887#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
10888 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
10889#else
10890 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
10891 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
10892#endif
10893 }
10894
10895 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
10896 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
10897#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
10898 fVolGregMask);
10899#else
10900 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
10901#endif
10902
10903 if (iSegReg != UINT8_MAX)
10904 {
10905 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
10906 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
10907 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
10908 }
10909
10910 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10911 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10912
10913 /* Done setting up parameters, make the call. */
10914 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10915
10916 /*
10917 * Put the result in the right register if this is a fetch.
10918 */
10919 if (enmOp != kIemNativeEmitMemOp_Store)
10920 {
10921 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
10922 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
10923 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
10924 }
10925
10926#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
10927 /* Restore variables and guest shadow registers to volatile registers. */
10928 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
10929 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
10930#endif
10931
10932#ifdef IEMNATIVE_WITH_TLB_LOOKUP
10933 if (!TlbState.fSkip)
10934 {
10935 /* end of TlbMiss - Jump to the done label. */
10936 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10937 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
10938
10939 /*
10940 * TlbLookup:
10941 */
10942 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
10943 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
10944 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
10945
10946 /*
10947 * Emit code to do the actual storing / fetching.
10948 */
10949 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
10950# ifdef VBOX_WITH_STATISTICS
10951 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
10952 enmOp == kIemNativeEmitMemOp_Store
10953 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch)
10954 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore));
10955# endif
10956 switch (enmOp)
10957 {
10958 case kIemNativeEmitMemOp_Store:
10959 if (pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate)
10960 {
10961 switch (cbMem)
10962 {
10963 case 1:
10964 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
10965 break;
10966 case 2:
10967 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
10968 break;
10969 case 4:
10970 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
10971 break;
10972 case 8:
10973 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
10974 break;
10975 default:
10976 AssertFailed();
10977 }
10978 }
10979 else
10980 {
10981 switch (cbMem)
10982 {
10983 case 1:
10984 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off,
10985 (uint8_t)pReNative->Core.aVars[idxVarValue].u.uValue,
10986 idxRegMemResult, TlbState.idxReg1);
10987 break;
10988 case 2:
10989 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
10990 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
10991 idxRegMemResult, TlbState.idxReg1);
10992 break;
10993 case 4:
10994 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
10995 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
10996 idxRegMemResult, TlbState.idxReg1);
10997 break;
10998 case 8:
10999 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
11000 idxRegMemResult, TlbState.idxReg1);
11001 break;
11002 default:
11003 AssertFailed();
11004 }
11005 }
11006 break;
11007
11008 case kIemNativeEmitMemOp_Fetch:
11009 case kIemNativeEmitMemOp_Fetch_Zx_U16:
11010 case kIemNativeEmitMemOp_Fetch_Zx_U32:
11011 case kIemNativeEmitMemOp_Fetch_Zx_U64:
11012 switch (cbMem)
11013 {
11014 case 1:
11015 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11016 break;
11017 case 2:
11018 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11019 break;
11020 case 4:
11021 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11022 break;
11023 case 8:
11024 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11025 break;
11026 default:
11027 AssertFailed();
11028 }
11029 break;
11030
11031 case kIemNativeEmitMemOp_Fetch_Sx_U16:
11032 Assert(cbMem == 1);
11033 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11034 break;
11035
11036 case kIemNativeEmitMemOp_Fetch_Sx_U32:
11037 Assert(cbMem == 1 || cbMem == 2);
11038 if (cbMem == 1)
11039 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11040 else
11041 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11042 break;
11043
11044 case kIemNativeEmitMemOp_Fetch_Sx_U64:
11045 switch (cbMem)
11046 {
11047 case 1:
11048 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11049 break;
11050 case 2:
11051 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11052 break;
11053 case 4:
11054 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11055 break;
11056 default:
11057 AssertFailed();
11058 }
11059 break;
11060
11061 default:
11062 AssertFailed();
11063 }
11064
11065 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11066
11067 /*
11068 * TlbDone:
11069 */
11070 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11071
11072 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
11073
11074# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11075 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
11076 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11077# endif
11078 }
11079#else
11080 RT_NOREF(fAlignMask, idxLabelTlbMiss);
11081#endif
11082
11083 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
11084 iemNativeVarRegisterRelease(pReNative, idxVarValue);
11085 return off;
11086}
11087
11088
11089
11090/*********************************************************************************************************************************
11091* Memory fetches (IEM_MEM_FETCH_XXX). *
11092*********************************************************************************************************************************/
11093
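/*
 * Each IEM_MC_FETCH_MEM_xxx variant below simply forwards to
 * iemNativeEmitMemFetchStoreDataCommon, passing the destination and address
 * variable indices, the segment register, the element size, the natural
 * alignment mask (bytes need none), the fetch/extend operation and the
 * TLB-miss helper.  Illustrative expansion (per the #define, with hypothetical
 * arguments u16Dst and GCPtrEff):
 *
 *     IEM_MC_FETCH_MEM_U16(u16Dst, X86_SREG_DS, GCPtrEff)
 *         -> off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, u16Dst, X86_SREG_DS, GCPtrEff,
 *                                                       sizeof(uint16_t), sizeof(uint16_t) - 1,
 *                                                       kIemNativeEmitMemOp_Fetch,
 *                                                       (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr);
 */
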
11094/* 8-bit segmented: */
11095#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
11096 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
11097 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11098 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11099
11100#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11101 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11102 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11103 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11104
11105#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11106 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11107 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11108 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11109
11110#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11111 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11112 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11113 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11114
11115#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11116 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11117 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11118 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11119
11120#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11121 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11122 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11123 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11124
11125#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11126 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11127 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11128 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11129
11130/* 16-bit segmented: */
11131#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11132 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11133 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11134 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11135
11136#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11137 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11138 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11139 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11140
11141#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11142 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11143 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11144 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11145
11146#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11147 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11148 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11149 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11150
11151#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11152 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11153 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11154 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11155
11156#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11157 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11158 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11159 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11160
11161
11162/* 32-bit segmented: */
11163#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11164 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11165 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11166 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11167
11168#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11169 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11170 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11171 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11172
11173#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11174 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11175 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11176 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11177
11178#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11179 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11180 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11181 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11182
11183
11184/* 64-bit segmented: */
11185#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11186 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11187 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11188 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
11189
11190
11191
11192/* 8-bit flat: */
11193#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
11194 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
11195 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11196 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11197
11198#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
11199 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11200 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11201 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11202
11203#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
11204 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11205 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11206 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11207
11208#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
11209 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11210 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11211 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11212
11213#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
11214 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11215 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11216 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11217
11218#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
11219 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11220 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11221 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11222
11223#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
11224 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11225 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11226 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11227
11228
11229/* 16-bit flat: */
11230#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
11231 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11232 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11233 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11234
11235#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
11236 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11237 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11238 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11239
11240#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
11241 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11242 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11243 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11244
11245#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
11246 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11247 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11248 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11249
11250#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
11251 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11252 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11253 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11254
11255#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
11256 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11257 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11258 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11259
11260/* 32-bit flat: */
11261#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
11262 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11263 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11264 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11265
11266#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
11267 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11268 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11269 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11270
11271#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
11272 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11273 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11274 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11275
11276#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
11277 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11278 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11279 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11280
11281/* 64-bit flat: */
11282#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
11283 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11284 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11285 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
11286
11287
11288
11289/*********************************************************************************************************************************
11290* Memory stores (IEM_MEM_STORE_XXX). *
11291*********************************************************************************************************************************/
11292
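/*
 * The IEM_MC_STORE_MEM_xxx variants mirror the fetch macros above, using
 * kIemNativeEmitMemOp_Store and the corresponding store helpers.  The _CONST
 * variants further down go through iemNativeEmitMemStoreConstDataCommon,
 * which wraps the constant in a temporary immediate variable first.
 */
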
11293#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
11294 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
11295 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11296 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11297
11298#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
11299 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
11300 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11301 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11302
11303#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
11304 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
11305 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11306 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11307
11308#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
11309 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
11310 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11311 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11312
11313
11314#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
11315 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
11316 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11317 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11318
11319#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
11320 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
11321 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11322 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11323
11324#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
11325 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
11326 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11327 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11328
11329#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
11330 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
11331 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11332 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11333
11334
11335#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
11336 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11337 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11338
11339#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
11340 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11341 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11342
11343#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
11344 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11345 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11346
11347#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
11348 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11349 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11350
11351
11352#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
11353 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11354 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11355
11356#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
11357 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11358 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11359
11360#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
11361 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11362 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11363
11364#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
11365 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11366 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11367
11368/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
11369 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
11370DECL_INLINE_THROW(uint32_t)
11371iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
11372 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
11373{
11374 /*
11375 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
11376 * to do the grunt work.
11377 */
11378 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
11379 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
11380 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
11381 pfnFunction, idxInstr);
11382 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
11383 return off;
11384}
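
/*
 * Design note: keeping the constant in a temporary immediate-kind variable
 * lets the common worker pick between an inline immediate store on a TLB hit
 * and loading the value into a call argument register on a TLB miss, without
 * needing a separate constant-store code path.
 */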
11385
11386
11387
11388/*********************************************************************************************************************************
11389* Stack Accesses. *
11390*********************************************************************************************************************************/
11391/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
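/*
 * The first byte is the operand width in bits (cBitsVar), the second the
 * flat-mode address width or zero (cBitsFlat), and the third a
 * push-segment-register flag (fSReg).  For example, IEM_MC_FLAT32_PUSH_U32_SREG
 * below passes RT_MAKE_U32_FROM_U8(32, 32, 1, 0): a 32-bit push of a segment
 * register in flat 32-bit mode.
 */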
11392#define IEM_MC_PUSH_U16(a_u16Value) \
11393 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11394 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
11395#define IEM_MC_PUSH_U32(a_u32Value) \
11396 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11397 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
11398#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
11399 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
11400 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
11401#define IEM_MC_PUSH_U64(a_u64Value) \
11402 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
11403 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
11404
11405#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
11406 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
11407 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11408#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
11409 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
11410 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
11411#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
11412 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
11413 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
11414
11415#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
11416 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
11417 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11418#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
11419 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
11420 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
11421
11422
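/** Emits the 16-bit stack pointer variant of the push SP update: SP is
 *  decremented by cbMem within bits 15:0 only (the upper RSP bits are left
 *  untouched) and idxRegEffSp ends up holding the new 16-bit stack offset. */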
11423DECL_FORCE_INLINE_THROW(uint32_t)
11424iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11425{
11426 /* Use16BitSp: */
11427#ifdef RT_ARCH_AMD64
11428 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
11429 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11430#else
11431 /* sub regeff, regrsp, #cbMem */
11432 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
11433 /* and regeff, regeff, #0xffff */
11434 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
11435 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
11436 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
11437 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
11438#endif
11439 return off;
11440}
11441
11442
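/** Emits the 32-bit stack pointer variant of the push SP update: ESP is
 *  decremented by cbMem and idxRegEffSp is loaded from the updated 32-bit value. */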
11443DECL_FORCE_INLINE(uint32_t)
11444iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11445{
11446 /* Use32BitSp: */
11447 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
11448 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11449 return off;
11450}
11451
11452
11453/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
11454DECL_INLINE_THROW(uint32_t)
11455iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
11456 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
11457{
11458 /*
11459 * Assert sanity.
11460 */
11461 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11462#ifdef VBOX_STRICT
11463 if (RT_BYTE2(cBitsVarAndFlat) != 0)
11464 {
11465 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11466 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11467 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11468 Assert( pfnFunction
11469 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11470 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
11471 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
11472 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11473 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
11474 : UINT64_C(0xc000b000a0009000) ));
11475 }
11476 else
11477 Assert( pfnFunction
11478 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
11479 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
11480 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
11481 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
11482 : UINT64_C(0xc000b000a0009000) ));
11483#endif
11484
11485#ifdef VBOX_STRICT
11486 /*
11487 * Check that the fExec flags we've got make sense.
11488 */
11489 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11490#endif
11491
11492 /*
11493 * To keep things simple we have to commit any pending writes first as we
11494 * may end up making calls.
11495 */
11496 /** @todo we could postpone this till we make the call and reload the
11497 * registers after returning from the call. Not sure if that's sensible or
11498 * not, though. */
11499 off = iemNativeRegFlushPendingWrites(pReNative, off);
11500
11501 /*
11502 * First we calculate the new RSP and the effective stack pointer value.
11503 * For 64-bit mode and flat 32-bit these two are the same.
11504 * (Code structure is very similar to that of PUSH)
11505 */
11506 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
11507 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
11508 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
11509 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
11510 ? cbMem : sizeof(uint16_t);
11511 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
11512 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
11513 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
11514 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
11515 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
11516 if (cBitsFlat != 0)
11517 {
11518 Assert(idxRegEffSp == idxRegRsp);
11519 Assert(cBitsFlat == 32 || cBitsFlat == 64);
11520 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
11521 if (cBitsFlat == 64)
11522 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
11523 else
11524 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
11525 }
11526 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
11527 {
11528 Assert(idxRegEffSp != idxRegRsp);
11529 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
11530 kIemNativeGstRegUse_ReadOnly);
11531#ifdef RT_ARCH_AMD64
11532 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11533#else
11534 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11535#endif
11536 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
11537 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
11538 offFixupJumpToUseOtherBitSp = off;
11539 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11540 {
11541 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
11542 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11543 }
11544 else
11545 {
11546 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
11547 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11548 }
11549 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11550 }
11551 /* SpUpdateEnd: */
11552 uint32_t const offLabelSpUpdateEnd = off;
11553
11554 /*
11555 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
11556 * we're skipping lookup).
11557 */
11558 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
11559 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
11560 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11561 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
11562 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11563 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11564 : UINT32_MAX;
11565 uint8_t const idxRegValue = !TlbState.fSkip
11566 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
11567 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
11568 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
11569 : UINT8_MAX;
11570 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11571
11572
11573 if (!TlbState.fSkip)
11574 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11575 else
11576 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
11577
11578 /*
11579 * Use16BitSp:
11580 */
11581 if (cBitsFlat == 0)
11582 {
11583#ifdef RT_ARCH_AMD64
11584 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11585#else
11586 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11587#endif
11588 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
11589 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11590 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11591 else
11592 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11593 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
11594 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11595 }
11596
11597 /*
11598 * TlbMiss:
11599 *
11600 * Call helper to do the pushing.
11601 */
11602 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
11603
11604#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11605 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11606#else
11607 RT_NOREF(idxInstr);
11608#endif
11609
11610 /* Save variables in volatile registers. */
11611 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11612 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
11613 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
11614 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
11615 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11616
11617 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
11618 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
11619 {
11620 /* Swap them using ARG0 as temp register: */
11621 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
11622 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
11623 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
11624 }
11625 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
11626 {
11627 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
11628 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
11629 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11630
11631 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
11632 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
11633 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11634 }
11635 else
11636 {
11637 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
11638 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11639
11640 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
11641 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
11642 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
11643 }
11644
11645 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11646 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11647
11648 /* Done setting up parameters, make the call. */
11649 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11650
11651 /* Restore variables and guest shadow registers to volatile registers. */
11652 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11653 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11654
11655#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11656 if (!TlbState.fSkip)
11657 {
11658 /* end of TlbMiss - Jump to the done label. */
11659 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11660 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11661
11662 /*
11663 * TlbLookup:
11664 */
11665 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
11666 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
11667
11668 /*
11669 * Emit code to do the actual storing / fetching.
11670 */
11671 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
11672# ifdef VBOX_WITH_STATISTICS
11673 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11674 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
11675# endif
11676 if (idxRegValue != UINT8_MAX)
11677 {
11678 switch (cbMemAccess)
11679 {
11680 case 2:
11681 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11682 break;
11683 case 4:
11684 if (!fIsIntelSeg)
11685 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11686 else
11687 {
11688 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
11689 PUSH FS in real mode, so we have to try to emulate that here.
11690 We borrow the now unused idxReg1 from the TLB lookup code here. */
11691 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
11692 kIemNativeGstReg_EFlags);
11693 if (idxRegEfl != UINT8_MAX)
11694 {
11695#ifdef RT_ARCH_AMD64
11696 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
11697 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
11698 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11699#else
11700 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
11701 off, TlbState.idxReg1, idxRegEfl,
11702 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11703#endif
11704 iemNativeRegFreeTmp(pReNative, idxRegEfl);
11705 }
11706 else
11707 {
11708 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
11709 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
11710 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
11711 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11712 }
11713 /* ASSUMES the upper half of idxRegValue is ZERO. */
11714 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
11715 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
11716 }
11717 break;
11718 case 8:
11719 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11720 break;
11721 default:
11722 AssertFailed();
11723 }
11724 }
11725 else
11726 {
11727 switch (cbMemAccess)
11728 {
11729 case 2:
11730 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
11731 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11732 idxRegMemResult, TlbState.idxReg1);
11733 break;
11734 case 4:
11735 Assert(!fIsSegReg);
11736 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
11737 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11738 idxRegMemResult, TlbState.idxReg1);
11739 break;
11740 case 8:
11741 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
11742 idxRegMemResult, TlbState.idxReg1);
11743 break;
11744 default:
11745 AssertFailed();
11746 }
11747 }
11748
11749 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11750 TlbState.freeRegsAndReleaseVars(pReNative);
11751
11752 /*
11753 * TlbDone:
11754 *
11755 * Commit the new RSP value.
11756 */
11757 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11758 }
11759#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
11760
11761 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
11762 iemNativeRegFreeTmp(pReNative, idxRegRsp);
11763 if (idxRegEffSp != idxRegRsp)
11764 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
11765
11766 /* The value variable is implicitly flushed. */
11767 if (idxRegValue != UINT8_MAX)
11768 iemNativeVarRegisterRelease(pReNative, idxVarValue);
11769 iemNativeVarFreeLocal(pReNative, idxVarValue);
11770
11771 return off;
11772}
11773
11774
11775
11776/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
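/*
 * Same packing as for the push macros above; the segment-register flag (third
 * byte) is always zero here since only general registers are popped.
 */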
11777#define IEM_MC_POP_GREG_U16(a_iGReg) \
11778 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11779 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
11780#define IEM_MC_POP_GREG_U32(a_iGReg) \
11781 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11782 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
11783#define IEM_MC_POP_GREG_U64(a_iGReg) \
11784 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
11785 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
11786
11787#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
11788 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
11789 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
11790#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
11791 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
11792 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
11793
11794#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
11795 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
11796 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
11797#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
11798 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
11799 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
11800
11801
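/** Emits the 16-bit stack pointer variant of the pop SP update: idxRegEffSp
 *  receives the current 16-bit SP (the address to read from), while SP is
 *  advanced by cbMem within bits 15:0 only; idxRegTmp is used as scratch on
 *  arm64 and is unused on AMD64. */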
11802DECL_FORCE_INLINE_THROW(uint32_t)
11803iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
11804 uint8_t idxRegTmp)
11805{
11806 /* Use16BitSp: */
11807#ifdef RT_ARCH_AMD64
11808 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11809 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
11810 RT_NOREF(idxRegTmp);
11811#else
11812 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
11813 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
11814 /* add tmp, regrsp, #cbMem */
11815 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
11816 /* and tmp, tmp, #0xffff */
11817 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
11818 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
11819 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
11820 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
11821#endif
11822 return off;
11823}
11824
11825
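/** Emits the 32-bit stack pointer variant of the pop SP update: idxRegEffSp is
 *  loaded from the current ESP and ESP is then incremented by cbMem. */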
11826DECL_FORCE_INLINE(uint32_t)
11827iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11828{
11829 /* Use32BitSp: */
11830 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11831 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
11832 return off;
11833}
11834
11835
11836/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
11837DECL_INLINE_THROW(uint32_t)
11838iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
11839 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
11840{
11841 /*
11842 * Assert sanity.
11843 */
11844 Assert(idxGReg < 16);
11845#ifdef VBOX_STRICT
11846 if (RT_BYTE2(cBitsVarAndFlat) != 0)
11847 {
11848 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11849 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11850 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11851 Assert( pfnFunction
11852 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
11853 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
11854 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
11855 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
11856 : UINT64_C(0xc000b000a0009000) ));
11857 }
11858 else
11859 Assert( pfnFunction
11860 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
11861 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
11862 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
11863 : UINT64_C(0xc000b000a0009000) ));
11864#endif
11865
11866#ifdef VBOX_STRICT
11867 /*
11868 * Check that the fExec flags we've got make sense.
11869 */
11870 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11871#endif
11872
11873 /*
11874 * To keep things simple we have to commit any pending writes first as we
11875 * may end up making calls.
11876 */
11877 off = iemNativeRegFlushPendingWrites(pReNative, off);
11878
11879 /*
11880 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
11881 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
11882 * directly as the effective stack pointer.
11883 * (Code structure is very similar to that of PUSH)
11884 */
11885 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
11886 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
11887 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
11888 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
11889 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
11890 /** @todo can do a better job picking the register here. For cbMem >= 4 this
11891 * will be the resulting register value. */
11892 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
11893
11894 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
11895 if (cBitsFlat != 0)
11896 {
11897 Assert(idxRegEffSp == idxRegRsp);
11898 Assert(cBitsFlat == 32 || cBitsFlat == 64);
11899 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
11900 }
11901 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
11902 {
11903 Assert(idxRegEffSp != idxRegRsp);
11904 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
11905 kIemNativeGstRegUse_ReadOnly);
11906#ifdef RT_ARCH_AMD64
11907 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11908#else
11909 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11910#endif
11911 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
11912 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
11913 offFixupJumpToUseOtherBitSp = off;
11914 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11915 {
11916/** @todo can skip idxRegRsp updating when popping ESP. */
11917 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
11918 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11919 }
11920 else
11921 {
11922 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
11923 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
11924 }
11925 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11926 }
11927 /* SpUpdateEnd: */
11928 uint32_t const offLabelSpUpdateEnd = off;
11929
11930 /*
11931 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
11932 * we're skipping lookup).
11933 */
11934 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
11935 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
11936 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11937 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
11938 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11939 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11940 : UINT32_MAX;
11941
11942 if (!TlbState.fSkip)
11943 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11944 else
11945 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
11946
11947 /*
11948 * Use16BitSp:
11949 */
11950 if (cBitsFlat == 0)
11951 {
11952#ifdef RT_ARCH_AMD64
11953 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11954#else
11955 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11956#endif
11957 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
11958 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11959 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
11960 else
11961 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11962 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
11963 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11964 }
11965
11966 /*
11967 * TlbMiss:
11968 *
11969 * Call helper to do the popping.
11970 */
11971 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
11972
11973#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11974 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11975#else
11976 RT_NOREF(idxInstr);
11977#endif
11978
11979 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11980 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
11981 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
11982 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11983
11984
11985 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
11986 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
11987 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11988
11989 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11990 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11991
11992 /* Done setting up parameters, make the call. */
11993 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11994
11995 /* Move the return register content to idxRegMemResult. */
11996 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
11997 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
11998
11999 /* Restore variables and guest shadow registers to volatile registers. */
12000 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12001 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12002
12003#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12004 if (!TlbState.fSkip)
12005 {
12006 /* end of TlbMiss - Jump to the done label. */
12007 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12008 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12009
12010 /*
12011 * TlbLookup:
12012 */
12013 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
12014 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12015
12016 /*
12017 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
12018 */
12019 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12020# ifdef VBOX_WITH_STATISTICS
12021 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12022 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12023# endif
12024 switch (cbMem)
12025 {
12026 case 2:
12027 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12028 break;
12029 case 4:
12030 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12031 break;
12032 case 8:
12033 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12034 break;
12035 default:
12036 AssertFailed();
12037 }
12038
12039 TlbState.freeRegsAndReleaseVars(pReNative);
12040
12041 /*
12042 * TlbDone:
12043 *
12044 * Set the new RSP value (FLAT accesses needs to calculate it first) and
12045 * commit the popped register value.
12046 */
12047 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12048 }
12049#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12050
12051 if (idxGReg != X86_GREG_xSP)
12052 {
12053 /* Set the register. */
12054 if (cbMem >= sizeof(uint32_t))
12055 {
12056 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
12057 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
12058 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12059 }
12060 else
12061 {
12062 Assert(cbMem == sizeof(uint16_t));
12063 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
12064 kIemNativeGstRegUse_ForUpdate);
12065 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
12066 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12067 iemNativeRegFreeTmp(pReNative, idxRegDst);
12068 }
12069
12070 /* Complete RSP calculation for FLAT mode. */
12071 if (idxRegEffSp == idxRegRsp)
12072 {
12073 if (cBitsFlat == 64)
12074 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12075 else
12076 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12077 }
12078 }
12079 else
12080 {
12081 /* We're popping RSP, ESP or SP. Only the 16-bit SP case needs a bit of extra work, of course. */
12082 if (cbMem == sizeof(uint64_t))
12083 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
12084 else if (cbMem == sizeof(uint32_t))
12085 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
12086 else
12087 {
12088 if (idxRegEffSp == idxRegRsp)
12089 {
12090 if (cBitsFlat == 64)
12091 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12092 else
12093 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12094 }
12095 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
12096 }
12097 }
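    /* Note: when popping into xSP the popped value itself becomes the new stack
       pointer for 32/64-bit operand sizes, which is why no extra increment is
       emitted above; for the 16-bit case only the low word is replaced, so the
       flat-mode increment is applied first and the popped word merged in. */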
12098 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
12099
12100 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12101 if (idxRegEffSp != idxRegRsp)
12102 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12103 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12104
12105 return off;
12106}
12107
12108
12109
12110/*********************************************************************************************************************************
12111* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
12112*********************************************************************************************************************************/
12113
12114#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12115 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12116 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
12117 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
12118
12119#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12120 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12121 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
12122 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
12123
12124#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12125 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12126 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
12127 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
12128
12129#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12130 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12131 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
12132 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
12133
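/*
 * Illustrative expansion sketch (not part of the build): in a generated MC block
 * one of the macros above turns into a single call to the common emitter.  The
 * names pu8Dst, bUnmapInfo, iEffSeg and GCPtrEffDst are hypothetical
 * placeholders, not identifiers from this file.
 */
#if 0
    IEM_MC_MEM_MAP_U8_RW(pu8Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
    /* ... which expands to ... */
    off = iemNativeEmitMemMapCommon(pReNative, off, pu8Dst, bUnmapInfo, iEffSeg, GCPtrEffDst, sizeof(uint8_t),
                                    IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/,
                                    (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr);
#endif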
12134
12135#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12136 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12137 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12138 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
12139
12140#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12141 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12142 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12143 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
12144
12145#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12146 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12147 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12148 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12149
12150#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12151 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12152 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12153 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
12154
12155#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12156 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
12157 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12158 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12159
12160
12161#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12162 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12163 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12164 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
12165
12166#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12167 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12168 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12169 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
12170
12171#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12172 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12173 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12174 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12175
12176#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12177 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12178 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12179 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
12180
12181#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12182 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
12183 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12184 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12185
12186
12187#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12188 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12189 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12190 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
12191
12192#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12193 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12194 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12195 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
12196#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12197 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12198 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12199 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12200
12201#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12202 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12203 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12204 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
12205
12206#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12207 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
12208 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12209 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12210
12211
12212#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12213 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12214 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12215 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
12216
12217#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12218 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12219 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12220 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
12221
12222
12223#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12224 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12225 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12226 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
12227
12228#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12229 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12230 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12231 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
12232
12233#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12234 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12235 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12236 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
12237
12238#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12239 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12240 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12241 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
12242
12243
12244
12245#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12246 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12247 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
12248 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
12249
12250#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12251 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12252 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
12253 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
12254
12255#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12256 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12257 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
12258 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
12259
12260#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12261 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12262 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
12263 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
12264
12265
12266#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12267 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12268 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12269 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
12270
12271#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12272 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12273 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12274 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
12275
12276#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12277 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12278 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12279 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12280
12281#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12282 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12283 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12284 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
12285
12286#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
12287 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
12288 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12289 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12290
12291
12292#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12293 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12294 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12295 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
12296
12297#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12298 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12299 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12300 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
12301
12302#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12303 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12304 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12305 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12306
12307#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12308 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12309 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12310 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
12311
12312#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
12313 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
12314 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12315 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12316
12317
12318#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12319 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12320 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12321 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
12322
12323#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12324 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12325 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12326 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
12327
12328#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12329 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12330 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12331 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12332
12333#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12334 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12335 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12336 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
12337
12338#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
12339 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
12340 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12341 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12342
12343
12344#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
12345 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12346 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12347 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
12348
12349#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
12350 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12351 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12352 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
12353
12354
12355#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12356 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12357 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12358 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
12359
12360#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12361 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12362 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12363 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
12364
12365#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12366 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12367 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12368 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
12369
12370#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12371 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12372 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12373 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
12374
12375
12376DECL_INLINE_THROW(uint32_t)
12377iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
12378 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
12379 uintptr_t pfnFunction, uint8_t idxInstr)
12380{
12381 /*
12382 * Assert sanity.
12383 */
12384 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
12385 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
12386 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
12387 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12388
12389 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12390 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
12391 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
12392 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12393
12394 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12395 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
12396 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
12397 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12398
12399 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12400
12401 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12402
12403#ifdef VBOX_STRICT
12404# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
12405 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
12406 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
12407 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
12408 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
12409# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
12410 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
12411 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
12412 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
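/* For example, IEM_MAP_HLP_FN(IEM_ACCESS_DATA_RW, iemNativeHlpMemMapDataU16) picks
   (uintptr_t)iemNativeHlpMemMapDataU16Rw, while IEM_ACCESS_DATA_ATOMIC selects the
   ...Atomic variant, IEM_ACCESS_DATA_R the ...Ro one and IEM_ACCESS_DATA_W the ...Wo one. */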
12413
12414 if (iSegReg == UINT8_MAX)
12415 {
12416 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12417 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12418 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12419 switch (cbMem)
12420 {
12421 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
12422 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
12423 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
12424 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
12425 case 10:
12426 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
12427 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
12428 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12429 break;
12430 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
12431# if 0
12432 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
12433 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
12434# endif
12435 default: AssertFailed(); break;
12436 }
12437 }
12438 else
12439 {
12440 Assert(iSegReg < 6);
12441 switch (cbMem)
12442 {
12443 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
12444 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
12445 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
12446 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
12447 case 10:
12448 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
12449 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
12450 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12451 break;
12452 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
12453# if 0
12454 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
12455 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
12456# endif
12457 default: AssertFailed(); break;
12458 }
12459 }
12460# undef IEM_MAP_HLP_FN
12461# undef IEM_MAP_HLP_FN_NO_AT
12462#endif
12463
12464#ifdef VBOX_STRICT
12465 /*
12466 * Check that the fExec flags we've got make sense.
12467 */
12468 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12469#endif
12470
12471 /*
12472 * To keep things simple we have to commit any pending writes first as we
12473 * may end up making calls.
12474 */
12475 off = iemNativeRegFlushPendingWrites(pReNative, off);
12476
12477#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12478 /*
12479 * Move/spill/flush stuff out of call-volatile registers.
12480 * This is the easy way out. We could contain this to the tlb-miss branch
12481 * by saving and restoring active stuff here.
12482 */
12483 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
12484 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
12485#endif
12486
12487 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
12488 while the tlb-miss codepath will temporarily put it on the stack.
12489 Set the type to stack here so we don't need to do it twice below. */
12490 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
12491 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
12492 /** @todo use a tmp register from TlbState, since they'll be free after tlb
12493 * lookup is done. */
12494
12495 /*
12496 * Define labels and allocate the result register (trying for the return
12497 * register if we can).
12498 */
12499 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12500 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
12501 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
12502 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
12503 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
12504 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12505 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12506 : UINT32_MAX;
12507//off=iemNativeEmitBrk(pReNative, off, 0);
12508 /*
12509 * Jump to the TLB lookup code.
12510 */
12511 if (!TlbState.fSkip)
12512 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12513
12514 /*
12515 * TlbMiss:
12516 *
12517 * Call helper to do the fetching.
12518 * We flush all guest register shadow copies here.
12519 */
12520 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
12521
12522#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12523 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12524#else
12525 RT_NOREF(idxInstr);
12526#endif
12527
12528#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12529 /* Save variables in volatile registers. */
12530 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
12531 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12532#endif
12533
12534 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
12535 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
12536#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12537 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
12538#else
12539 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12540#endif
12541
12542 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
12543 if (iSegReg != UINT8_MAX)
12544 {
12545 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
12546 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
12547 }
12548
12549 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
12550 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
12551 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
12552
12553 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12554 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12555
12556 /* Done setting up parameters, make the call. */
12557 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12558
12559 /*
12560 * Put the output in the right registers.
12561 */
12562 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
12563 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12564 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12565
12566#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12567 /* Restore variables and guest shadow registers to volatile registers. */
12568 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12569 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12570#endif
12571
12572 Assert(pReNative->Core.aVars[idxVarUnmapInfo].idxReg == idxRegUnmapInfo);
12573 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
12574
12575#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12576 if (!TlbState.fSkip)
12577 {
12578 /* end of TlbMiss - Jump to the done label. */
12579 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12580 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12581
12582 /*
12583 * TlbLookup:
12584 */
12585 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
12586 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12587# ifdef VBOX_WITH_STATISTICS
12588 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
12589 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
12590# endif
12591
12592 /* [idxVarUnmapInfo] = 0; */
12593 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
12594
12595 /*
12596 * TlbDone:
12597 */
12598 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12599
12600 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
12601
12602# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12603 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
12604 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12605# endif
12606 }
12607#else
12608 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
12609#endif
12610
12611 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
12612 iemNativeVarRegisterRelease(pReNative, idxVarMem);
12613
12614 return off;
12615}
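
/*
 * Rough sketch of the code iemNativeEmitMemMapCommon() emits when the TLB lookup
 * isn't skipped (labels numbered by uTlbSeqNo; this is only an outline of the
 * sequence above, not literal output):
 *
 *          jmp     TlbLookup_N
 *      TlbMiss_N:
 *          ; save volatile variables, record idxTbCurInstr
 *          ; load pVCpu, &bUnmapInfo, (iSegReg,) GCPtrMem into the call argument registers
 *          call    pfnFunction
 *          ; move the return value into the result register, reload bUnmapInfo
 *          jmp     TlbDone_N
 *      TlbLookup_N:
 *          ; inline TLB lookup, branches to TlbMiss_N on a miss
 *          ; bUnmapInfo = 0
 *      TlbDone_N:
 */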
12616
12617
12618#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
12619 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
12620 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
12621
12622#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
12623 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
12624 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
12625
12626#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
12627 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
12628 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
12629
12630#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
12631 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
12632 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
12633
12634DECL_INLINE_THROW(uint32_t)
12635iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
12636 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
12637{
12638 /*
12639 * Assert sanity.
12640 */
12641 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12642 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
12643 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
12644 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
12645#ifdef VBOX_STRICT
12646 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
12647 {
12648 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
12649 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
12650 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
12651 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
12652 case IEM_ACCESS_TYPE_WRITE:
12653 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
12654 case IEM_ACCESS_TYPE_READ:
12655 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
12656 default: AssertFailed();
12657 }
12658#else
12659 RT_NOREF(fAccess);
12660#endif
12661
12662 /*
12663 * To keep things simple we have to commit any pending writes first as we
12664 * may end up making calls (there shouldn't be any at this point, so this
12665 * is just for consistency).
12666 */
12667 /** @todo we could postpone this till we make the call and reload the
12668 * registers after returning from the call. Not sure if that's sensible or
12669 * not, though. */
12670 off = iemNativeRegFlushPendingWrites(pReNative, off);
12671
12672 /*
12673 * Move/spill/flush stuff out of call-volatile registers.
12674 *
12675 * We exclude any register holding the bUnmapInfo variable, as we'll be
12676 * checking it after returning from the call and will free it afterwards.
12677 */
12678 /** @todo save+restore active registers and maybe guest shadows in miss
12679 * scenario. */
12680 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
12681
12682 /*
12683 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
12684 * to call the unmap helper function.
12685 *
12686 * The likelihood of it being zero is higher than for the TLB hit when doing
12687 * the mapping, as a TLB miss for a well aligned and unproblematic memory
12688 * access should also end up with a mapping that won't need special unmapping.
12689 */
12690 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
12691 * should speed up things for the pure interpreter as well when TLBs
12692 * are enabled. */
12693#ifdef RT_ARCH_AMD64
12694 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
12695 {
12696 /* test byte [rbp - xxx], 0ffh */
12697 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
12698 pbCodeBuf[off++] = 0xf6;
12699 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
12700 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
12701 pbCodeBuf[off++] = 0xff;
12702 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12703 }
12704 else
12705#endif
12706 {
12707 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
12708 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
12709 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
12710 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
12711 }
12712 uint32_t const offJmpFixup = off;
12713 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
12714
12715 /*
12716 * Call the unmap helper function.
12717 */
12718#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
12719 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12720#else
12721 RT_NOREF(idxInstr);
12722#endif
12723
12724 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
12725 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
12726 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12727
12728 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12729 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12730
12731 /* Done setting up parameters, make the call. */
12732 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12733
12734 /* The bUnmapInfo variable is implicitly freed by these MCs. */
12735 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
12736
12737 /*
12738 * Done, just fixup the jump for the non-call case.
12739 */
12740 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
12741
12742 return off;
12743}
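
/*
 * Illustrative only: the map and commit/unmap MCs are used as a pair by the MC
 * blocks this recompiler consumes; the names pu16Dst, bUnmapInfo, iEffSeg and
 * GCPtrEffDst below are hypothetical placeholders.
 */
#if 0
    IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
    /* ... MCs operating on the mapped value go here ... */
    IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
#endif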
12744
12745
12746
12747/*********************************************************************************************************************************
12748* State and Exceptions *
12749*********************************************************************************************************************************/
12750
12751#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12752#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12753
12754#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12755#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12756#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12757
12758#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12759#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12760#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12761
12762
12763DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
12764{
12765 /** @todo this needs a lot more work later. */
12766 RT_NOREF(pReNative, fForChange);
12767 return off;
12768}
12769
12770
12771/*********************************************************************************************************************************
12772* The native code generator functions for each MC block. *
12773*********************************************************************************************************************************/
12774
12775
12776/*
12777 * Include g_apfnIemNativeRecompileFunctions and associated functions.
12778 *
12779 * This should probably live in its own file later, but let's see what the
12780 * compile times turn out to be first.
12781 */
12782#include "IEMNativeFunctions.cpp.h"
12783
12784
12785
12786/*********************************************************************************************************************************
12787* Recompiler Core. *
12788*********************************************************************************************************************************/
12789
12790
12791/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
12792static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
12793{
12794 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
12795 pDis->cbCachedInstr += cbMaxRead;
12796 RT_NOREF(cbMinRead);
12797 return VERR_NO_DATA;
12798}
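
/*
 * Note: the dummy reader zero-fills and fails with VERR_NO_DATA so that
 * DISInstrWithPrefetchedBytes() below won't go fetching opcode bytes beyond
 * the ones already copied into the translation block.
 */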
12799
12800
12801/**
12802 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
12803 * @returns pszBuf.
12804 * @param fFlags The flags.
12805 * @param pszBuf The output buffer.
12806 * @param cbBuf The output buffer size. At least 32 bytes.
12807 */
12808DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
12809{
12810 Assert(cbBuf >= 32);
12811 static RTSTRTUPLE const s_aModes[] =
12812 {
12813 /* [00] = */ { RT_STR_TUPLE("16BIT") },
12814 /* [01] = */ { RT_STR_TUPLE("32BIT") },
12815 /* [02] = */ { RT_STR_TUPLE("!2!") },
12816 /* [03] = */ { RT_STR_TUPLE("!3!") },
12817 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
12818 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
12819 /* [06] = */ { RT_STR_TUPLE("!6!") },
12820 /* [07] = */ { RT_STR_TUPLE("!7!") },
12821 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
12822 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
12823 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
12824 /* [0b] = */ { RT_STR_TUPLE("!b!") },
12825 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
12826 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
12827 /* [0e] = */ { RT_STR_TUPLE("!e!") },
12828 /* [0f] = */ { RT_STR_TUPLE("!f!") },
12829 /* [10] = */ { RT_STR_TUPLE("!10!") },
12830 /* [11] = */ { RT_STR_TUPLE("!11!") },
12831 /* [12] = */ { RT_STR_TUPLE("!12!") },
12832 /* [13] = */ { RT_STR_TUPLE("!13!") },
12833 /* [14] = */ { RT_STR_TUPLE("!14!") },
12834 /* [15] = */ { RT_STR_TUPLE("!15!") },
12835 /* [16] = */ { RT_STR_TUPLE("!16!") },
12836 /* [17] = */ { RT_STR_TUPLE("!17!") },
12837 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
12838 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
12839 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
12840 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
12841 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
12842 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
12843 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
12844 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
12845 };
12846 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
12847 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
12848 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
12849
12850 pszBuf[off++] = ' ';
12851 pszBuf[off++] = 'C';
12852 pszBuf[off++] = 'P';
12853 pszBuf[off++] = 'L';
12854 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
12855 Assert(off < 32);
12856
12857 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
12858
12859 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
12860 {
12861 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
12862 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
12863 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
12864 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
12865 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
12866 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
12867 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
12868 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
12869 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
12870 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
12871 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
12872 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
12873 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
12874 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
12875 };
12876 if (fFlags)
12877 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
12878 if (s_aFlags[i].fFlag & fFlags)
12879 {
12880 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
12881 pszBuf[off++] = ' ';
12882 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
12883 off += s_aFlags[i].cchName;
12884 fFlags &= ~s_aFlags[i].fFlag;
12885 if (!fFlags)
12886 break;
12887 }
12888 pszBuf[off] = '\0';
12889
12890 return pszBuf;
12891}
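
/*
 * Usage sketch (illustrative only): for a native TB executing 64-bit ring-3 code
 * the formatted string comes out along the lines of "64BIT CPL3 TYPE_NATIVE".
 */
#if 0
    char szTmp[64];
    Log(("TB flags: %s\n", iemTbFlagsToString(pTb->fFlags, szTmp, sizeof(szTmp))));
#endif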
12892
12893
12894DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
12895{
12896 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
12897#if defined(RT_ARCH_AMD64)
12898 static const char * const a_apszMarkers[] =
12899 {
12900 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
12901 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
12902 };
12903#endif
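    /* The marker names above correspond to the info dword embedded in the 7-byte
       NOPs emitted by iemNativeEmitMarker(); see the OP_NOP handling further down. */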
12904
12905 char szDisBuf[512];
12906 DISSTATE Dis;
12907 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
12908 uint32_t const cNative = pTb->Native.cInstructions;
12909 uint32_t offNative = 0;
12910#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12911 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
12912#endif
12913 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
12914 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
12915 : DISCPUMODE_64BIT;
12916#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12917 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
12918#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12919 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
12920#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12921# error "Port me"
12922#else
12923 csh hDisasm = ~(size_t)0;
12924# if defined(RT_ARCH_AMD64)
12925 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
12926# elif defined(RT_ARCH_ARM64)
12927 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
12928# else
12929# error "Port me"
12930# endif
12931 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
12932#endif
12933
12934 /*
12935 * Print TB info.
12936 */
12937 pHlp->pfnPrintf(pHlp,
12938 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
12939 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
12940 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
12941 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
12942#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12943 if (pDbgInfo && pDbgInfo->cEntries > 1)
12944 {
12945 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
12946
12947 /*
12948 * This disassembly is driven by the debug info which follows the native
12949 * code and indicates where the next guest instruction starts, where
12950 * labels are, and similar things.
12951 */
12952 uint32_t idxThreadedCall = 0;
12953 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
12954 uint8_t idxRange = UINT8_MAX;
12955 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
12956 uint32_t offRange = 0;
12957 uint32_t offOpcodes = 0;
12958 uint32_t const cbOpcodes = pTb->cbOpcodes;
12959 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
12960 uint32_t const cDbgEntries = pDbgInfo->cEntries;
12961 uint32_t iDbgEntry = 1;
12962 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
12963
12964 while (offNative < cNative)
12965 {
12966 /* If we're at or have passed the point where the next chunk of debug
12967 info starts, process it. */
12968 if (offDbgNativeNext <= offNative)
12969 {
12970 offDbgNativeNext = UINT32_MAX;
12971 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
12972 {
12973 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
12974 {
12975 case kIemTbDbgEntryType_GuestInstruction:
12976 {
12977 /* Did the exec flag change? */
12978 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
12979 {
12980 pHlp->pfnPrintf(pHlp,
12981 " fExec change %#08x -> %#08x %s\n",
12982 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
12983 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
12984 szDisBuf, sizeof(szDisBuf)));
12985 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
12986 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
12987 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
12988 : DISCPUMODE_64BIT;
12989 }
12990
12991 /* New opcode range? We need to fend off a spurious debug info entry here for cases
12992 where the compilation was aborted before the opcode was recorded and the actual
12993 instruction was translated to a threaded call. This may happen when we run out
12994 of ranges, or when some complicated interrupts/FFs are found to be pending or
12995 similar. So, we just deal with it here rather than in the compiler code as it
12996 is a lot simpler to do here. */
12997 if ( idxRange == UINT8_MAX
12998 || idxRange >= cRanges
12999 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
13000 {
13001 idxRange += 1;
13002 if (idxRange < cRanges)
13003 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
13004 else
13005 continue;
13006 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
13007 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
13008 + (pTb->aRanges[idxRange].idxPhysPage == 0
13009 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13010 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
13011 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13012 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
13013 pTb->aRanges[idxRange].idxPhysPage);
13014 GCPhysPc += offRange;
13015 }
13016
13017 /* Disassemble the instruction. */
13018 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
13019 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
13020 uint32_t cbInstr = 1;
13021 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13022 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
13023 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13024 if (RT_SUCCESS(rc))
13025 {
13026 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13027 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13028 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13029 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13030
13031 static unsigned const s_offMarker = 55;
13032 static char const s_szMarker[] = " ; <--- guest";
13033 if (cch < s_offMarker)
13034 {
13035 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
13036 cch = s_offMarker;
13037 }
13038 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
13039 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
13040
13041 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
13042 }
13043 else
13044 {
13045 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
13046 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
13047 cbInstr = 1;
13048 }
13049 GCPhysPc += cbInstr;
13050 offOpcodes += cbInstr;
13051 offRange += cbInstr;
13052 continue;
13053 }
13054
13055 case kIemTbDbgEntryType_ThreadedCall:
13056 pHlp->pfnPrintf(pHlp,
13057 " Call #%u to %s (%u args) - %s\n",
13058 idxThreadedCall,
13059 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13060 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13061 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
13062 idxThreadedCall++;
13063 continue;
13064
13065 case kIemTbDbgEntryType_GuestRegShadowing:
13066 {
13067 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
13068 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
13069 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
13070 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
13071 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13072 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
13073 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
13074 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
13075 else
13076 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
13077 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
13078 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13079 continue;
13080 }
13081
13082 case kIemTbDbgEntryType_Label:
13083 {
13084 const char *pszName = "what_the_fudge";
13085 const char *pszComment = "";
13086 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
13087 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
13088 {
13089 case kIemNativeLabelType_Return:
13090 pszName = "Return";
13091 break;
13092 case kIemNativeLabelType_ReturnBreak:
13093 pszName = "ReturnBreak";
13094 break;
13095 case kIemNativeLabelType_ReturnWithFlags:
13096 pszName = "ReturnWithFlags";
13097 break;
13098 case kIemNativeLabelType_NonZeroRetOrPassUp:
13099 pszName = "NonZeroRetOrPassUp";
13100 break;
13101 case kIemNativeLabelType_RaiseGp0:
13102 pszName = "RaiseGp0";
13103 break;
13104 case kIemNativeLabelType_ObsoleteTb:
13105 pszName = "ObsoleteTb";
13106 break;
13107 case kIemNativeLabelType_NeedCsLimChecking:
13108 pszName = "NeedCsLimChecking";
13109 break;
13110 case kIemNativeLabelType_CheckBranchMiss:
13111 pszName = "CheckBranchMiss";
13112 break;
13113 case kIemNativeLabelType_If:
13114 pszName = "If";
13115 fNumbered = true;
13116 break;
13117 case kIemNativeLabelType_Else:
13118 pszName = "Else";
13119 fNumbered = true;
13120 pszComment = " ; regs state restored pre-if-block";
13121 break;
13122 case kIemNativeLabelType_Endif:
13123 pszName = "Endif";
13124 fNumbered = true;
13125 break;
13126 case kIemNativeLabelType_CheckIrq:
13127 pszName = "CheckIrq_CheckVM";
13128 fNumbered = true;
13129 break;
13130 case kIemNativeLabelType_TlbLookup:
13131 pszName = "TlbLookup";
13132 fNumbered = true;
13133 break;
13134 case kIemNativeLabelType_TlbMiss:
13135 pszName = "TlbMiss";
13136 fNumbered = true;
13137 break;
13138 case kIemNativeLabelType_TlbDone:
13139 pszName = "TlbDone";
13140 fNumbered = true;
13141 break;
13142 case kIemNativeLabelType_Invalid:
13143 case kIemNativeLabelType_End:
13144 break;
13145 }
13146 if (fNumbered)
13147 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
13148 else
13149 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
13150 continue;
13151 }
13152
13153 case kIemTbDbgEntryType_NativeOffset:
13154 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
13155 Assert(offDbgNativeNext > offNative);
13156 break;
13157
13158 default:
13159 AssertFailed();
13160 }
13161 iDbgEntry++;
13162 break;
13163 }
13164 }
13165
13166 /*
13167 * Disassemble the next native instruction.
13168 */
13169 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13170# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13171 uint32_t cbInstr = sizeof(paNative[0]);
13172 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13173 if (RT_SUCCESS(rc))
13174 {
13175# if defined(RT_ARCH_AMD64)
13176 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13177 {
13178 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13179 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13180 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13181 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13182 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13183 uInfo & 0x8000 ? "recompiled" : "todo");
13184 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13185 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13186 else
13187 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13188 }
13189 else
13190# endif
13191 {
13192# ifdef RT_ARCH_AMD64
13193 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13194 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13195 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13196 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13197# elif defined(RT_ARCH_ARM64)
13198 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13199 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13200 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13201# else
13202# error "Port me"
13203# endif
13204 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
13205 }
13206 }
13207 else
13208 {
13209# if defined(RT_ARCH_AMD64)
13210 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
13211 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
13212# elif defined(RT_ARCH_ARM64)
13213 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
13214# else
13215# error "Port me"
13216# endif
13217 cbInstr = sizeof(paNative[0]);
13218 }
13219 offNative += cbInstr / sizeof(paNative[0]);
13220
13221# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13222 cs_insn *pInstr;
13223 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
13224 (uintptr_t)pNativeCur, 1, &pInstr);
13225 if (cInstrs > 0)
13226 {
13227 Assert(cInstrs == 1);
13228# if defined(RT_ARCH_AMD64)
13229 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
13230 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
13231# else
13232 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
13233 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
13234# endif
13235 offNative += pInstr->size / sizeof(*pNativeCur);
13236 cs_free(pInstr, cInstrs);
13237 }
13238 else
13239 {
13240# if defined(RT_ARCH_AMD64)
13241 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
13242 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
13243# else
13244 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
13245# endif
13246 offNative++;
13247 }
13248# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13249 }
13250 }
13251 else
13252#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
13253 {
13254 /*
13255 * No debug info, just disassemble the x86 code and then the native code.
13256 *
13257 * First the guest code:
13258 */
13259 for (unsigned i = 0; i < pTb->cRanges; i++)
13260 {
13261 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
13262 + (pTb->aRanges[i].idxPhysPage == 0
13263 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13264 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
13265 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13266 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
13267 unsigned off = pTb->aRanges[i].offOpcodes;
13268 /** @todo this ain't working when crossing pages! */
13269 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
13270 while (off < cbOpcodes)
13271 {
13272 uint32_t cbInstr = 1;
13273 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13274 &pTb->pabOpcodes[off], cbOpcodes - off,
13275 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13276 if (RT_SUCCESS(rc))
13277 {
13278 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13279 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13280 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13281 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13282 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
13283 GCPhysPc += cbInstr;
13284 off += cbInstr;
13285 }
13286 else
13287 {
13288 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
13289 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
13290 break;
13291 }
13292 }
13293 }
13294
13295 /*
13296 * Then the native code:
13297 */
13298 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
13299 while (offNative < cNative)
13300 {
13301 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13302# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13303 uint32_t cbInstr = sizeof(paNative[0]);
13304 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13305 if (RT_SUCCESS(rc))
13306 {
13307# if defined(RT_ARCH_AMD64)
13308 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13309 {
13310 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13311 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13312 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13313 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13314 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13315 uInfo & 0x8000 ? "recompiled" : "todo");
13316 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13317 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13318 else
13319 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13320 }
13321 else
13322# endif
13323 {
13324# ifdef RT_ARCH_AMD64
13325 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13326 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13327 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13328 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13329# elif defined(RT_ARCH_ARM64)
13330 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13331 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13332 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13333# else
13334# error "Port me"
13335# endif
13336 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
13337 }
13338 }
13339 else
13340 {
13341# if defined(RT_ARCH_AMD64)
13342 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
13343 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
13344# else
13345 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
13346# endif
13347 cbInstr = sizeof(paNative[0]);
13348 }
13349 offNative += cbInstr / sizeof(paNative[0]);
13350
13351# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13352 cs_insn *pInstr;
13353 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
13354 (uintptr_t)pNativeCur, 1, &pInstr);
13355 if (cInstrs > 0)
13356 {
13357 Assert(cInstrs == 1);
13358# if defined(RT_ARCH_AMD64)
13359 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
13360 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
13361# else
13362 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
13363 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
13364# endif
13365 offNative += pInstr->size / sizeof(*pNativeCur);
13366 cs_free(pInstr, cInstrs);
13367 }
13368 else
13369 {
13370# if defined(RT_ARCH_AMD64)
13371 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
13372 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
13373# else
13374 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
13375# endif
13376 offNative++;
13377 }
13378# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13379 }
13380 }
13381
13382#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13383 /* Cleanup. */
13384 cs_close(&hDisasm);
13385#endif
13386}
13387
13388
13389/**
13390 * Recompiles the given threaded TB into a native one.
13391 *
13392 * In case of failure the translation block will be returned as-is.
13393 *
13394 * @returns pTb.
13395 * @param pVCpu The cross context virtual CPU structure of the calling
13396 * thread.
13397 * @param pTb The threaded translation block to recompile to native.
13398 */
13399DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
13400{
13401 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
13402
13403 /*
13404 * The first time thru, we allocate the recompiler state; the other times
13405 * we just need to reset it before using it again.
13406 */
13407 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
13408 if (RT_LIKELY(pReNative))
13409 iemNativeReInit(pReNative, pTb);
13410 else
13411 {
13412 pReNative = iemNativeInit(pVCpu, pTb);
13413 AssertReturn(pReNative, pTb);
13414 }
13415
13416#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
13417 /*
13418 * First do liveness analysis. This is done backwards.
13419 */
13420 {
13421 uint32_t idxCall = pTb->Thrd.cCalls;
13422 if (idxCall <= pReNative->cLivenessEntriesAlloc)
13423 { /* likely */ }
13424 else
13425 {
13426 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
13427 while (idxCall > cAlloc)
13428 cAlloc *= 2;
13429 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
13430 AssertReturn(pvNew, pTb);
13431 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
13432 pReNative->cLivenessEntriesAlloc = cAlloc;
13433 }
13434 AssertReturn(idxCall > 0, pTb);
13435 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
13436
13437 /* The initial (final) entry. */
13438 idxCall--;
13439 paLivenessEntries[idxCall].s1.bm64 = IEMLIVENESSPART1_ALL_UNUSED;
13440 paLivenessEntries[idxCall].s2.bm64 = IEMLIVENESSPART2_ALL_UNUSED;
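/* Each pfnLiveness callback presumably derives the entry for the preceding
   call (paLivenessEntries[idxCall - 1]) from the call's own guest register
   accesses and the already computed entry that follows it
   (paLivenessEntries[idxCall]), so the information propagates from the TB
   exit back towards its start; the final entry above starts out as 'all
   unused' because nothing beyond the TB is known to consume guest state. */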
13441
13442 /* Loop backwards thru the calls and fill in the other entries. */
13443 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
13444 while (idxCall > 0)
13445 {
13446 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
13447 if (pfnLiveness)
13448 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
13449 else
13450 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
13451 pCallEntry--;
13452 idxCall--;
13453 }
13454
13455# ifdef VBOX_WITH_STATISTICS
13456 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
13457 to 'clobbered' rather than 'input'. */
13458 /** @todo */
13459# endif
13460 }
13461#endif
13462
13463 /*
13464 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
13465 * for aborting if an error happens.
13466 */
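/* The IEMNATIVE_TRY_SETJMP / IEMNATIVE_CATCH_LONGJMP_* macros used below hide
   which of the two mechanisms the build actually uses, so this function only
   deals with a single 'try / catch rc' pattern. */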
13467 uint32_t cCallsLeft = pTb->Thrd.cCalls;
13468#ifdef LOG_ENABLED
13469 uint32_t const cCallsOrg = cCallsLeft;
13470#endif
13471 uint32_t off = 0;
13472 int rc = VINF_SUCCESS;
13473 IEMNATIVE_TRY_SETJMP(pReNative, rc)
13474 {
13475 /*
13476 * Emit prolog code (fixed).
13477 */
13478 off = iemNativeEmitProlog(pReNative, off);
13479
13480 /*
13481 * Convert the calls to native code.
13482 */
13483#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13484 int32_t iGstInstr = -1;
13485#endif
13486#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
13487 uint32_t cThreadedCalls = 0;
13488 uint32_t cRecompiledCalls = 0;
13489#endif
13490#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
13491 uint32_t idxCurCall = 0;
13492#endif
13493 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
13494 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
13495 while (cCallsLeft-- > 0)
13496 {
13497 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
13498#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
13499 pReNative->idxCurCall = idxCurCall;
13500#endif
13501
13502 /*
13503 * Debug info and assembly markup.
13504 */
13505#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
13506 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
13507 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
13508#endif
13509#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13510 iemNativeDbgInfoAddNativeOffset(pReNative, off);
13511 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
13512 {
13513 if (iGstInstr < (int32_t)pTb->cInstructions)
13514 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
13515 else
13516 Assert(iGstInstr == pTb->cInstructions);
13517 iGstInstr = pCallEntry->idxInstr;
13518 }
13519 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
13520#endif
13521#if defined(VBOX_STRICT)
13522 off = iemNativeEmitMarker(pReNative, off,
13523 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
13524#endif
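/* Note: the marker payload emitted above is decoded again by the OP_NOP
   handling in iemNativeDisassembleTb, so the two must be kept in sync. */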
13525#if defined(VBOX_STRICT)
13526 iemNativeRegAssertSanity(pReNative);
13527#endif
13528
13529 /*
13530 * Actual work.
13531 */
13532 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
13533 pfnRecom ? "(recompiled)" : "(todo)"));
13534 if (pfnRecom) /** @todo stats on this. */
13535 {
13536 off = pfnRecom(pReNative, off, pCallEntry);
13537 STAM_REL_STATS({cRecompiledCalls++;});
13538 }
13539 else
13540 {
13541 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
13542 STAM_REL_STATS({cThreadedCalls++;});
13543 }
13544 Assert(off <= pReNative->cInstrBufAlloc);
13545 Assert(pReNative->cCondDepth == 0);
13546
13547 /*
13548 * Advance.
13549 */
13550 pCallEntry++;
13551#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
13552 idxCurCall++;
13553#endif
13554 }
13555
13556 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
13557 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
13558 if (!cThreadedCalls)
13559 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
13560
13561 /*
13562 * Emit the epilog code.
13563 */
13564 uint32_t idxReturnLabel;
13565 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
13566
13567 /*
13568 * Generate special jump labels.
13569 */
13570 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
13571 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
13572 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
13573 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
13574 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
13575 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
13576 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
13577 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
13578 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
13579 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
13580 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
13581 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
13582 }
13583 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
13584 {
13585 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
13586 return pTb;
13587 }
13588 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
13589 Assert(off <= pReNative->cInstrBufAlloc);
13590
13591 /*
13592 * Make sure all labels have been defined.
13593 */
13594 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
13595#ifdef VBOX_STRICT
13596 uint32_t const cLabels = pReNative->cLabels;
13597 for (uint32_t i = 0; i < cLabels; i++)
13598 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
13599#endif
13600
13601 /*
13602 * Allocate executable memory, copy over the code we've generated.
13603 */
13604 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
13605 if (pTbAllocator->pDelayedFreeHead)
13606 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
13607
13608 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
13609 AssertReturn(paFinalInstrBuf, pTb);
13610 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
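/* paFinalInstrBuf lives in the per-VCpu executable memory allocator; the
   iemExecMemAllocatorReadyForUse call below presumably performs whatever is
   required before the code may be executed (e.g. instruction cache
   maintenance on ARM64 hosts). */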
13611
13612 /*
13613 * Apply fixups.
13614 */
13615 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
13616 uint32_t const cFixups = pReNative->cFixups;
13617 for (uint32_t i = 0; i < cFixups; i++)
13618 {
13619 Assert(paFixups[i].off < off);
13620 Assert(paFixups[i].idxLabel < cLabels);
13621 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
13622 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
13623 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
13624 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
13625 switch (paFixups[i].enmType)
13626 {
13627#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
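/* 32-bit displacement relative to the fixup position; offAddend presumably
   compensates for the displacement being encoded relative to the end of the
   instruction (typically -4 for a trailing rel32 field). */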
13628 case kIemNativeFixupType_Rel32:
13629 Assert(paFixups[i].off + 4 <= off);
13630 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13631 continue;
13632
13633#elif defined(RT_ARCH_ARM64)
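/* The three ARM64 fixup types below match the usual AArch64 relative branch
   immediates: 26 bits at bit 0 (B/BL style), 19 bits at bits 5..23
   (B.cond/CBZ style) and 14 bits at bits 5..18 (TBZ/TBNZ style).  The
   displacements are in 32-bit instruction words, which matches IEMNATIVEINSTR
   being a 32-bit word on ARM64. */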
13634 case kIemNativeFixupType_RelImm26At0:
13635 {
13636 Assert(paFixups[i].off < off);
13637 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13638 Assert(offDisp >= -262144 && offDisp < 262144);
13639 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
13640 continue;
13641 }
13642
13643 case kIemNativeFixupType_RelImm19At5:
13644 {
13645 Assert(paFixups[i].off < off);
13646 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13647 Assert(offDisp >= -262144 && offDisp < 262144);
13648 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
13649 continue;
13650 }
13651
13652 case kIemNativeFixupType_RelImm14At5:
13653 {
13654 Assert(paFixups[i].off < off);
13655 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13656 Assert(offDisp >= -8192 && offDisp < 8192);
13657 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
13658 continue;
13659 }
13660
13661#endif
13662 case kIemNativeFixupType_Invalid:
13663 case kIemNativeFixupType_End:
13664 break;
13665 }
13666 AssertFailed();
13667 }
13668
13669 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
13670 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
13671
13672 /*
13673 * Convert the translation block.
13674 */
13675 RTMemFree(pTb->Thrd.paCalls);
13676 pTb->Native.paInstructions = paFinalInstrBuf;
13677 pTb->Native.cInstructions = off;
13678 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
13679#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13680 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
13681 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
13682#endif
13683
13684 Assert(pTbAllocator->cThreadedTbs > 0);
13685 pTbAllocator->cThreadedTbs -= 1;
13686 pTbAllocator->cNativeTbs += 1;
13687 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
13688
13689#ifdef LOG_ENABLED
13690 /*
13691 * Disassemble to the log if enabled.
13692 */
13693 if (LogIs3Enabled())
13694 {
13695 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
13696 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
13697# ifdef DEBUG_bird
13698 RTLogFlush(NULL);
13699# endif
13700 }
13701#endif
13702 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
13703
13704 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
13705 return pTb;
13706}
13707