VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@102621

Last change on this file since 102621 was 102603, checked in by vboxsync, 14 months ago

VMM/IEM: AMD64 version of BODY_CHECK_OPCODES. Disabled because ARM64 isn't done. bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 102603 2023-12-14 23:06:41Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128/*********************************************************************************************************************************
129* Internal Functions *
130*********************************************************************************************************************************/
131#ifdef VBOX_STRICT
132static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
133 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
134static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
135#endif
136#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
137static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
138static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
139#endif
140DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
142 IEMNATIVEGSTREG enmGstReg, uint32_t off);
143
144
145/*********************************************************************************************************************************
146* Executable Memory Allocator *
147*********************************************************************************************************************************/
148/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
149 * Use an alternative chunk sub-allocator that does not store internal data
 150 * in the chunk.
151 *
152 * Using the RTHeapSimple is not practical on newer darwin systems where
153 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
154 * memory. We would have to change the protection of the whole chunk for
155 * every call to RTHeapSimple, which would be rather expensive.
156 *
157 * This alternative implementation lets us restrict page protection modifications
158 * to the pages backing the executable memory we just allocated.
159 */
160#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
161/** The chunk sub-allocation unit size in bytes. */
162#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
163/** The chunk sub-allocation unit size as a shift factor. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
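/* Example: with 128 byte units a 200 byte request is rounded up to 2 units
 * (256 bytes) and thus occupies two consecutive bits in the per-chunk
 * allocation bitmap:
 *      cReqUnits = (200 + 128 - 1) >> 7 = 2
 * A minimal sketch of that rounding (same expression as used by
 * iemExecMemAllocatorAllocInChunk below; the function name is made up and the
 * block is not compiled): */
#if 0
DECLINLINE(uint32_t) iemExecMemAltSubAllocCalcUnitsSketch(uint32_t cbReq)
{
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif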
165
166#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
167# ifdef IEMNATIVE_USE_GDB_JIT
168# define IEMNATIVE_USE_GDB_JIT_ET_DYN
169
170/** GDB JIT: Code entry. */
171typedef struct GDBJITCODEENTRY
172{
173 struct GDBJITCODEENTRY *pNext;
174 struct GDBJITCODEENTRY *pPrev;
175 uint8_t *pbSymFile;
176 uint64_t cbSymFile;
177} GDBJITCODEENTRY;
178
179/** GDB JIT: Actions. */
180typedef enum GDBJITACTIONS : uint32_t
181{
182 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
183} GDBJITACTIONS;
184
185/** GDB JIT: Descriptor. */
186typedef struct GDBJITDESCRIPTOR
187{
188 uint32_t uVersion;
189 GDBJITACTIONS enmAction;
190 GDBJITCODEENTRY *pRelevant;
191 GDBJITCODEENTRY *pHead;
192 /** Our addition: */
193 GDBJITCODEENTRY *pTail;
194} GDBJITDESCRIPTOR;
195
196/** GDB JIT: Our simple symbol file data. */
197typedef struct GDBJITSYMFILE
198{
199 Elf64_Ehdr EHdr;
200# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
201 Elf64_Shdr aShdrs[5];
202# else
203 Elf64_Shdr aShdrs[7];
204 Elf64_Phdr aPhdrs[2];
205# endif
206 /** The dwarf ehframe data for the chunk. */
207 uint8_t abEhFrame[512];
208 char szzStrTab[128];
209 Elf64_Sym aSymbols[3];
210# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
211 Elf64_Sym aDynSyms[2];
212 Elf64_Dyn aDyn[6];
213# endif
214} GDBJITSYMFILE;
215
216extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
217extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
218
219/** Init once for g_IemNativeGdbJitLock. */
220static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
221/** Critical section serializing access to the GDB JIT descriptor list. */
222static RTCRITSECT g_IemNativeGdbJitLock;
223
224/** GDB reads the info here. */
225GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
226
227/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
228DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
229{
230 ASMNopPause();
231}
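/* Registration protocol sketch (the real sequence lives in
 * iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk further down): link a
 * GDBJITCODEENTRY into the descriptor list, point pRelevant at it, set
 * enmAction = kGdbJitaction_Register, call __jit_debug_register_code(), then
 * set enmAction back to kGdbJitaction_NoAction.  The breakpoint GDB places on
 * the empty function above is what lets it observe the update. */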
232
233/** @callback_method_impl{FNRTONCE} */
234static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
235{
236 RT_NOREF(pvUser);
237 return RTCritSectInit(&g_IemNativeGdbJitLock);
238}
239
240
241# endif /* IEMNATIVE_USE_GDB_JIT */
242
243/**
244 * Per-chunk unwind info for non-windows hosts.
245 */
246typedef struct IEMEXECMEMCHUNKEHFRAME
247{
248# ifdef IEMNATIVE_USE_LIBUNWIND
249 /** The offset of the FDA into abEhFrame. */
250 uintptr_t offFda;
251# else
252 /** 'struct object' storage area. */
253 uint8_t abObject[1024];
254# endif
255# ifdef IEMNATIVE_USE_GDB_JIT
256# if 0
257 /** The GDB JIT 'symbol file' data. */
258 GDBJITSYMFILE GdbJitSymFile;
259# endif
260 /** The GDB JIT list entry. */
261 GDBJITCODEENTRY GdbJitEntry;
262# endif
263 /** The dwarf ehframe data for the chunk. */
264 uint8_t abEhFrame[512];
265} IEMEXECMEMCHUNKEHFRAME;
266/** Pointer to per-chunk unwind info for non-windows hosts. */
267typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
268#endif
269
270
271/**
272 * A chunk of executable memory.
273 */
274typedef struct IEMEXECMEMCHUNK
275{
276#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
277 /** Number of free items in this chunk. */
278 uint32_t cFreeUnits;
279 /** Hint where to start searching for free space in the allocation bitmap. */
280 uint32_t idxFreeHint;
281#else
282 /** The heap handle. */
283 RTHEAPSIMPLE hHeap;
284#endif
285 /** Pointer to the chunk. */
286 void *pvChunk;
287#ifdef IN_RING3
288 /**
289 * Pointer to the unwind information.
290 *
291 * This is used during C++ throw and longjmp (windows and probably most other
292 * platforms). Some debuggers (windbg) make use of it as well.
293 *
294 * Windows: This is allocated from hHeap on windows because (at least for
295 * AMD64) the UNWIND_INFO structure address in the
296 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
297 *
298 * Others: Allocated from the regular heap to avoid unnecessary executable data
299 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
300 void *pvUnwindInfo;
301#elif defined(IN_RING0)
302 /** Allocation handle. */
303 RTR0MEMOBJ hMemObj;
304#endif
305} IEMEXECMEMCHUNK;
306/** Pointer to a memory chunk. */
307typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
308
309
310/**
311 * Executable memory allocator for the native recompiler.
312 */
313typedef struct IEMEXECMEMALLOCATOR
314{
315 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
316 uint32_t uMagic;
317
318 /** The chunk size. */
319 uint32_t cbChunk;
320 /** The maximum number of chunks. */
321 uint32_t cMaxChunks;
322 /** The current number of chunks. */
323 uint32_t cChunks;
324 /** Hint where to start looking for available memory. */
325 uint32_t idxChunkHint;
326 /** Statistics: Current number of allocations. */
327 uint32_t cAllocations;
328
329 /** The total amount of memory available. */
330 uint64_t cbTotal;
331 /** Total amount of free memory. */
332 uint64_t cbFree;
333 /** Total amount of memory allocated. */
334 uint64_t cbAllocated;
335
336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
337 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
338 *
339 * Since the chunk size is a power of two and the minimum chunk size is a lot
340 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
341 * require a whole number of uint64_t elements in the allocation bitmap. So,
 342 * for the sake of simplicity, the bitmaps for all chunks are allocated as
 343 * one contiguous block. */
344 uint64_t *pbmAlloc;
345 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
346 uint32_t cUnitsPerChunk;
347 /** Number of bitmap elements per chunk (for quickly locating the bitmap
348 * portion corresponding to a chunk). */
349 uint32_t cBitmapElementsPerChunk;
350#else
351 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
352 * @{ */
353 /** The size of the heap internal block header. This is used to adjust the
 354 * requested memory size to make sure there is exactly enough room for a header at
 355 * the end of the blocks we allocate before the next 64 byte alignment line. */
356 uint32_t cbHeapBlockHdr;
357 /** The size of the initial heap allocation required to make sure the first
 358 * allocation is correctly aligned. */
359 uint32_t cbHeapAlignTweak;
360 /** The alignment tweak allocation address. */
361 void *pvAlignTweak;
362 /** @} */
363#endif
364
365#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
366 /** Pointer to the array of unwind info running parallel to aChunks (same
367 * allocation as this structure, located after the bitmaps).
368 * (For Windows, the structures must reside within 32-bit RVA distance of the
 369 * actual chunk, so they are allocated off the chunk.)
370 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
371#endif
372
373 /** The allocation chunks. */
374 RT_FLEXIBLE_ARRAY_EXTENSION
375 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
376} IEMEXECMEMALLOCATOR;
377/** Pointer to an executable memory allocator. */
378typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
379
380/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
381#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
382
383
384static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
385
386
387/**
388 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
389 * the heap statistics.
390 */
391static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
392 uint32_t cbReq, uint32_t idxChunk)
393{
394 pExecMemAllocator->cAllocations += 1;
395 pExecMemAllocator->cbAllocated += cbReq;
396#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
397 pExecMemAllocator->cbFree -= cbReq;
398#else
399 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
400#endif
401 pExecMemAllocator->idxChunkHint = idxChunk;
402
403#ifdef RT_OS_DARWIN
404 /*
405 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
406 * on darwin. So, we mark the pages returned as read+write after alloc and
407 * expect the caller to call iemExecMemAllocatorReadyForUse when done
408 * writing to the allocation.
409 *
410 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
411 * for details.
412 */
413 /** @todo detect if this is necessary... it wasn't required on 10.15 or
414 * whatever older version it was. */
415 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
416 AssertRC(rc);
417#endif
418
419 return pvRet;
420}
421
422
423#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
424static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
425 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
426{
427 /*
428 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
429 */
430 Assert(!(cToScan & 63));
431 Assert(!(idxFirst & 63));
432 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
433 pbmAlloc += idxFirst / 64;
434
435 /*
436 * Scan the bitmap for cReqUnits consecutive clear bits.
437 */
438 /** @todo This can probably be done more efficiently for non-x86 systems. */
439 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
440 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
441 {
442 uint32_t idxAddBit = 1;
443 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
444 idxAddBit++;
445 if (idxAddBit >= cReqUnits)
446 {
447 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
448
449 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
450 pChunk->cFreeUnits -= cReqUnits;
451 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
452
453 void * const pvRet = (uint8_t *)pChunk->pvChunk
454 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
455
456 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
457 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
458 }
459
460 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
461 }
462 return NULL;
463}
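/* Illustrative sketch (not compiled): a simplified, portable first-fit scan
 * doing the same job as iemExecMemAllocatorAllocInChunkInt above, minus the
 * ASMBit* helpers, the idxFirst bias and the chunk bookkeeping.  The function
 * name is made up for the example. */
# if 0
static int32_t iemExecMemFindFirstFitSketch(uint64_t const *pbmAlloc, uint32_t cUnits, uint32_t cReqUnits)
{
    uint32_t cRun = 0;
    for (uint32_t iBit = 0; iBit < cUnits; iBit++)
    {
        if (pbmAlloc[iBit / 64] & RT_BIT_64(iBit % 64))
            cRun = 0;                                   /* unit is taken, restart the run */
        else if (++cRun >= cReqUnits)
            return (int32_t)(iBit - cReqUnits + 1);     /* found cReqUnits consecutive free units */
    }
    return -1;                                          /* no sufficiently large free run */
}
# endif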
464#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
465
466
467static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
468{
469#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
470 /*
471 * Figure out how much to allocate.
472 */
473 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
474 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
475 {
476 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
477 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
478 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
479 {
480 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
481 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
482 if (pvRet)
483 return pvRet;
484 }
485 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
486 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
487 cReqUnits, idxChunk);
488 }
489#else
490 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
491 if (pvRet)
492 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
493#endif
494 return NULL;
495
496}
497
498
499/**
500 * Allocates @a cbReq bytes of executable memory.
501 *
502 * @returns Pointer to the memory, NULL if out of memory or other problem
503 * encountered.
504 * @param pVCpu The cross context virtual CPU structure of the calling
505 * thread.
506 * @param cbReq How many bytes are required.
507 */
508static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
509{
510 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
511 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
512 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
513
514
515 for (unsigned iIteration = 0;; iIteration++)
516 {
517 /*
518 * Adjust the request size so it'll fit the allocator alignment/whatnot.
519 *
520 * For the RTHeapSimple allocator this means to follow the logic described
521 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
522 * existing chunks if we think we've got sufficient free memory around.
523 *
524 * While for the alternative one we just align it up to a whole unit size.
525 */
526#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
527 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
528#else
529 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
530#endif
531 if (cbReq <= pExecMemAllocator->cbFree)
532 {
533 uint32_t const cChunks = pExecMemAllocator->cChunks;
534 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
535 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
536 {
537 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
538 if (pvRet)
539 return pvRet;
540 }
541 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 }
548
549 /*
550 * Can we grow it with another chunk?
551 */
552 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
553 {
554 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
555 AssertLogRelRCReturn(rc, NULL);
556
557 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
558 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
559 if (pvRet)
560 return pvRet;
561 AssertFailed();
562 }
563
564 /*
565 * Try prune native TBs once.
566 */
567 if (iIteration == 0)
568 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
569 else
570 {
571 /** @todo stats... */
572 return NULL;
573 }
574 }
575
576}
577
578
579/** This is a hook that we may need later for changing memory protection back
580 * to readonly+exec */
581static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
582{
583#ifdef RT_OS_DARWIN
584 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
585 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
586 AssertRC(rc); RT_NOREF(pVCpu);
587
588 /*
589 * Flush the instruction cache:
590 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
591 */
592 /* sys_dcache_flush(pv, cb); - not necessary */
593 sys_icache_invalidate(pv, cb);
594#else
595 RT_NOREF(pVCpu, pv, cb);
596#endif
597}
598
599
600/**
601 * Frees executable memory.
602 */
603void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
604{
605 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
606 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
607 Assert(pv);
608#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
609 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
610#else
611 Assert(!((uintptr_t)pv & 63));
612#endif
613
614 /* Align the size as we did when allocating the block. */
615#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
616 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
617#else
618 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
619#endif
620
621 /* Free it / assert sanity. */
622#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
623 uint32_t const cChunks = pExecMemAllocator->cChunks;
624 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
625 bool fFound = false;
626 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
627 {
628 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
629 fFound = offChunk < cbChunk;
630 if (fFound)
631 {
632#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
633 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
634 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
635
636 /* Check that it's valid and free it. */
637 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
638 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
639 for (uint32_t i = 1; i < cReqUnits; i++)
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
641 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
642
643 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
644 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
645
646 /* Update the stats. */
647 pExecMemAllocator->cbAllocated -= cb;
648 pExecMemAllocator->cbFree += cb;
649 pExecMemAllocator->cAllocations -= 1;
650 return;
651#else
652 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
653 break;
654#endif
655 }
656 }
657# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
658 AssertFailed();
659# else
660 Assert(fFound);
661# endif
662#endif
663
664#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
 665 /* Update stats while cb is freshly calculated. */
666 pExecMemAllocator->cbAllocated -= cb;
667 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
668 pExecMemAllocator->cAllocations -= 1;
669
670 /* Free it. */
671 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
672#endif
673}
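/* A minimal usage sketch of the three allocator functions above (hypothetical
 * caller; error handling and the actual instruction emission are omitted and
 * the block is not compiled): */
#if 0
static void iemExecMemAllocatorUsageSketch(PVMCPU pVCpu)
{
    uint32_t const cbCode = 512;
    void * const   pvCode = iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pvCode)
    {
        /* ...emit native code into pvCode; on darwin it is mapped read+write at this point... */
        iemExecMemAllocatorReadyForUse(pVCpu, pvCode, cbCode);  /* switch to read+exec and flush the icache */
        /* ...execute the translation block... */
        iemExecMemAllocatorFree(pVCpu, pvCode, cbCode);         /* release it when the TB is retired */
    }
}
#endif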
674
675
676
677#ifdef IN_RING3
678# ifdef RT_OS_WINDOWS
679
680/**
681 * Initializes the unwind info structures for windows hosts.
682 */
683static int
684iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
685 void *pvChunk, uint32_t idxChunk)
686{
687 RT_NOREF(pVCpu);
688
689 /*
690 * The AMD64 unwind opcodes.
691 *
692 * This is a program that starts with RSP after a RET instruction that
693 * ends up in recompiled code, and the operations we describe here will
694 * restore all non-volatile registers and bring RSP back to where our
695 * RET address is. This means it's reverse order from what happens in
696 * the prologue.
697 *
698 * Note! We use a frame register approach here both because we have one
 699 * and mainly because the UWOP_ALLOC_LARGE argument values
 700 * would be a pain to write initializers for. On the positive
 701 * side, we're impervious to changes in the stack variable
 702 * area and can deal with dynamic stack allocations if necessary.
703 */
704 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
705 {
706 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
707 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
708 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
709 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
710 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
711 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
712 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
713 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
714 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
715 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
716 };
717 union
718 {
719 IMAGE_UNWIND_INFO Info;
720 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
721 } s_UnwindInfo =
722 {
723 {
724 /* .Version = */ 1,
725 /* .Flags = */ 0,
726 /* .SizeOfProlog = */ 16, /* whatever */
727 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
728 /* .FrameRegister = */ X86_GREG_xBP,
729 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
730 }
731 };
732 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
733 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
734
735 /*
736 * Calc how much space we need and allocate it off the exec heap.
737 */
738 unsigned const cFunctionEntries = 1;
739 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
740 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
741# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
743 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
744 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
745# else
746 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
747 - pExecMemAllocator->cbHeapBlockHdr;
748 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
749 32 /*cbAlignment*/);
750# endif
751 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
752 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
753
754 /*
755 * Initialize the structures.
756 */
757 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
758
759 paFunctions[0].BeginAddress = 0;
760 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
761 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
762
763 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
764 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
765
766 /*
767 * Register it.
768 */
769 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
770 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
771
772 return VINF_SUCCESS;
773}
774
775
776# else /* !RT_OS_WINDOWS */
777
778/**
779 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
780 */
781DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
782{
783 if (iValue >= 64)
784 {
785 Assert(iValue < 0x2000);
786 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
787 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
788 }
789 else if (iValue >= 0)
790 *Ptr.pb++ = (uint8_t)iValue;
791 else if (iValue > -64)
792 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
793 else
794 {
795 Assert(iValue > -0x2000);
796 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
797 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
798 }
799 return Ptr;
800}
801
802
803/**
804 * Emits an ULEB128 encoded value (up to 64-bit wide).
805 */
806DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
807{
808 while (uValue >= 0x80)
809 {
810 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
811 uValue >>= 7;
812 }
813 *Ptr.pb++ = (uint8_t)uValue;
814 return Ptr;
815}
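/* Worked examples of the encodings the two helpers above produce:
 *      iemDwarfPutUleb128(Ptr, 300)  ->  0xac 0x02    (300 = 0x2c | (0x02 << 7))
 *      iemDwarfPutLeb128(Ptr,  16)   ->  0x10         (single byte, bit 6 clear => non-negative)
 *      iemDwarfPutLeb128(Ptr,  -8)   ->  0x78         (single byte, bit 6 set   => negative, value bits 0x38)
 * A hypothetical buffer filled the same way the CIE/FDE code below does it
 * (illustrative fragment, not compiled): */
# if 0
static void iemDwarfLeb128ExampleSketch(void)
{
    uint8_t    abBuf[16];
    RTPTRUNION Ptr = { abBuf };
    Ptr = iemDwarfPutUleb128(Ptr, 300);     /* writes 0xac 0x02 */
    Ptr = iemDwarfPutLeb128(Ptr, -8);       /* writes 0x78 */
    RT_NOREF(Ptr);
}
# endif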
816
817
818/**
819 * Emits a CFA rule as register @a uReg + offset @a off.
820 */
821DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
822{
823 *Ptr.pb++ = DW_CFA_def_cfa;
824 Ptr = iemDwarfPutUleb128(Ptr, uReg);
825 Ptr = iemDwarfPutUleb128(Ptr, off);
826 return Ptr;
827}
828
829
830/**
831 * Emits a register (@a uReg) save location:
832 * CFA + @a off * data_alignment_factor
833 */
834DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
835{
836 if (uReg < 0x40)
837 *Ptr.pb++ = DW_CFA_offset | uReg;
838 else
839 {
840 *Ptr.pb++ = DW_CFA_offset_extended;
841 Ptr = iemDwarfPutUleb128(Ptr, uReg);
842 }
843 Ptr = iemDwarfPutUleb128(Ptr, off);
844 return Ptr;
845}
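/* Example of the byte sequences the helpers above produce with the AMD64 DWARF
 * register numbers used below (DWREG_AMD64_RBP = 6, DWREG_AMD64_RA = 16):
 *      iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16)  ->  0x0c 0x06 0x10   (DW_CFA_def_cfa, ULEB128 reg, ULEB128 off)
 *      iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,   1)  ->  0x90 0x01        (DW_CFA_offset | 16, ULEB128 factored off)
 * With the data alignment factor of -8 set in the CIE below, the second rule
 * reads "the return address is saved at [CFA - 8]". */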
846
847
848# if 0 /* unused */
849/**
850 * Emits a register (@a uReg) save location, using signed offset:
851 * CFA + @a offSigned * data_alignment_factor
852 */
853DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
854{
855 *Ptr.pb++ = DW_CFA_offset_extended_sf;
856 Ptr = iemDwarfPutUleb128(Ptr, uReg);
857 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
858 return Ptr;
859}
860# endif
861
862
863/**
864 * Initializes the unwind info section for non-windows hosts.
865 */
866static int
867iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
868 void *pvChunk, uint32_t idxChunk)
869{
870 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
871 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
872
873 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
874
875 /*
876 * Generate the CIE first.
877 */
878# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
879 uint8_t const iDwarfVer = 3;
880# else
881 uint8_t const iDwarfVer = 4;
882# endif
883 RTPTRUNION const PtrCie = Ptr;
884 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
885 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
886 *Ptr.pb++ = iDwarfVer; /* DWARF version */
887 *Ptr.pb++ = 0; /* Augmentation. */
888 if (iDwarfVer >= 4)
889 {
890 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
891 *Ptr.pb++ = 0; /* Segment selector size. */
892 }
893# ifdef RT_ARCH_AMD64
894 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
895# else
896 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
897# endif
898 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
901# elif defined(RT_ARCH_ARM64)
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
903# else
904# error "port me"
905# endif
906 /* Initial instructions: */
907# ifdef RT_ARCH_AMD64
908 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
916# elif defined(RT_ARCH_ARM64)
917# if 1
918 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
919# else
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
921# endif
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
934 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
935 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
936# else
937# error "port me"
938# endif
939 while ((Ptr.u - PtrCie.u) & 3)
940 *Ptr.pb++ = DW_CFA_nop;
941 /* Finalize the CIE size. */
942 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
943
944 /*
945 * Generate an FDE for the whole chunk area.
946 */
947# ifdef IEMNATIVE_USE_LIBUNWIND
948 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
949# endif
950 RTPTRUNION const PtrFde = Ptr;
951 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
952 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
953 Ptr.pu32++;
954 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
955 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
956# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
957 *Ptr.pb++ = DW_CFA_nop;
958# endif
959 while ((Ptr.u - PtrFde.u) & 3)
960 *Ptr.pb++ = DW_CFA_nop;
961 /* Finalize the FDE size. */
962 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
963
964 /* Terminator entry. */
965 *Ptr.pu32++ = 0;
966 *Ptr.pu32++ = 0; /* just to be sure... */
967 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
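    /* At this point abEhFrame holds a minimal .eh_frame image:
     *      [length][CIE: version, empty augmentation, alignment factors, RA column, initial CFA rules]
     *      [length][FDE: CIE back-pointer, absolute chunk address, chunk size]
     *      [0][0]  terminator
     * This is what gets handed to __register_frame / __register_frame_info below. */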
968
969 /*
970 * Register it.
971 */
972# ifdef IEMNATIVE_USE_LIBUNWIND
973 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
974# else
975 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
976 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
977# endif
978
979# ifdef IEMNATIVE_USE_GDB_JIT
980 /*
981 * Now for telling GDB about this (experimental).
982 *
983 * This seems to work best with ET_DYN.
984 */
985 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
986# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
987 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
989# else
990 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
991 - pExecMemAllocator->cbHeapBlockHdr;
992 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
993# endif
994 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
995 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
996
997 RT_ZERO(*pSymFile);
998
999 /*
1000 * The ELF header:
1001 */
1002 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1003 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1004 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1005 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1006 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1007 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1008 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1009 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1010# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1011 pSymFile->EHdr.e_type = ET_DYN;
1012# else
1013 pSymFile->EHdr.e_type = ET_REL;
1014# endif
1015# ifdef RT_ARCH_AMD64
1016 pSymFile->EHdr.e_machine = EM_AMD64;
1017# elif defined(RT_ARCH_ARM64)
1018 pSymFile->EHdr.e_machine = EM_AARCH64;
1019# else
1020# error "port me"
1021# endif
1022 pSymFile->EHdr.e_version = 1; /*?*/
1023 pSymFile->EHdr.e_entry = 0;
1024# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1025 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1026# else
1027 pSymFile->EHdr.e_phoff = 0;
1028# endif
1029 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1030 pSymFile->EHdr.e_flags = 0;
1031 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1032# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1033 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1034 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1035# else
1036 pSymFile->EHdr.e_phentsize = 0;
1037 pSymFile->EHdr.e_phnum = 0;
1038# endif
1039 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1040 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1041 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1042
1043 uint32_t offStrTab = 0;
1044#define APPEND_STR(a_szStr) do { \
1045 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1046 offStrTab += sizeof(a_szStr); \
1047 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1048 } while (0)
1049#define APPEND_STR_FMT(a_szStr, ...) do { \
1050 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1051 offStrTab++; \
1052 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1053 } while (0)
1054
1055 /*
1056 * Section headers.
1057 */
1058 /* Section header #0: NULL */
1059 unsigned i = 0;
1060 APPEND_STR("");
1061 RT_ZERO(pSymFile->aShdrs[i]);
1062 i++;
1063
1064 /* Section header: .eh_frame */
1065 pSymFile->aShdrs[i].sh_name = offStrTab;
1066 APPEND_STR(".eh_frame");
1067 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1068 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1069# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1070 pSymFile->aShdrs[i].sh_offset
1071 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1072# else
1073 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1074 pSymFile->aShdrs[i].sh_offset = 0;
1075# endif
1076
1077 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1078 pSymFile->aShdrs[i].sh_link = 0;
1079 pSymFile->aShdrs[i].sh_info = 0;
1080 pSymFile->aShdrs[i].sh_addralign = 1;
1081 pSymFile->aShdrs[i].sh_entsize = 0;
1082 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1083 i++;
1084
1085 /* Section header: .shstrtab */
1086 unsigned const iShStrTab = i;
1087 pSymFile->EHdr.e_shstrndx = iShStrTab;
1088 pSymFile->aShdrs[i].sh_name = offStrTab;
1089 APPEND_STR(".shstrtab");
1090 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1091 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1092# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1093 pSymFile->aShdrs[i].sh_offset
1094 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1095# else
1096 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1097 pSymFile->aShdrs[i].sh_offset = 0;
1098# endif
1099 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1100 pSymFile->aShdrs[i].sh_link = 0;
1101 pSymFile->aShdrs[i].sh_info = 0;
1102 pSymFile->aShdrs[i].sh_addralign = 1;
1103 pSymFile->aShdrs[i].sh_entsize = 0;
1104 i++;
1105
1106 /* Section header: .symtab */
1107 pSymFile->aShdrs[i].sh_name = offStrTab;
1108 APPEND_STR(".symtab");
1109 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1110 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1111 pSymFile->aShdrs[i].sh_offset
1112 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1113 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1114 pSymFile->aShdrs[i].sh_link = iShStrTab;
1115 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1117 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1118 i++;
1119
1120# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1121 /* Section header: .dynsym */
1122 pSymFile->aShdrs[i].sh_name = offStrTab;
1123 APPEND_STR(".dynsym");
1124 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1125 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1126 pSymFile->aShdrs[i].sh_offset
1127 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1128 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1129 pSymFile->aShdrs[i].sh_link = iShStrTab;
1130 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1132 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1133 i++;
1134# endif
1135
1136# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1137 /* Section header: .dynamic */
1138 pSymFile->aShdrs[i].sh_name = offStrTab;
1139 APPEND_STR(".dynamic");
1140 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1141 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1142 pSymFile->aShdrs[i].sh_offset
1143 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1144 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1145 pSymFile->aShdrs[i].sh_link = iShStrTab;
1146 pSymFile->aShdrs[i].sh_info = 0;
1147 pSymFile->aShdrs[i].sh_addralign = 1;
1148 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1149 i++;
1150# endif
1151
1152 /* Section header: .text */
1153 unsigned const iShText = i;
1154 pSymFile->aShdrs[i].sh_name = offStrTab;
1155 APPEND_STR(".text");
1156 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1157 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1158# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1159 pSymFile->aShdrs[i].sh_offset
1160 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1161# else
1162 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1163 pSymFile->aShdrs[i].sh_offset = 0;
1164# endif
1165 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1166 pSymFile->aShdrs[i].sh_link = 0;
1167 pSymFile->aShdrs[i].sh_info = 0;
1168 pSymFile->aShdrs[i].sh_addralign = 1;
1169 pSymFile->aShdrs[i].sh_entsize = 0;
1170 i++;
1171
1172 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1173
1174# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1175 /*
1176 * The program headers:
1177 */
1178 /* Everything in a single LOAD segment: */
1179 i = 0;
1180 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1181 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1182 pSymFile->aPhdrs[i].p_offset
1183 = pSymFile->aPhdrs[i].p_vaddr
1184 = pSymFile->aPhdrs[i].p_paddr = 0;
1185 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1186 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1187 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1188 i++;
1189 /* The .dynamic segment. */
1190 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1191 pSymFile->aPhdrs[i].p_flags = PF_R;
1192 pSymFile->aPhdrs[i].p_offset
1193 = pSymFile->aPhdrs[i].p_vaddr
1194 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1195 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1196 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1197 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1198 i++;
1199
1200 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1201
1202 /*
1203 * The dynamic section:
1204 */
1205 i = 0;
1206 pSymFile->aDyn[i].d_tag = DT_SONAME;
1207 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1208 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1211 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1212 i++;
1213 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1214 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_NULL;
1223 i++;
1224 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1225# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1226
1227 /*
1228 * Symbol tables:
1229 */
1230 /** @todo gdb doesn't seem to really like this ... */
1231 i = 0;
1232 pSymFile->aSymbols[i].st_name = 0;
1233 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1234 pSymFile->aSymbols[i].st_value = 0;
1235 pSymFile->aSymbols[i].st_size = 0;
1236 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1237 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1238# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1239 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1240# endif
1241 i++;
1242
1243 pSymFile->aSymbols[i].st_name = 0;
1244 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1245 pSymFile->aSymbols[i].st_value = 0;
1246 pSymFile->aSymbols[i].st_size = 0;
1247 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1248 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1249 i++;
1250
1251 pSymFile->aSymbols[i].st_name = offStrTab;
1252 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1253# if 0
1254 pSymFile->aSymbols[i].st_shndx = iShText;
1255 pSymFile->aSymbols[i].st_value = 0;
1256# else
1257 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1258 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1259# endif
1260 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1261 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1262 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1263# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1264 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1265 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1266# endif
1267 i++;
1268
1269 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1270 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1271
1272 /*
1273 * The GDB JIT entry and informing GDB.
1274 */
1275 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1276# if 1
1277 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1278# else
1279 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1280# endif
1281
1282 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1283 RTCritSectEnter(&g_IemNativeGdbJitLock);
1284 pEhFrame->GdbJitEntry.pNext = NULL;
1285 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1286 if (__jit_debug_descriptor.pTail)
1287 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1288 else
1289 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1290 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1291 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1292
1293 /* Notify GDB: */
1294 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1295 __jit_debug_register_code();
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1297 RTCritSectLeave(&g_IemNativeGdbJitLock);
1298
1299# else /* !IEMNATIVE_USE_GDB_JIT */
1300 RT_NOREF(pVCpu);
1301# endif /* !IEMNATIVE_USE_GDB_JIT */
1302
1303 return VINF_SUCCESS;
1304}
1305
1306# endif /* !RT_OS_WINDOWS */
1307#endif /* IN_RING3 */
1308
1309
1310/**
1311 * Adds another chunk to the executable memory allocator.
1312 *
1313 * This is used by the init code for the initial allocation and later by the
1314 * regular allocator function when it's out of memory.
1315 */
1316static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1317{
1318 /* Check that we've room for growth. */
1319 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1320 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1321
1322 /* Allocate a chunk. */
1323#ifdef RT_OS_DARWIN
1324 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1325#else
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1327#endif
1328 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1329
1330#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1331 int rc = VINF_SUCCESS;
1332#else
1333 /* Initialize the heap for the chunk. */
1334 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1335 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1336 AssertRC(rc);
1337 if (RT_SUCCESS(rc))
1338 {
1339 /*
1340 * We want the memory to be aligned on a 64 byte boundary, so the first time through
1341 * here we do some exploratory allocations to see how we can achieve this.
1342 * On subsequent runs we only make an initial adjustment allocation, if
1343 * necessary.
1344 *
1345 * Since we own the heap implementation, we know that the internal block
1346 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1347 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1348 * to the size, align up by 64 bytes, and subtract 32 bytes.
1349 *
1350 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1351 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1352 * allocation to force subsequent allocations to return 64 byte aligned
1353 * user areas.
1354 */
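        /* Worked example of the adjustment described above, assuming the 32 byte block header:
         *      cbReq = 200  ->  RT_ALIGN_32(200 + 32, 64) - 32  =  256 - 32  =  224
         * so the 224 byte user area ends 32 bytes short of the next 64 byte line,
         * leaving exactly enough room there for the header of the following block. */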
1355 if (!pExecMemAllocator->cbHeapBlockHdr)
1356 {
1357 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1358 pExecMemAllocator->cbHeapAlignTweak = 64;
1359 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1360 32 /*cbAlignment*/);
1361 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1362
1363 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1364 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1365 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1366 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1367 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1368
1369 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 RTHeapSimpleFree(hHeap, pvTest2);
1376 RTHeapSimpleFree(hHeap, pvTest1);
1377 }
1378 else
1379 {
1380 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1381 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1382 }
1383 if (RT_SUCCESS(rc))
1384#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1385 {
1386 /*
1387 * Add the chunk.
1388 *
1389 * This must be done before the unwind init so windows can allocate
1390 * memory from the chunk when using the alternative sub-allocator.
1391 */
1392 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1393#ifdef IN_RING3
1394 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1395#endif
1396#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1397 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1398#else
1399 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1400 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1401 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1402 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1403#endif
1404
1405 pExecMemAllocator->cChunks = idxChunk + 1;
1406 pExecMemAllocator->idxChunkHint = idxChunk;
1407
1408#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1409 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1410 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1411#else
1412 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1413 pExecMemAllocator->cbTotal += cbFree;
1414 pExecMemAllocator->cbFree += cbFree;
1415#endif
1416
1417#ifdef IN_RING3
1418 /*
1419 * Initialize the unwind information (this cannot really fail atm).
1420 * (This sets pvUnwindInfo.)
1421 */
1422 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1423 if (RT_SUCCESS(rc))
1424#endif
1425 {
1426 return VINF_SUCCESS;
1427 }
1428
1429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1430 /* Just in case the impossible happens, undo the above: */
1431 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1432 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1433 pExecMemAllocator->cChunks = idxChunk;
1434 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1435 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1436 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1437 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1438#endif
1439 }
1440#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1441 }
1442#endif
1443 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1444 RT_NOREF(pVCpu);
1445 return rc;
1446}
1447
1448
1449/**
1450 * Initializes the executable memory allocator for native recompilation on the
1451 * calling EMT.
1452 *
1453 * @returns VBox status code.
1454 * @param pVCpu The cross context virtual CPU structure of the calling
1455 * thread.
1456 * @param cbMax The max size of the allocator.
1457 * @param cbInitial The initial allocator size.
1458 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1459 * dependent).
1460 */
1461int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1462{
1463 /*
1464 * Validate input.
1465 */
1466 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1467 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1468 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1469 || cbChunk == 0
1470 || ( RT_IS_POWER_OF_TWO(cbChunk)
1471 && cbChunk >= _1M
1472 && cbChunk <= _256M
1473 && cbChunk <= cbMax),
1474 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1475 VERR_OUT_OF_RANGE);
1476
1477 /*
1478 * Adjust/figure out the chunk size.
1479 */
1480 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1481 {
1482 if (cbMax >= _256M)
1483 cbChunk = _64M;
1484 else
1485 {
1486 if (cbMax < _16M)
1487 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1488 else
1489 cbChunk = (uint32_t)cbMax / 4;
1490 if (!RT_IS_POWER_OF_TWO(cbChunk))
1491 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1492 }
1493 }
1494
1495 if (cbChunk > cbMax)
1496 cbMax = cbChunk;
1497 else
1498 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1499 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1500 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
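    /* A few worked examples of the chunk size defaulting above:
     *      cbMax = 512M  ->  cbChunk = 64M,            cMaxChunks = 8
     *      cbMax = 128M  ->  cbChunk = 32M (cbMax/4),  cMaxChunks = 4
     *      cbMax =   8M  ->  cbChunk =  4M,            cMaxChunks = 2
     *      cbMax =   2M  ->  cbChunk =  2M (= cbMax),  cMaxChunks = 1
     * A cbMax that isn't a whole number of chunks is rounded up to one first. */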
1501
1502 /*
1503 * Allocate and initialize the allocator instance.
1504 */
1505 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1506#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1507 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1508 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1509 cbNeeded += cbBitmap * cMaxChunks;
1510 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1511 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1512#endif
1513#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1514 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1515 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1516#endif
1517 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1518 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1519 VERR_NO_MEMORY);
1520 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1521 pExecMemAllocator->cbChunk = cbChunk;
1522 pExecMemAllocator->cMaxChunks = cMaxChunks;
1523 pExecMemAllocator->cChunks = 0;
1524 pExecMemAllocator->idxChunkHint = 0;
1525 pExecMemAllocator->cAllocations = 0;
1526 pExecMemAllocator->cbTotal = 0;
1527 pExecMemAllocator->cbFree = 0;
1528 pExecMemAllocator->cbAllocated = 0;
1529#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1530 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1531 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1532 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1533 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1534#endif
1535#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1536 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1537#endif
1538 for (uint32_t i = 0; i < cMaxChunks; i++)
1539 {
1540#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1541 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1542 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1543#else
1544 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1545#endif
1546 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1547#ifdef IN_RING0
1548 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1549#else
1550 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1551#endif
1552 }
1553 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1554
1555 /*
1556 * Do the initial allocations.
1557 */
1558 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1559 {
1560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1561 AssertLogRelRCReturn(rc, rc);
1562 }
1563
1564 pExecMemAllocator->idxChunkHint = 0;
1565
1566 return VINF_SUCCESS;
1567}
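/* Editorial usage illustration for iemExecMemAllocatorInit (the sizes are
   arbitrary example values, not defaults taken from this file):

       int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0);

   Passing 0 (or UINT32_MAX) for cbChunk selects the cbMax dependent default,
   which for a 64 MiB cbMax is 64 MiB / 4 = 16 MiB per chunk, so the 16 MiB
   cbInitial request corresponds to one chunk being allocated up front. */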
1568
1569
1570/*********************************************************************************************************************************
1571* Native Recompilation *
1572*********************************************************************************************************************************/
1573
1574
1575/**
1576 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1577 */
1578IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1579{
1580 pVCpu->iem.s.cInstructions += idxInstr;
1581 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1582}
1583
1584
1585/**
1586 * Used by TB code when it wants to raise a \#GP(0).
1587 */
1588IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1589{
1590 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1591#ifndef _MSC_VER
1592 return VINF_IEM_RAISED_XCPT; /* not reached */
1593#endif
1594}
1595
1596
1597/**
1598 * Used by TB code when it detects that the TB it is executing has become obsolete.
1599 * @see iemThreadedFuncWorkerObsoleteTb
1600 */
1601IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1602{
1603 /* We set fSafeToFree to false because we're being called in the context
1604 of a TB callback function, which for native TBs means we cannot release
1605 the executable memory until we've returned our way back to iemTbExec,
1606 as that return path goes via the native code generated for the TB. */
1607 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1608 return VINF_IEM_REEXEC_BREAK;
1609}
1610
1611
1612/*********************************************************************************************************************************
1613* Helpers: Segmented memory fetches and stores. *
1614*********************************************************************************************************************************/
1615
1616/**
1617 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1618 */
1619IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1620{
1621 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1622}
1623
1624
1625/**
1626 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1627 * to 16 bits.
1628 */
1629IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1630{
1631 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1632}
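/* Editorial worked example for the cast chain used by the _Sx_ helpers above:
   a fetched byte of 0x80 is reinterpreted as (int8_t)-128, sign-extended to
   the 16-bit pattern 0xFF80 via the (int16_t)/(uint16_t) casts, and finally
   zero-extended into the 64-bit return value 0x000000000000FF80. */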
1633
1634
1635/**
1636 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1637 * to 32 bits.
1638 */
1639IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1640{
1641 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1642}
1643
1644/**
1645 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1646 * to 64 bits.
1647 */
1648IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1649{
1650 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1651}
1652
1653
1654/**
1655 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1656 */
1657IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1658{
1659 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1660}
1661
1662
1663/**
1664 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1665 * to 32 bits.
1666 */
1667IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1668{
1669 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1670}
1671
1672
1673/**
1674 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1675 * to 64 bits.
1676 */
1677IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1678{
1679 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1680}
1681
1682
1683/**
1684 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1685 */
1686IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1687{
1688 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1689}
1690
1691
1692/**
1693 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1694 * to 64 bits.
1695 */
1696IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1697{
1698 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1699}
1700
1701
1702/**
1703 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1704 */
1705IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1706{
1707 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU64SafeJmp */
1708}
1709
1710
1711/**
1712 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1713 */
1714IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1715{
1716 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1717}
1718
1719
1720/**
1721 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1722 */
1723IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1724{
1725 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1726}
1727
1728
1729/**
1730 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1731 */
1732IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1733{
1734 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1735}
1736
1737
1738/**
1739 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1742{
1743 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1744}
1745
1746
1747
1748/**
1749 * Used by TB code to push unsigned 16-bit value onto a generic stack.
1750 */
1751IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
1752{
1753 iemMemStackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemStackPushU16SafeJmp */
1754}
1755
1756
1757/**
1758 * Used by TB code to push unsigned 32-bit value onto a generic stack.
1759 */
1760IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
1761{
1762 iemMemStackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SafeJmp */
1763}
1764
1765
1766/**
1767 * Used by TB code to push 32-bit selector value onto a generic stack.
1768 *
1769 * Intel CPUs don't write a whole dword, thus the special function.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
1772{
1773 iemMemStackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SRegSafeJmp */
1774}
1775
1776
1777/**
1778 * Used by TB code to push unsigned 64-bit value onto a generic stack.
1779 */
1780IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
1781{
1782 iemMemStackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemStackPushU64SafeJmp */
1783}
1784
1785
1786/**
1787 * Used by TB code to pop a 16-bit general purpose register off a generic stack.
1788 */
1789IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
1790{
1791 iemMemStackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU16SafeJmp */
1792}
1793
1794
1795/**
1796 * Used by TB code to pop a 32-bit general purpose register off a generic stack.
1797 */
1798IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
1799{
1800 iemMemStackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU32SafeJmp */
1801}
1802
1803
1804/**
1805 * Used by TB code to pop a 64-bit general purpose register off a generic stack.
1806 */
1807IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
1808{
1809 iemMemStackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU64SafeJmp */
1810}
1811
1812
1813
1814/*********************************************************************************************************************************
1815* Helpers: Flat memory fetches and stores. *
1816*********************************************************************************************************************************/
1817
1818/**
1819 * Used by TB code to load unsigned 8-bit data w/ flat address.
1820 * @note Zero extending the value to 64-bit to simplify assembly.
1821 */
1822IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1823{
1824 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1825}
1826
1827
1828/**
1829 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1830 * to 16 bits.
1831 * @note Zero extending the value to 64-bit to simplify assembly.
1832 */
1833IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1834{
1835 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1836}
1837
1838
1839/**
1840 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1841 * to 32 bits.
1842 * @note Zero extending the value to 64-bit to simplify assembly.
1843 */
1844IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1845{
1846 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1847}
1848
1849
1850/**
1851 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1852 * to 64 bits.
1853 */
1854IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1855{
1856 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1857}
1858
1859
1860/**
1861 * Used by TB code to load unsigned 16-bit data w/ flat address.
1862 * @note Zero extending the value to 64-bit to simplify assembly.
1863 */
1864IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1865{
1866 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1867}
1868
1869
1870/**
1871 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1872 * to 32 bits.
1873 * @note Zero extending the value to 64-bit to simplify assembly.
1874 */
1875IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1876{
1877 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1878}
1879
1880
1881/**
1882 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1883 * to 64 bits.
1884 * @note Zero extending the value to 64-bit to simplify assembly.
1885 */
1886IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1887{
1888 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1889}
1890
1891
1892/**
1893 * Used by TB code to load unsigned 32-bit data w/ flat address.
1894 * @note Zero extending the value to 64-bit to simplify assembly.
1895 */
1896IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1897{
1898 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1899}
1900
1901
1902/**
1903 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1904 * to 64 bits.
1905 * @note Zero extending the value to 64-bit to simplify assembly.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1908{
1909 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1910}
1911
1912
1913/**
1914 * Used by TB code to load unsigned 64-bit data w/ flat address.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1917{
1918 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU64SafeJmp */
1919}
1920
1921
1922/**
1923 * Used by TB code to store unsigned 8-bit data w/ flat address.
1924 */
1925IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1926{
1927 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1928}
1929
1930
1931/**
1932 * Used by TB code to store unsigned 16-bit data w/ flat address.
1933 */
1934IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1935{
1936 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1937}
1938
1939
1940/**
1941 * Used by TB code to store unsigned 32-bit data w/ flat address.
1942 */
1943IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1944{
1945 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1946}
1947
1948
1949/**
1950 * Used by TB code to store unsigned 64-bit data w/ flat address.
1951 */
1952IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1953{
1954 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1955}
1956
1957
1958
1959/**
1960 * Used by TB code to push unsigned 16-bit value onto a flat 32-bit stack.
1961 */
1962IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
1963{
1964 iemMemFlat32StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat32StackPushU16SafeJmp */
1965}
1966
1967
1968/**
1969 * Used by TB code to push unsigned 32-bit value onto a flat 32-bit stack.
1970 */
1971IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
1972{
1973 iemMemFlat32StackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SafeJmp */
1974}
1975
1976
1977/**
1978 * Used by TB code to push segment selector value onto a flat 32-bit stack.
1979 *
1980 * Intel CPUs don't write a whole dword, thus the special function.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
1983{
1984 iemMemFlat32StackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SRegSafeJmp */
1985}
1986
1987
1988/**
1989 * Used by TB code to pop a 16-bit general purpose register off a flat 32-bit stack.
1990 */
1991IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
1992{
1993 iemMemFlat32StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU16SafeJmp */
1994}
1995
1996
1997/**
1998 * Used by TB code to pop a 32-bit general purpose register off a flat 32-bit stack.
1999 */
2000IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
2001{
2002 iemMemFlat32StackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU32SafeJmp */
2003}
2004
2005
2006
2007/**
2008 * Used by TB code to push unsigned 16-bit value onto a flat 64-bit stack.
2009 */
2010IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
2011{
2012 iemMemFlat64StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat64StackPushU16SafeJmp */
2013}
2014
2015
2016/**
2017 * Used by TB code to push unsigned 64-bit value onto a flat 64-bit stack.
2018 */
2019IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
2020{
2021 iemMemFlat64StackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemFlat64StackPushU64SafeJmp */
2022}
2023
2024
2025/**
2026 * Used by TB code to pop a 16-bit general purpose register off a flat 64-bit stack.
2027 */
2028IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
2029{
2030 iemMemFlat64StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU16SafeJmp */
2031}
2032
2033
2034/**
2035 * Used by TB code to pop a 64-bit general purpose register off a flat 64-bit stack.
2036 */
2037IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
2038{
2039 iemMemFlat64StackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU64SafeJmp */
2040}
2041
2042
2043
2044/*********************************************************************************************************************************
2045* Helpers: Segmented memory mapping. *
2046*********************************************************************************************************************************/
2047
2048/**
2049 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2050 */
2051IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2052 RTGCPTR GCPtrMem, uint8_t iSegReg))
2053{
2054 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RwSafeJmp */
2055}
2056
2057
2058/**
2059 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2060 */
2061IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2062 RTGCPTR GCPtrMem, uint8_t iSegReg))
2063{
2064 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8WoSafeJmp */
2065}
2066
2067
2068/**
2069 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2070 */
2071IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2072 RTGCPTR GCPtrMem, uint8_t iSegReg))
2073{
2074 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RoSafeJmp */
2075}
2076
2077
2078/**
2079 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2080 */
2081IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2082 RTGCPTR GCPtrMem, uint8_t iSegReg))
2083{
2084 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RwSafeJmp */
2085}
2086
2087
2088/**
2089 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2092 RTGCPTR GCPtrMem, uint8_t iSegReg))
2093{
2094 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16WoSafeJmp */
2095}
2096
2097
2098/**
2099 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2100 */
2101IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2102 RTGCPTR GCPtrMem, uint8_t iSegReg))
2103{
2104 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RoSafeJmp */
2105}
2106
2107
2108/**
2109 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2110 */
2111IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2112 RTGCPTR GCPtrMem, uint8_t iSegReg))
2113{
2114 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RwSafeJmp */
2115}
2116
2117
2118/**
2119 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2120 */
2121IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2122 RTGCPTR GCPtrMem, uint8_t iSegReg))
2123{
2124 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32WoSafeJmp */
2125}
2126
2127
2128/**
2129 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2130 */
2131IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2132 RTGCPTR GCPtrMem, uint8_t iSegReg))
2133{
2134 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RoSafeJmp */
2135}
2136
2137
2138/**
2139 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2140 */
2141IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2142 RTGCPTR GCPtrMem, uint8_t iSegReg))
2143{
2144 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RwSafeJmp */
2145}
2146
2147
2148/**
2149 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2150 */
2151IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2152 RTGCPTR GCPtrMem, uint8_t iSegReg))
2153{
2154 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64WoSafeJmp */
2155}
2156
2157
2158/**
2159 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2160 */
2161IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2162 RTGCPTR GCPtrMem, uint8_t iSegReg))
2163{
2164 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RoSafeJmp */
2165}
2166
2167
2168/**
2169 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2172 RTGCPTR GCPtrMem, uint8_t iSegReg))
2173{
2174 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataR80WoSafeJmp */
2175}
2176
2177
2178/**
2179 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2180 */
2181IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2182 RTGCPTR GCPtrMem, uint8_t iSegReg))
2183{
2184 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataD80WoSafeJmp */
2185}
2186
2187
2188/**
2189 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2190 */
2191IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2192 RTGCPTR GCPtrMem, uint8_t iSegReg))
2193{
2194 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RwSafeJmp */
2195}
2196
2197
2198/**
2199 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2202 RTGCPTR GCPtrMem, uint8_t iSegReg))
2203{
2204 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128WoSafeJmp */
2205}
2206
2207
2208/**
2209 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2210 */
2211IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2212 RTGCPTR GCPtrMem, uint8_t iSegReg))
2213{
2214 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RoSafeJmp */
2215}
2216
2217
2218/*********************************************************************************************************************************
2219* Helpers: Flat memory mapping. *
2220*********************************************************************************************************************************/
2221
2222/**
2223 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2224 */
2225IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2226{
2227 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RwSafeJmp */
2228}
2229
2230
2231/**
2232 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2233 */
2234IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2235{
2236 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8WoSafeJmp */
2237}
2238
2239
2240/**
2241 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2242 */
2243IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2244{
2245 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RoSafeJmp */
2246}
2247
2248
2249/**
2250 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2251 */
2252IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2253{
2254 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RwSafeJmp */
2255}
2256
2257
2258/**
2259 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2260 */
2261IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2262{
2263 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16WoSafeJmp */
2264}
2265
2266
2267/**
2268 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2269 */
2270IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2271{
2272 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RoSafeJmp */
2273}
2274
2275
2276/**
2277 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2278 */
2279IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2280{
2281 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RwSafeJmp */
2282}
2283
2284
2285/**
2286 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2287 */
2288IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2289{
2290 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32WoSafeJmp */
2291}
2292
2293
2294/**
2295 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2296 */
2297IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2298{
2299 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RoSafeJmp */
2300}
2301
2302
2303/**
2304 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2305 */
2306IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2307{
2308 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RwSafeJmp */
2309}
2310
2311
2312/**
2313 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2314 */
2315IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2316{
2317 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64WoSafeJmp */
2318}
2319
2320
2321/**
2322 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2323 */
2324IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2325{
2326 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RoSafeJmp */
2327}
2328
2329
2330/**
2331 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2332 */
2333IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2334{
2335 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataR80WoSafeJmp */
2336}
2337
2338
2339/**
2340 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2341 */
2342IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2343{
2344 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataD80WoSafeJmp */
2345}
2346
2347
2348/**
2349 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2350 */
2351IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2352{
2353 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RwSafeJmp */
2354}
2355
2356
2357/**
2358 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2359 */
2360IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2361{
2362 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128WoSafeJmp */
2363}
2364
2365
2366/**
2367 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2368 */
2369IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2370{
2371 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RoSafeJmp */
2372}
2373
2374
2375/*********************************************************************************************************************************
2376* Helpers: Commit, rollback & unmap *
2377*********************************************************************************************************************************/
2378
2379/**
2380 * Used by TB code to commit and unmap a read-write memory mapping.
2381 */
2382IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2383{
2384 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2385}
2386
2387
2388/**
2389 * Used by TB code to commit and unmap a write-only memory mapping.
2390 */
2391IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2392{
2393 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2394}
2395
2396
2397/**
2398 * Used by TB code to commit and unmap a read-only memory mapping.
2399 */
2400IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2401{
2402 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2403}
2404
2405
2406/**
2407 * Reinitializes the native recompiler state.
2408 *
2409 * Called before starting a new recompile job.
2410 */
2411static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2412{
2413 pReNative->cLabels = 0;
2414 pReNative->bmLabelTypes = 0;
2415 pReNative->cFixups = 0;
2416#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2417 pReNative->pDbgInfo->cEntries = 0;
2418#endif
2419 pReNative->pTbOrg = pTb;
2420 pReNative->cCondDepth = 0;
2421 pReNative->uCondSeqNo = 0;
2422 pReNative->uCheckIrqSeqNo = 0;
2423 pReNative->uTlbSeqNo = 0;
2424
2425 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2426#if IEMNATIVE_HST_GREG_COUNT < 32
2427 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2428#endif
2429 ;
2430 pReNative->Core.bmHstRegsWithGstShadow = 0;
2431 pReNative->Core.bmGstRegShadows = 0;
2432 pReNative->Core.bmVars = 0;
2433 pReNative->Core.bmStack = 0;
2434 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2435 pReNative->Core.u64ArgVars = UINT64_MAX;
2436
2437 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 7);
2438 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2439 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2440 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2441 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2442 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2443 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2444 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2445
2446 /* Full host register reinit: */
2447 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2448 {
2449 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2450 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2451 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2452 }
2453
2454 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2455 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2456#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2457 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2458#endif
2459#ifdef IEMNATIVE_REG_FIXED_TMP0
2460 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2461#endif
2462 );
2463 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2464 {
2465 fRegs &= ~RT_BIT_32(idxReg);
2466 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2467 }
2468
2469 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2470#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2471 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2472#endif
2473#ifdef IEMNATIVE_REG_FIXED_TMP0
2474 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2475#endif
2476 return pReNative;
2477}
2478
2479
2480/**
2481 * Allocates and initializes the native recompiler state.
2482 *
2483 * This is called the first time an EMT wants to recompile something.
2484 *
2485 * @returns Pointer to the new recompiler state.
2486 * @param pVCpu The cross context virtual CPU structure of the calling
2487 * thread.
2488 * @param pTb The TB that's about to be recompiled.
2489 * @thread EMT(pVCpu)
2490 */
2491static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2492{
2493 VMCPU_ASSERT_EMT(pVCpu);
2494
2495 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2496 AssertReturn(pReNative, NULL);
2497
2498 /*
2499 * Try allocate all the buffers and stuff we need.
2500 */
2501 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2502 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2503 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2504#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2505 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2506#endif
2507 if (RT_LIKELY( pReNative->pInstrBuf
2508 && pReNative->paLabels
2509 && pReNative->paFixups)
2510#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2511 && pReNative->pDbgInfo
2512#endif
2513 )
2514 {
2515 /*
2516 * Set the buffer & array sizes on success.
2517 */
2518 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2519 pReNative->cLabelsAlloc = _8K;
2520 pReNative->cFixupsAlloc = _16K;
2521#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2522 pReNative->cDbgInfoAlloc = _16K;
2523#endif
2524
2525 /*
2526 * Done, just need to save it and reinit it.
2527 */
2528 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
2529 return iemNativeReInit(pReNative, pTb);
2530 }
2531
2532 /*
2533 * Failed. Cleanup and return.
2534 */
2535 AssertFailed();
2536 RTMemFree(pReNative->pInstrBuf);
2537 RTMemFree(pReNative->paLabels);
2538 RTMemFree(pReNative->paFixups);
2539#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2540 RTMemFree(pReNative->pDbgInfo);
2541#endif
2542 RTMemFree(pReNative);
2543 return NULL;
2544}
2545
2546
2547/**
2548 * Creates a label
2549 *
2550 * If the label does not yet have a defined position,
2551 * call iemNativeLabelDefine() later to set it.
2552 *
2553 * @returns Label ID. Throws VBox status code on failure, so no need to check
2554 * the return value.
2555 * @param pReNative The native recompile state.
2556 * @param enmType The label type.
2557 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2558 * label is not yet defined (default).
2559 * @param uData Data associated with the label. Only applicable to
2560 * certain types of labels. Default is zero.
2561 */
2562DECL_HIDDEN_THROW(uint32_t)
2563iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2564 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2565{
2566 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2567
2568 /*
2569 * Locate existing label definition.
2570 *
2571 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2572 * and uData is zero.
2573 */
2574 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2575 uint32_t const cLabels = pReNative->cLabels;
2576 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2577#ifndef VBOX_STRICT
2578 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2579 && offWhere == UINT32_MAX
2580 && uData == 0
2581#endif
2582 )
2583 {
2584#ifndef VBOX_STRICT
2585 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2586 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2587 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2588 if (idxLabel < pReNative->cLabels)
2589 return idxLabel;
2590#else
2591 for (uint32_t i = 0; i < cLabels; i++)
2592 if ( paLabels[i].enmType == enmType
2593 && paLabels[i].uData == uData)
2594 {
2595 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2596 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2597 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2598 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2599 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2600 return i;
2601 }
2602 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2603 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2604#endif
2605 }
2606
2607 /*
2608 * Make sure we've got room for another label.
2609 */
2610 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2611 { /* likely */ }
2612 else
2613 {
2614 uint32_t cNew = pReNative->cLabelsAlloc;
2615 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2616 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2617 cNew *= 2;
2618 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* The IEMNATIVEFIXUP::idxLabel type restricts this. */
2619 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2620 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2621 pReNative->paLabels = paLabels;
2622 pReNative->cLabelsAlloc = cNew;
2623 }
2624
2625 /*
2626 * Define a new label.
2627 */
2628 paLabels[cLabels].off = offWhere;
2629 paLabels[cLabels].enmType = enmType;
2630 paLabels[cLabels].uData = uData;
2631 pReNative->cLabels = cLabels + 1;
2632
2633 Assert((unsigned)enmType < 64);
2634 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2635
2636 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2637 {
2638 Assert(uData == 0);
2639 pReNative->aidxUniqueLabels[enmType] = cLabels;
2640 }
2641
2642 if (offWhere != UINT32_MAX)
2643 {
2644#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2645 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2646 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2647#endif
2648 }
2649 return cLabels;
2650}
2651
2652
2653/**
2654 * Defines the location of an existing label.
2655 *
2656 * @param pReNative The native recompile state.
2657 * @param idxLabel The label to define.
2658 * @param offWhere The position.
2659 */
2660DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2661{
2662 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2663 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2664 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2665 pLabel->off = offWhere;
2666#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2667 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2668 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2669#endif
2670}
2671
2672
2673/**
2674 * Looks up a label.
2675 *
2676 * @returns Label ID if found, UINT32_MAX if not.
2677 */
2678static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2679 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2680{
2681 Assert((unsigned)enmType < 64);
2682 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2683 {
2684 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2685 return pReNative->aidxUniqueLabels[enmType];
2686
2687 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2688 uint32_t const cLabels = pReNative->cLabels;
2689 for (uint32_t i = 0; i < cLabels; i++)
2690 if ( paLabels[i].enmType == enmType
2691 && paLabels[i].uData == uData
2692 && ( paLabels[i].off == offWhere
2693 || offWhere == UINT32_MAX
2694 || paLabels[i].off == UINT32_MAX))
2695 return i;
2696 }
2697 return UINT32_MAX;
2698}
2699
2700
2701/**
2702 * Adds a fixup.
2703 *
2704 * @throws VBox status code (int) on failure.
2705 * @param pReNative The native recompile state.
2706 * @param offWhere The instruction offset of the fixup location.
2707 * @param idxLabel The target label ID for the fixup.
2708 * @param enmType The fixup type.
2709 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2710 */
2711DECL_HIDDEN_THROW(void)
2712iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2713 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2714{
2715 Assert(idxLabel <= UINT16_MAX);
2716 Assert((unsigned)enmType <= UINT8_MAX);
2717
2718 /*
2719 * Make sure we've got room for another fixup.
2720 */
2721 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2722 uint32_t const cFixups = pReNative->cFixups;
2723 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2724 { /* likely */ }
2725 else
2726 {
2727 uint32_t cNew = pReNative->cFixupsAlloc;
2728 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2729 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2730 cNew *= 2;
2731 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2732 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2733 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2734 pReNative->paFixups = paFixups;
2735 pReNative->cFixupsAlloc = cNew;
2736 }
2737
2738 /*
2739 * Add the fixup.
2740 */
2741 paFixups[cFixups].off = offWhere;
2742 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2743 paFixups[cFixups].enmType = enmType;
2744 paFixups[cFixups].offAddend = offAddend;
2745 pReNative->cFixups = cFixups + 1;
2746}
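/*
 * Editorial sketch of how the label and fixup APIs above fit together; this is
 * an illustration only, not code from this file, and enmLabelType, enmFixupType,
 * offBranch and off are placeholder names:
 *
 *      // Forward declare a label whose native code offset is not known yet.
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *
 *      // Emit a branch to the label and record a fixup at the branch
 *      // instruction so the displacement can be patched once the label is
 *      // defined (the fixup type depends on the branch encoding used).
 *      iemNativeAddFixup(pReNative, offBranch, idxLabel, enmFixupType);
 *
 *      // When the target code gets emitted, pin the label to that offset.
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */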
2747
2748
2749/**
2750 * Slow code path for iemNativeInstrBufEnsure.
2751 */
2752DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2753{
2754 /* Double the buffer size till we meet the request. */
2755 uint32_t cNew = pReNative->cInstrBufAlloc;
2756 AssertReturn(cNew > 0, NULL);
2757 do
2758 cNew *= 2;
2759 while (cNew < off + cInstrReq);
2760
2761 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2762#ifdef RT_ARCH_ARM64
2763 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2764#else
2765 uint32_t const cbMaxInstrBuf = _2M;
2766#endif
2767 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2768
2769 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2770 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2771
2772 pReNative->cInstrBufAlloc = cNew;
2773 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2774}
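/* Editorial note on the growth policy above: starting from the 64 KiB buffer
   allocated by iemNativeInit, the capacity doubles (64 KiB -> 128 KiB ->
   256 KiB -> ...) until off + cInstrReq instruction slots fit, and the
   assertion caps the buffer at 1 MiB on ARM64 (limited branch range) or
   2 MiB otherwise. */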
2775
2776#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2777
2778/**
2779 * Grows the static debug info array used during recompilation.
2780 *
2781 * @returns Pointer to the new debug info block; throws VBox status code on
2782 * failure, so no need to check the return value.
2783 */
2784DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2785{
2786 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2787 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2788 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2789 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2790 pReNative->pDbgInfo = pDbgInfo;
2791 pReNative->cDbgInfoAlloc = cNew;
2792 return pDbgInfo;
2793}
2794
2795
2796/**
2797 * Adds a new uninitialized debug info entry, returning the pointer to it.
2798 */
2799DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2800{
2801 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2802 { /* likely */ }
2803 else
2804 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2805 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2806}
2807
2808
2809/**
2810 * Debug Info: Adds a native offset record, if necessary.
2811 */
2812static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2813{
2814 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2815
2816 /*
2817 * Search backwards to see if we've got a similar record already.
2818 */
2819 uint32_t idx = pDbgInfo->cEntries;
2820 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
2821 while (idx-- > idxStop)
2822 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
2823 {
2824 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
2825 return;
2826 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
2827 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2828 break;
2829 }
2830
2831 /*
2832 * Add it.
2833 */
2834 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2835 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2836 pEntry->NativeOffset.offNative = off;
2837}
2838
2839
2840/**
2841 * Debug Info: Record info about a label.
2842 */
2843static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2844{
2845 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2846 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2847 pEntry->Label.uUnused = 0;
2848 pEntry->Label.enmLabel = (uint8_t)enmType;
2849 pEntry->Label.uData = uData;
2850}
2851
2852
2853/**
2854 * Debug Info: Record info about a threaded call.
2855 */
2856static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2857{
2858 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2859 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2860 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2861 pEntry->ThreadedCall.uUnused = 0;
2862 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2863}
2864
2865
2866/**
2867 * Debug Info: Record info about a new guest instruction.
2868 */
2869static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2870{
2871 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2872 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2873 pEntry->GuestInstruction.uUnused = 0;
2874 pEntry->GuestInstruction.fExec = fExec;
2875}
2876
2877
2878/**
2879 * Debug Info: Record info about guest register shadowing.
2880 */
2881static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2882 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2883{
2884 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2885 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2886 pEntry->GuestRegShadowing.uUnused = 0;
2887 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2888 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2889 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2890}
2891
2892#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2893
2894
2895/*********************************************************************************************************************************
2896* Register Allocator *
2897*********************************************************************************************************************************/
2898
2899/**
2900 * Register parameter indexes (indexed by argument number).
2901 */
2902DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2903{
2904 IEMNATIVE_CALL_ARG0_GREG,
2905 IEMNATIVE_CALL_ARG1_GREG,
2906 IEMNATIVE_CALL_ARG2_GREG,
2907 IEMNATIVE_CALL_ARG3_GREG,
2908#if defined(IEMNATIVE_CALL_ARG4_GREG)
2909 IEMNATIVE_CALL_ARG4_GREG,
2910# if defined(IEMNATIVE_CALL_ARG5_GREG)
2911 IEMNATIVE_CALL_ARG5_GREG,
2912# if defined(IEMNATIVE_CALL_ARG6_GREG)
2913 IEMNATIVE_CALL_ARG6_GREG,
2914# if defined(IEMNATIVE_CALL_ARG7_GREG)
2915 IEMNATIVE_CALL_ARG7_GREG,
2916# endif
2917# endif
2918# endif
2919#endif
2920};
2921
2922/**
2923 * Call register masks indexed by argument count.
2924 */
2925DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2926{
2927 0,
2928 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2929 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2930 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2931 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2932 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2933#if defined(IEMNATIVE_CALL_ARG4_GREG)
2934 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2935 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2936# if defined(IEMNATIVE_CALL_ARG5_GREG)
2937 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2938 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2939# if defined(IEMNATIVE_CALL_ARG6_GREG)
2940 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2941 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2942 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2943# if defined(IEMNATIVE_CALL_ARG7_GREG)
2944 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2945 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2946 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2947# endif
2948# endif
2949# endif
2950#endif
2951};
2952
2953#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2954/**
2955 * BP offset of the stack argument slots.
2956 *
2957 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2958 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2959 */
2960DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2961{
2962 IEMNATIVE_FP_OFF_STACK_ARG0,
2963# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2964 IEMNATIVE_FP_OFF_STACK_ARG1,
2965# endif
2966# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2967 IEMNATIVE_FP_OFF_STACK_ARG2,
2968# endif
2969# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2970 IEMNATIVE_FP_OFF_STACK_ARG3,
2971# endif
2972};
2973AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2974#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2975
2976/**
2977 * Info about shadowed guest register values.
2978 * @see IEMNATIVEGSTREG
2979 */
2980static struct
2981{
2982 /** Offset in VMCPU. */
2983 uint32_t off;
2984 /** The field size. */
2985 uint8_t cb;
2986 /** Name (for logging). */
2987 const char *pszName;
2988} const g_aGstShadowInfo[] =
2989{
2990#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2991 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2992 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2993 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2994 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2995 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2996 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2997 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2998 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2999 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3000 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3001 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3002 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3003 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3004 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3005 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3006 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3007 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3008 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3009 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3010 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3011 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3012 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3013 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3014 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3015 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3016 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3017 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3018 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3019 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3020 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3021 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3022 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3023 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3024 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3025 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3026 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3027#undef CPUMCTX_OFF_AND_SIZE
3028};
3029AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
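
/*
 * Illustrative lookup (documentation only): the table is indexed by
 * IEMNATIVEGSTREG, so the CPUMCTX offset and field size of, say, the guest RIP
 * can be fetched like this:
 *
 *     uint32_t const offRip = g_aGstShadowInfo[kIemNativeGstReg_Pc].off;
 *     uint8_t  const cbRip  = g_aGstShadowInfo[kIemNativeGstReg_Pc].cb;   // sizeof(uint64_t)
 *
 * This is exactly how iemNativeEmitLoadGprWithGstShadowReg further down picks
 * the right VMCPU load width for a given guest register.
 */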
3030
3031
3032/** Host CPU general purpose register names. */
3033DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3034{
3035#ifdef RT_ARCH_AMD64
3036 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3037#elif defined(RT_ARCH_ARM64)
3038 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3039 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3040#else
3041# error "port me"
3042#endif
3043};
3044
3045
3046DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3047 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3048{
3049 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3050
3051 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3052 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3053 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3054 return (uint8_t)idxReg;
3055}
3056
3057
3058/**
3059 * Tries to locate a suitable register in the given register mask.
3060 *
3061 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3062 * failed.
3063 *
3064 * @returns Host register number on success, returns UINT8_MAX on failure.
3065 */
3066static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3067{
3068 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3069 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3070 if (fRegs)
3071 {
3072 /** @todo pick better here: */
3073 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3074
3075 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3076 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3077 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3078 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3079
3080 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3081 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3082 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3083 return idxReg;
3084 }
3085 return UINT8_MAX;
3086}
3087
3088
3089/**
3090 * Locate a register, possibly freeing one up.
3091 *
3092 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3093 * failed.
3094 *
3095 * @returns Host register number on success. Returns UINT8_MAX if no register
3096 * was found; the caller is expected to deal with this and raise an
3097 * allocation-type specific status code (if desired).
3098 *
3099 * @throws VBox status code if we run into trouble spilling a variable or
3100 * recording debug info. Does NOT throw anything if we're out of
3101 * registers, though.
3102 */
3103static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3104 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3105{
3106 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3107 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3108
3109 /*
3110 * Try a freed register that's shadowing a guest register
3111 */
3112 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3113 if (fRegs)
3114 {
3115 unsigned const idxReg = (fPreferVolatile
3116 ? ASMBitFirstSetU32(fRegs)
3117 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3118 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
3119 - 1;
3120
3121 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3122 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3123 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3124 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3125
3126 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3127 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3128 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3129 return idxReg;
3130 }
3131
3132 /*
3133 * Try to free up a variable that's in a register.
3134 *
3135 * We do two rounds here: first we evacuate variables that don't need to be
3136 * saved on the stack, then in the second round we move things to the stack.
3137 */
3138 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3139 {
3140 uint32_t fVars = pReNative->Core.bmVars;
3141 while (fVars)
3142 {
3143 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3144 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3145 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3146 && (RT_BIT_32(idxReg) & fRegMask)
3147 && ( iLoop == 0
3148 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3149 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3150 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3151 {
3152 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3153 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3154 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3155 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3156 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3157 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3158
3159 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3160 {
3161 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3162 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3163 }
3164
3165 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3166 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3167
3168 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3169 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3170 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3171 return idxReg;
3172 }
3173 fVars &= ~RT_BIT_32(idxVar);
3174 }
3175 }
3176
3177 return UINT8_MAX;
3178}
3179
3180
3181/**
3182 * Reassigns a variable to a different register specified by the caller.
3183 *
3184 * @returns The new code buffer position.
3185 * @param pReNative The native recompile state.
3186 * @param off The current code buffer position.
3187 * @param idxVar The variable index.
3188 * @param idxRegOld The old host register number.
3189 * @param idxRegNew The new host register number.
3190 * @param pszCaller The caller for logging.
3191 */
3192static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3193 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3194{
3195 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3196 RT_NOREF(pszCaller);
3197
3198 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3199
3200 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3201 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3202 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3203 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3204
3205 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3206 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3207 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3208 if (fGstRegShadows)
3209 {
3210 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3211 | RT_BIT_32(idxRegNew);
3212 while (fGstRegShadows)
3213 {
3214 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3215 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3216
3217 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3218 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3219 }
3220 }
3221
3222 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3223 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3224 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3225 return off;
3226}
3227
3228
3229/**
3230 * Moves a variable to a different register or spills it onto the stack.
3231 *
3232 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3233 * kinds can easily be recreated if needed later.
3234 *
3235 * @returns The new code buffer position.
3236 * @param pReNative The native recompile state.
3237 * @param off The current code buffer position.
3238 * @param idxVar The variable index.
3239 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3240 * call-volatile registers.
3241 */
3242static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3243 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3244{
3245 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3246 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3247 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3248
3249 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3250 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3251 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3252 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3253 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3254 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3255 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3256 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3257 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3258
3259
3260 /** @todo Add statistics on this.*/
3261 /** @todo Implement basic variable liveness analysis (python) so variables
3262 * can be freed immediately once they are no longer used. As it stands, we
3263 * risk trashing registers and stack slots for dead variables. */
3264
3265 /*
3266 * First try move it to a different register, as that's cheaper.
3267 */
3268 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3269 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3270 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3271 if (fRegs)
3272 {
3273 /* Avoid using shadow registers, if possible. */
3274 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3275 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3276 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3277 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3278 }
3279
3280 /*
3281 * Otherwise we must spill the register onto the stack.
3282 */
3283 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3284 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3285 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3286 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3287
3288 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3289 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3290 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3291 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3292 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3293 return off;
3294}
3295
3296
3297/**
3298 * Allocates a temporary host general purpose register.
3299 *
3300 * This may emit code to save register content onto the stack in order to free
3301 * up a register.
3302 *
3303 * @returns The host register number; throws VBox status code on failure,
3304 * so no need to check the return value.
3305 * @param pReNative The native recompile state.
3306 * @param poff Pointer to the variable with the code buffer position.
3307 * This will be updated if we need to move a variable from
3308 * a register to the stack in order to satisfy the request.
3309 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3310 * registers (@c true, default) or the other way around
3311 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3312 */
3313DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3314{
3315 /*
3316 * Try to find a completely unused register, preferably a call-volatile one.
3317 */
3318 uint8_t idxReg;
3319 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3320 & ~pReNative->Core.bmHstRegsWithGstShadow
3321 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3322 if (fRegs)
3323 {
3324 if (fPreferVolatile)
3325 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3326 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3327 else
3328 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3329 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3330 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3331 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3332 }
3333 else
3334 {
3335 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3336 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3337 }
3338 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3339}
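
/*
 * Typical allocate/use/free pattern (a documentation sketch only; the emitted
 * code in between and the immediate value are made up):
 *
 *     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *     off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1000));
 *     // ... emit whatever code needs the scratch register ...
 *     iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */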
3340
3341
3342/**
3343 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3344 * registers.
3345 *
3346 * @returns The host register number; throws VBox status code on failure,
3347 * so no need to check the return value.
3348 * @param pReNative The native recompile state.
3349 * @param poff Pointer to the variable with the code buffer position.
3350 * This will be updated if we need to move a variable from
3351 * a register to the stack in order to satisfy the request.
3352 * @param fRegMask Mask of acceptable registers.
3353 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3354 * registers (@c true, default) or the other way around
3355 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3356 */
3357DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3358 bool fPreferVolatile /*= true*/)
3359{
3360 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3361 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3362
3363 /*
3364 * Try to find a completely unused register, preferably a call-volatile one.
3365 */
3366 uint8_t idxReg;
3367 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3368 & ~pReNative->Core.bmHstRegsWithGstShadow
3369 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3370 & fRegMask;
3371 if (fRegs)
3372 {
3373 if (fPreferVolatile)
3374 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3375 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3376 else
3377 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3378 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3379 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3380 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3381 }
3382 else
3383 {
3384 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3385 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3386 }
3387 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3388}
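
/*
 * Sketch of a restricted allocation (illustrative only): accept call-volatile
 * registers only, excluding the fixed ones as the assertions above require:
 *
 *     uint8_t const idxTmpReg = iemNativeRegAllocTmpEx(pReNative, &off,
 *                                                      IEMNATIVE_CALL_VOLATILE_GREG_MASK
 *                                                      & ~IEMNATIVE_REG_FIXED_MASK);
 *     // ... use it ...
 *     iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */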
3389
3390
3391/**
3392 * Allocates a temporary register for loading an immediate value into.
3393 *
3394 * This will emit code to load the immediate, unless there happens to be an
3395 * unused register with the value already loaded.
3396 *
3397 * The caller must not modify the returned register; it must be considered
3398 * read-only. Free it using iemNativeRegFreeTmpImm().
3399 *
3400 * @returns The host register number; throws VBox status code on failure, so no
3401 * need to check the return value.
3402 * @param pReNative The native recompile state.
3403 * @param poff Pointer to the variable with the code buffer position.
3404 * @param uImm The immediate value that the register must hold upon
3405 * return.
3406 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3407 * registers (@c true, default) or the other way around
3408 * (@c false).
3409 *
3410 * @note Reusing immediate values has not been implemented yet.
3411 */
3412DECL_HIDDEN_THROW(uint8_t)
3413iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3414{
3415 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3416 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3417 return idxReg;
3418}
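
/*
 * Illustrative use (documentation only): load a constant once, reference it
 * read-only from the emitted code, then release it again:
 *
 *     uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *     // ... emit code that only reads idxImmReg ...
 *     iemNativeRegFreeTmpImm(pReNative, idxImmReg);
 */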
3419
3420
3421/**
3422 * Marks host register @a idxHstReg as containing a shadow copy of guest
3423 * register @a enmGstReg.
3424 *
3425 * ASSUMES that the caller has made sure @a enmGstReg is not associated with any
3426 * host register before calling.
3427 */
3428DECL_FORCE_INLINE(void)
3429iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3430{
3431 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
3432 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3433 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3434
3435 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
3436 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
3437 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
3438 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3439#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3440 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3441 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
3442#else
3443 RT_NOREF(off);
3444#endif
3445}
3446
3447
3448/**
3449 * Clear any guest register shadow claims from @a idxHstReg.
3450 *
3451 * The register does not need to be shadowing any guest registers.
3452 */
3453DECL_FORCE_INLINE(void)
3454iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
3455{
3456 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3457 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3458 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3459 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3460 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3461
3462#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3463 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3464 if (fGstRegs)
3465 {
3466 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
3467 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3468 while (fGstRegs)
3469 {
3470 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3471 fGstRegs &= ~RT_BIT_64(iGstReg);
3472 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
3473 }
3474 }
3475#else
3476 RT_NOREF(off);
3477#endif
3478
3479 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3480 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3481 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3482}
3483
3484
3485/**
3486 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
3487 * and global overview flags.
3488 */
3489DECL_FORCE_INLINE(void)
3490iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3491{
3492 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3493 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3494 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3495 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3496 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
3497 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3498 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3499
3500#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3501 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3502 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
3503#else
3504 RT_NOREF(off);
3505#endif
3506
3507 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3508 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
3509 if (!fGstRegShadowsNew)
3510 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3511 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
3512}
3513
3514
3515/**
3516 * Clear any guest register shadow claim for @a enmGstReg.
3517 */
3518DECL_FORCE_INLINE(void)
3519iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3520{
3521 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3522 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3523 {
3524 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3525 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3526 }
3527}
3528
3529
3530/**
3531 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
3532 * as the new shadow of it.
3533 */
3534DECL_FORCE_INLINE(void)
3535iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
3536 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3537{
3538 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3539 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3540 {
3541 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3542 if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
3543 return;
3544 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3545 }
3546 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
3547}
3548
3549
3550/**
3551 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
3552 * to @a idxRegTo.
3553 */
3554DECL_FORCE_INLINE(void)
3555iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
3556 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3557{
3558 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
3559 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
3560 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
3561 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
3562 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3563 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
3564 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
3565 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
3566 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
3567
3568 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3569 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
3570 if (!fGstRegShadowsFrom)
3571 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
3572 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
3573 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
3574 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
3575#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3576 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3577 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
3578#else
3579 RT_NOREF(off);
3580#endif
3581}
3582
3583
3584/**
3585 * Allocates a temporary host general purpose register for keeping a guest
3586 * register value.
3587 *
3588 * If a host register already holds the guest register value, no loading is
3589 * needed; otherwise code will be emitted to do it. Code may also
3590 * be emitted if we have to free up a register to satisfy the request.
3591 *
3592 * @returns The host register number; throws VBox status code on failure, so no
3593 * need to check the return value.
3594 * @param pReNative The native recompile state.
3595 * @param poff Pointer to the variable with the code buffer
3596 * position. This will be updated if we need to move a
3597 * variable from a register to the stack in order to
3598 * satisfy the request.
3599 * @param enmGstReg The guest register that is to be accessed.
3600 * @param enmIntendedUse How the caller will be using the host register.
3601 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3602 */
3603DECL_HIDDEN_THROW(uint8_t)
3604iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
3605 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
3606{
3607 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3608#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3609 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3610#endif
3611
3612 /*
3613 * First check if the guest register value is already in a host register.
3614 */
3615 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3616 {
3617 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3618 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3619 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3620 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3621
3622 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3623 {
3624 /*
3625 * If the register will trash the guest shadow copy, try find a
3626 * completely unused register we can use instead. If that fails,
3627 * we need to disassociate the host reg from the guest reg.
3628 */
3629 /** @todo would be nice to know if preserving the register is in any way helpful. */
3630 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3631 && ( ~pReNative->Core.bmHstRegs
3632 & ~pReNative->Core.bmHstRegsWithGstShadow
3633 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3634 {
3635 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
3636
3637 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3638
3639 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3640 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3641 g_apszIemNativeHstRegNames[idxRegNew]));
3642 idxReg = idxRegNew;
3643 }
3644 else
3645 {
3646 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3647 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3648 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3649 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3650 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3651 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3652 else
3653 {
3654 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3655 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3656 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3657 }
3658 }
3659 }
3660 else
3661 {
3662 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3663 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3664 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3665 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3666
3667 /*
3668 * Allocate a new register, copy the value and, if updating, the
3669 * guest shadow copy assignment to the new register.
3670 */
3671 /** @todo share register for readonly access. */
3672 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3673
3674 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3675 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3676
3677 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3678 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3679 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3680 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3681 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3682 else
3683 {
3684 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3685 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3686 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3687 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3688 }
3689 idxReg = idxRegNew;
3690 }
3691
3692#ifdef VBOX_STRICT
3693 /* Strict builds: Check that the value is correct. */
3694 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3695#endif
3696
3697 return idxReg;
3698 }
3699
3700 /*
3701 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3702 */
3703 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3704
3705 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3706 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3707
3708 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3709 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3710 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3711 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3712
3713 return idxRegNew;
3714}
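
/*
 * Illustrative use (a sketch; the register choice and the cast are just an
 * example): get a host register shadowing guest RAX for read-modify-write use:
 *
 *     uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                               (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                               kIemNativeGstRegUse_ForUpdate);
 *     // ... emit code updating idxRegRax; storing the result back to the
 *     //     CPUMCTX is done by separately emitted code ...
 *     iemNativeRegFreeTmp(pReNative, idxRegRax);
 */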
3715
3716
3717/**
3718 * Allocates a temporary host general purpose register that already holds the
3719 * given guest register value.
3720 *
3721 * This function is intended for places where the shadowing state cannot be
3722 * modified due to branching and such. It will fail if we don't have a
3723 * current shadow copy handy or if it's incompatible. The only code that will
3724 * be emitted here is value checking code in strict builds.
3725 *
3726 * The intended use can only be readonly!
3727 *
3728 * @returns The host register number, UINT8_MAX if not present.
3729 * @param pReNative The native recompile state.
3730 * @param poff Pointer to the instruction buffer offset.
3731 * Will be updated in strict builds if a register is
3732 * found.
3733 * @param enmGstReg The guest register that is to be read.
3734 * @note In strict builds, this may throw instruction buffer growth failures.
3735 * Non-strict builds will not throw anything.
3736 * @sa iemNativeRegAllocTmpForGuestReg
3737 */
3738DECL_HIDDEN_THROW(uint8_t)
3739iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3740{
3741 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3742
3743 /*
3744 * First check if the guest register value is already in a host register.
3745 */
3746 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3747 {
3748 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3749 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3750 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3751 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3752
3753 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3754 {
3755 /*
3756 * We only do readonly use here, so easy compared to the other
3757 * variant of this code.
3758 */
3759 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3760 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3761 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3762 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3763 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3764
3765#ifdef VBOX_STRICT
3766 /* Strict builds: Check that the value is correct. */
3767 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3768#else
3769 RT_NOREF(poff);
3770#endif
3771 return idxReg;
3772 }
3773 }
3774
3775 return UINT8_MAX;
3776}
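
/*
 * Illustrative use (documentation only): since this variant may fail, the
 * caller must be prepared for UINT8_MAX and fall back to some other strategy:
 *
 *     uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *     if (idxRegPc != UINT8_MAX)
 *     {
 *         // ... read-only use of idxRegPc ...
 *         iemNativeRegFreeTmp(pReNative, idxRegPc);
 *     }
 *     // else: no shadow copy is available; pick a different code path.
 */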
3777
3778
3779DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
3780
3781
3782/**
3783 * Allocates argument registers for a function call.
3784 *
3785 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3786 * need to check the return value.
3787 * @param pReNative The native recompile state.
3788 * @param off The current code buffer offset.
3789 * @param cArgs The number of arguments the function call takes.
3790 */
3791DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3792{
3793 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3794 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3795 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3796 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3797
3798 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3799 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3800 else if (cArgs == 0)
3801 return off;
3802
3803 /*
3804 * Do we get lucky, with all the registers free and not shadowing anything?
3805 */
3806 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3807 for (uint32_t i = 0; i < cArgs; i++)
3808 {
3809 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3810 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3811 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3812 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3813 }
3814 /*
3815 * Okay, not lucky so we have to free up the registers.
3816 */
3817 else
3818 for (uint32_t i = 0; i < cArgs; i++)
3819 {
3820 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3821 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3822 {
3823 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3824 {
3825 case kIemNativeWhat_Var:
3826 {
3827 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3828 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
3829 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3830 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3831 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3832
3833 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3834 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3835 else
3836 {
3837 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3838 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3839 }
3840 break;
3841 }
3842
3843 case kIemNativeWhat_Tmp:
3844 case kIemNativeWhat_Arg:
3845 case kIemNativeWhat_rc:
3846 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3847 default:
3848 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3849 }
3850
3851 }
3852 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3853 {
3854 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3855 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3856 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3857 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3858 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3859 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3860 }
3861 else
3862 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3863 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3864 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3865 }
3866 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3867 return off;
3868}
3869
3870
3871DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3872
3873
3874#if 0
3875/**
3876 * Frees a register assignment of any type.
3877 *
3878 * @param pReNative The native recompile state.
3879 * @param idxHstReg The register to free.
3880 *
3881 * @note Does not update variables.
3882 */
3883DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3884{
3885 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3886 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3887 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3888 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3889 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3890 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3891 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3892 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3893 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3894 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3895 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3896 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3897 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3898 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3899
3900 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3901 /* no flushing, right:
3902 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3903 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3904 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3905 */
3906}
3907#endif
3908
3909
3910/**
3911 * Frees a temporary register.
3912 *
3913 * Any shadow copies of guest registers assigned to the host register will not
3914 * be flushed by this operation.
3915 */
3916DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3917{
3918 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3919 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3920 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3921 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3922 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3923}
3924
3925
3926/**
3927 * Frees a temporary immediate register.
3928 *
3929 * It is assumed that the caller has not modified the register, so it still holds
3930 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3931 */
3932DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3933{
3934 iemNativeRegFreeTmp(pReNative, idxHstReg);
3935}
3936
3937
3938/**
3939 * Frees a register assigned to a variable.
3940 *
3941 * The register will be disassociated from the variable.
3942 */
3943DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3944{
3945 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3946 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3947 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3948 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3949 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
3950
3951 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3952 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3953 if (!fFlushShadows)
3954 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
3955 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3956 else
3957 {
3958 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3959 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3960 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3961 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3962 uint64_t fGstRegShadows = fGstRegShadowsOld;
3963 while (fGstRegShadows)
3964 {
3965 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3966 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3967
3968 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3969 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3970 }
3971 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
3972 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3973 }
3974}
3975
3976
3977/**
3978 * Called right before emitting a call instruction to move anything important
3979 * out of call-volatile registers, free and flush the call-volatile registers,
3980 * optionally freeing argument variables.
3981 *
3982 * @returns New code buffer offset; throws VBox status code on failure.
3983 * @param pReNative The native recompile state.
3984 * @param off The code buffer offset.
3985 * @param cArgs The number of arguments the function call takes.
3986 * It is presumed that the host register part of these have
3987 * been allocated as such already and won't need moving,
3988 * just freeing.
3989 * @param fKeepVars Mask of variables that should keep their register
3990 * assignments. Caller must take care to handle these.
3991 */
3992DECL_HIDDEN_THROW(uint32_t)
3993iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars = 0)
3994{
3995 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
3996
3997 /* fKeepVars will reduce this mask. */
3998 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3999
4000 /*
4001 * Move anything important out of volatile registers.
4002 */
4003 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4004 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4005 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4006#ifdef IEMNATIVE_REG_FIXED_TMP0
4007 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4008#endif
4009 & ~g_afIemNativeCallRegs[cArgs];
4010
4011 fRegsToMove &= pReNative->Core.bmHstRegs;
4012 if (!fRegsToMove)
4013 { /* likely */ }
4014 else
4015 {
4016 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4017 while (fRegsToMove != 0)
4018 {
4019 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4020 fRegsToMove &= ~RT_BIT_32(idxReg);
4021
4022 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4023 {
4024 case kIemNativeWhat_Var:
4025 {
4026 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4027 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4028 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4029 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4030 if (!(RT_BIT_32(idxVar) & fKeepVars))
4031 {
4032 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4033 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4034 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4035 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4036 else
4037 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4038 }
4039 else
4040 fRegsToFree &= ~RT_BIT_32(idxReg);
4041 continue;
4042 }
4043
4044 case kIemNativeWhat_Arg:
4045 AssertMsgFailed(("What?!?: %u\n", idxReg));
4046 continue;
4047
4048 case kIemNativeWhat_rc:
4049 case kIemNativeWhat_Tmp:
4050 AssertMsgFailed(("Missing free: %u\n", idxReg));
4051 continue;
4052
4053 case kIemNativeWhat_FixedTmp:
4054 case kIemNativeWhat_pVCpuFixed:
4055 case kIemNativeWhat_pCtxFixed:
4056 case kIemNativeWhat_FixedReserved:
4057 case kIemNativeWhat_Invalid:
4058 case kIemNativeWhat_End:
4059 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4060 }
4061 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4062 }
4063 }
4064
4065 /*
4066 * Do the actual freeing.
4067 */
4068 if (pReNative->Core.bmHstRegs & fRegsToFree)
4069 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4070 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4071 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4072
4073 /* If there are guest register shadows in any call-volatile register, we
4074 have to clear the corresponding guest register masks for each register. */
4075 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4076 if (fHstRegsWithGstShadow)
4077 {
4078 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4079 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4080 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4081 do
4082 {
4083 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4084 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4085
4086 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4087 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4088 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4089 } while (fHstRegsWithGstShadow != 0);
4090 }
4091
4092 return off;
4093}
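
/*
 * Illustrative sequence around a helper call (a sketch; argument loading and
 * the actual call emission are assumed to happen in the caller's code):
 *
 *     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs);
 *     // ... emit the call itself ...
 *     // If the callee may have modified guest state, the now stale shadow
 *     // copies are typically dropped afterwards:
 *     iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
 */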
4094
4095
4096/**
4097 * Flushes a set of guest register shadow copies.
4098 *
4099 * This is usually done after calling a threaded function or a C-implementation
4100 * of an instruction.
4101 *
4102 * @param pReNative The native recompile state.
4103 * @param fGstRegs Set of guest registers to flush.
4104 */
4105DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4106{
4107 /*
4108 * Reduce the mask by what's currently shadowed
4109 */
4110 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4111 fGstRegs &= bmGstRegShadowsOld;
4112 if (fGstRegs)
4113 {
4114 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4115 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4116 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4117 if (bmGstRegShadowsNew)
4118 {
4119 /*
4120 * Partial.
4121 */
4122 do
4123 {
4124 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4125 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4126 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4127 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4128 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4129
4130 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4131 fGstRegs &= ~fInThisHstReg;
4132 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4133 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4134 if (!fGstRegShadowsNew)
4135 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4136 } while (fGstRegs != 0);
4137 }
4138 else
4139 {
4140 /*
4141 * Clear all.
4142 */
4143 do
4144 {
4145 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4146 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4147 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4148 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4149 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4150
4151 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4152 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4153 } while (fGstRegs != 0);
4154 pReNative->Core.bmHstRegsWithGstShadow = 0;
4155 }
4156 }
4157}
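
/*
 * Illustrative use (documentation only): drop just the RIP and EFLAGS shadow
 * copies, e.g. because a helper is about to modify them in the CPUMCTX:
 *
 *     iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
 */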
4158
4159
4160/**
4161 * Flushes delayed write of a specific guest register.
4162 *
4163 * This must be called prior to calling CImpl functions and any helpers that use
4164 * the guest state (like raising exceptions) and such.
4165 *
4166 * This optimization has not yet been implemented. The first target would be
4167 * RIP updates, since these are the most common ones.
4168 */
4169DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4170 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
4171{
4172 RT_NOREF(pReNative, enmClass, idxReg);
4173 return off;
4174}
4175
4176
4177/**
4178 * Flushes any delayed guest register writes.
4179 *
4180 * This must be called prior to calling CImpl functions and any helpers that use
4181 * the guest state (like raising exceptions) and such.
4182 *
4183 * This optimization has not yet been implemented. The first target would be
4184 * RIP updates, since these are the most common ones.
4185 */
4186DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4187{
4188 RT_NOREF(pReNative, off);
4189 return off;
4190}
4191
4192
4193#ifdef VBOX_STRICT
4194/**
4195 * Does internal register allocator sanity checks.
4196 */
4197static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
4198{
4199 /*
4200 * Iterate host registers building a guest shadowing set.
4201 */
4202 uint64_t bmGstRegShadows = 0;
4203 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
4204 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
4205 while (bmHstRegsWithGstShadow)
4206 {
4207 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
4208 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4209 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4210
4211 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4212 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
4213 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
4214 bmGstRegShadows |= fThisGstRegShadows;
4215 while (fThisGstRegShadows)
4216 {
4217 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
4218 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
4219 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
4220 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
4221 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
4222 }
4223 }
4224 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
4225 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
4226 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
4227
4228 /*
4229 * Now the other way around, checking the guest to host index array.
4230 */
4231 bmHstRegsWithGstShadow = 0;
4232 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
4233 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4234 while (bmGstRegShadows)
4235 {
4236 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
4237 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4238 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
4239
4240 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4241 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
4242 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
4243 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
4244 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4245 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4246 }
4247 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
4248 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
4249 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
4250}
4251#endif
4252
4253
4254/*********************************************************************************************************************************
4255* Code Emitters (larger snippets) *
4256*********************************************************************************************************************************/
4257
4258/**
4259 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
4260 * extending to 64-bit width.
4261 *
4262 * @returns New code buffer offset; throws VBox status code on error.
4263 * @param pReNative The native recompile state.
4264 * @param off The current code buffer position.
4265 * @param idxHstReg The host register to load the guest register value into.
4266 * @param enmGstReg The guest register to load.
4267 *
4268 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
4269 * that is something the caller needs to do if applicable.
4270 */
4271DECL_HIDDEN_THROW(uint32_t)
4272iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
4273{
4274 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
4275 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4276
4277 switch (g_aGstShadowInfo[enmGstReg].cb)
4278 {
4279 case sizeof(uint64_t):
4280 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4281 case sizeof(uint32_t):
4282 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4283 case sizeof(uint16_t):
4284 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4285#if 0 /* not present in the table. */
4286 case sizeof(uint8_t):
4287 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4288#endif
4289 default:
4290 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4291 }
4292}
4293
4294
4295#ifdef VBOX_STRICT
4296/**
4297 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
4298 *
4299 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4300 * Trashes EFLAGS on AMD64.
4301 */
4302static uint32_t
4303iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
4304{
4305# ifdef RT_ARCH_AMD64
4306 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4307
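 /* Strategy: rotate the upper 32 bits into the low half, TEST them against
    0xffffffff so ZF is only set when they are all zero, trap with int3 when
    any bit is set, and finally rotate back to restore the original value. */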
4308 /* rol reg64, 32 */
4309 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4310 pbCodeBuf[off++] = 0xc1;
4311 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4312 pbCodeBuf[off++] = 32;
4313
4314 /* test reg32, ffffffffh */
4315 if (idxReg >= 8)
4316 pbCodeBuf[off++] = X86_OP_REX_B;
4317 pbCodeBuf[off++] = 0xf7;
4318 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4319 pbCodeBuf[off++] = 0xff;
4320 pbCodeBuf[off++] = 0xff;
4321 pbCodeBuf[off++] = 0xff;
4322 pbCodeBuf[off++] = 0xff;
4323
4324 /* je/jz +1 */
4325 pbCodeBuf[off++] = 0x74;
4326 pbCodeBuf[off++] = 0x01;
4327
4328 /* int3 */
4329 pbCodeBuf[off++] = 0xcc;
4330
4331 /* rol reg64, 32 */
4332 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4333 pbCodeBuf[off++] = 0xc1;
4334 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4335 pbCodeBuf[off++] = 32;
4336
4337# elif defined(RT_ARCH_ARM64)
4338 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4339 /* lsr tmp0, reg64, #32 */
4340 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
4341 /* cbz tmp0, +1 */
4342 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4343 /* brk #0x1100 */
4344 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
4345
4346# else
4347# error "Port me!"
4348# endif
4349 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4350 return off;
4351}
4352#endif /* VBOX_STRICT */
4353
4354
4355#ifdef VBOX_STRICT
4356/**
4357 * Emits code that checks that the content of register @a idxReg is the same
4358 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
4359 * instruction if that's not the case.
4360 *
4361 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4362 * Trashes EFLAGS on AMD64.
4363 */
4364static uint32_t
4365iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
4366{
4367# ifdef RT_ARCH_AMD64
4368 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4369
4370 /* cmp reg, [mem] */
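 /* Opcode selection: 0x38 is CMP r/m8,r8 for byte-sized shadows; otherwise
    0x39 (CMP r/m,r) with REX.W for 64-bit or an operand-size prefix (0x66)
    for 16-bit accesses. */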
4371 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
4372 {
4373 if (idxReg >= 8)
4374 pbCodeBuf[off++] = X86_OP_REX_R;
4375 pbCodeBuf[off++] = 0x38;
4376 }
4377 else
4378 {
4379 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
4380 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
4381 else
4382 {
4383 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
4384 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4385 else
4386 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
4387 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
4388 if (idxReg >= 8)
4389 pbCodeBuf[off++] = X86_OP_REX_R;
4390 }
4391 pbCodeBuf[off++] = 0x39;
4392 }
4393 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
4394
4395 /* je/jz +1 */
4396 pbCodeBuf[off++] = 0x74;
4397 pbCodeBuf[off++] = 0x01;
4398
4399 /* int3 */
4400 pbCodeBuf[off++] = 0xcc;
4401
4402 /* For values smaller than the register size, we must check that the rest
4403 of the register is all zeros. */
4404 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
4405 {
4406 /* test reg64, imm32 */
4407 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4408 pbCodeBuf[off++] = 0xf7;
4409 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4410 pbCodeBuf[off++] = 0;
4411 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
4412 pbCodeBuf[off++] = 0xff;
4413 pbCodeBuf[off++] = 0xff;
4414
4415 /* je/jz +1 */
4416 pbCodeBuf[off++] = 0x74;
4417 pbCodeBuf[off++] = 0x01;
4418
4419 /* int3 */
4420 pbCodeBuf[off++] = 0xcc;
4421 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4422 }
4423 else
4424 {
4425 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4426 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
4427 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
4428 }
4429
4430# elif defined(RT_ARCH_ARM64)
4431 /* mov TMP0, [gstreg] */
4432 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
4433
4434 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4435 /* sub tmp0, tmp0, idxReg */
4436 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
4437 /* cbz tmp0, +1 */
4438 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4439 /* brk #0x1000+enmGstReg */
4440 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
4441 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4442
4443# else
4444# error "Port me!"
4445# endif
4446 return off;
4447}
4448#endif /* VBOX_STRICT */
4449
4450
4451#ifdef VBOX_STRICT
4452/**
4453 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
4454 * important bits.
4455 *
4456 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4457 * Trashes EFLAGS on AMD64.
4458 */
4459static uint32_t
4460iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
4461{
4462 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4463 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
4464 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
4465 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
4466
4467#ifdef RT_ARCH_AMD64
4468 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4469
4470 /* je/jz +1 */
4471 pbCodeBuf[off++] = 0x74;
4472 pbCodeBuf[off++] = 0x01;
4473
4474 /* int3 */
4475 pbCodeBuf[off++] = 0xcc;
4476
4477# elif defined(RT_ARCH_ARM64)
4478 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4479
4480 /* b.eq +1 */
4481 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
4482 /* brk #0x2000 */
4483 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
4484
4485# else
4486# error "Port me!"
4487# endif
4488 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4489
4490 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4491 return off;
4492}
4493#endif /* VBOX_STRICT */
4494
4495
4496/**
4497 * Emits code for checking the return code of a call and rcPassUp, returning
4498 * from the code if either is non-zero.
4499 */
4500DECL_HIDDEN_THROW(uint32_t)
4501iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
4502{
4503#ifdef RT_ARCH_AMD64
4504 /*
4505 * AMD64: eax = call status code.
4506 */
4507
4508 /* edx = rcPassUp */
4509 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4510# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4511 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
4512# endif
4513
4514 /* edx = eax | rcPassUp */
4515 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4516 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
4517 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
4518 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4519
4520 /* Jump to non-zero status return path. */
4521 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
4522
4523 /* done. */
4524
4525#elif RT_ARCH_ARM64
4526 /*
4527 * ARM64: w0 = call status code.
4528 */
4529# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4530 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
4531# endif
4532 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4533
4534 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4535
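 /* orr w4, w3, w0 ; combine the call status (w0) and rcPassUp (w3). */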
4536 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
4537
4538 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4539 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
4540 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
4541
4542#else
4543# error "port me"
4544#endif
4545 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4546 return off;
4547}
4548
4549
4550/**
4551 * Emits code to check if the content of @a idxAddrReg is a canonical address,
4552 * raising a \#GP(0) if it isn't.
4553 *
4554 * @returns New code buffer offset; throws VBox status code on error.
4555 * @param pReNative The native recompile state.
4556 * @param off The code buffer offset.
4557 * @param idxAddrReg The host register with the address to check.
4558 * @param idxInstr The current instruction.
4559 */
4560DECL_HIDDEN_THROW(uint32_t)
4561iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
4562{
4563 /*
4564 * Make sure we don't have any outstanding guest register writes as we may
4565 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
4566 */
4567 off = iemNativeRegFlushPendingWrites(pReNative, off);
4568
4569#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4570 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4571#else
4572 RT_NOREF(idxInstr);
4573#endif
4574
4575#ifdef RT_ARCH_AMD64
4576 /*
4577 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
4578 * return raisexcpt();
4579 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
4580 */
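 /* Worked examples: 0x00007fffffffffff -> hi32 0x00007fff, +0x8000 = 0x0000ffff, >>16 = 0 (canonical);
                     0x0000800000000000 -> hi32 0x00008000, +0x8000 = 0x00010000, >>16 = 1 (#GP(0));
                     0xffff800000000000 -> hi32 0xffff8000, +0x8000 wraps to 0 in the 32-bit add, >>16 = 0 (canonical). */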
4581 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4582
4583 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
4584 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
4585 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
4586 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
4587 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4588
4589 iemNativeRegFreeTmp(pReNative, iTmpReg);
4590
4591#elif defined(RT_ARCH_ARM64)
4592 /*
4593 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
4594 * return raisexcpt();
4595 * ----
4596 * mov x1, 0x800000000000
4597 * add x1, x0, x1
4598 * cmp xzr, x1, lsr 48
4599 * b.ne .Lraisexcpt
4600 */
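 /* E.g. 0xffff800000000000 + 0x800000000000 wraps to 0 and passes, while
    0x0000800000000000 + 0x800000000000 = 0x0001000000000000 and shifts down
    to a non-zero value, taking the #GP(0) path. */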
4601 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4602
4603 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
4604 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
4605 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
4606 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4607
4608 iemNativeRegFreeTmp(pReNative, iTmpReg);
4609
4610#else
4611# error "Port me"
4612#endif
4613 return off;
4614}
4615
4616
4617/**
4618 * Emits code to check if the content of @a idxAddrReg is within the limit of
4619 * idxSegReg, raising a \#GP(0) if it isn't.
4620 *
4621 * @returns New code buffer offset; throws VBox status code on error.
4622 * @param pReNative The native recompile state.
4623 * @param off The code buffer offset.
4624 * @param idxAddrReg The host register (32-bit) with the address to
4625 * check.
4626 * @param idxSegReg The segment register (X86_SREG_XXX) to check
4627 * against.
4628 * @param idxInstr The current instruction.
4629 */
4630DECL_HIDDEN_THROW(uint32_t)
4631iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4632 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
4633{
4634 /*
4635 * Make sure we don't have any outstanding guest register writes as we may
4636 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
4637 */
4638 off = iemNativeRegFlushPendingWrites(pReNative, off);
4639
4640#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4641 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4642#else
4643 RT_NOREF(idxInstr);
4644#endif
4645
4646 /** @todo implement expand down/whatnot checking */
4647 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
4648
4649 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4650 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
4651 kIemNativeGstRegUse_ForUpdate);
4652
4653 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
4654 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4655
4656 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
4657 return off;
4658}
4659
4660
4661/**
4662 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
4663 *
4664 * @returns The flush mask.
4665 * @param fCImpl The IEM_CIMPL_F_XXX flags.
4666 * @param fGstShwFlush The starting flush mask.
4667 */
4668DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
4669{
4670 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
4671 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
4672 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
4673 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
4674 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
4675 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
4676 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
4677 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
4678 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
4679 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
4680 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
4681 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
4682 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4683 return fGstShwFlush;
4684}
4685
4686
4687/**
4688 * Emits a call to a CImpl function or something similar.
4689 */
4690static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
4691 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
4692 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
4693{
4694 /*
4695 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
4696 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
4697 */
4698 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
4699 fGstShwFlush
4700 | RT_BIT_64(kIemNativeGstReg_Pc)
4701 | RT_BIT_64(kIemNativeGstReg_EFlags));
4702 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4703
4704 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4705
4706 /*
4707 * Load the parameters.
4708 */
4709#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
4710 /* Special handling for the hidden VBOXSTRICTRC return pointer: it occupies the first argument register (xCX), so pVCpu and the remaining arguments shift up by one. */
4711 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4712 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4713 if (cAddParams > 0)
4714 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
4715 if (cAddParams > 1)
4716 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
4717 if (cAddParams > 2)
4718 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
4719 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4720
4721#else
4722 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
4723 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4724 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4725 if (cAddParams > 0)
4726 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
4727 if (cAddParams > 1)
4728 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
4729 if (cAddParams > 2)
4730# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
4731 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
4732# else
4733 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
4734# endif
4735#endif
4736
4737 /*
4738 * Make the call.
4739 */
4740 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
4741
4742#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4743 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4744#endif
4745
4746 /*
4747 * Check the status code.
4748 */
4749 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4750}
4751
4752
4753/**
4754 * Emits a call to a threaded worker function.
4755 */
4756static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
4757{
4758 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
4759 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4760
4761#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4762 /* The threaded function may throw / long jmp, so set current instruction
4763 number if we're counting. */
4764 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4765#endif
4766
4767 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
4768
4769#ifdef RT_ARCH_AMD64
4770 /* Load the parameters and emit the call. */
4771# ifdef RT_OS_WINDOWS
4772# ifndef VBOXSTRICTRC_STRICT_ENABLED
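 /* Windows x64 calling convention: rcx = pVCpu, rdx/r8/r9 = parameters 0 thru 2. */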
4773 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4774 if (cParams > 0)
4775 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
4776 if (cParams > 1)
4777 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
4778 if (cParams > 2)
4779 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
4780# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
4781 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
4782 if (cParams > 0)
4783 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
4784 if (cParams > 1)
4785 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
4786 if (cParams > 2)
4787 {
4788 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
4789 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
4790 }
4791 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4792# endif /* VBOXSTRICTRC_STRICT_ENABLED */
4793# else
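 /* SysV AMD64 calling convention: rdi = pVCpu, rsi/rdx/rcx = parameters 0 thru 2. */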
4794 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4795 if (cParams > 0)
4796 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
4797 if (cParams > 1)
4798 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
4799 if (cParams > 2)
4800 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
4801# endif
4802
4803 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4804
4805# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4806 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4807# endif
4808
4809#elif RT_ARCH_ARM64
4810 /*
4811 * ARM64:
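 * x0 = pVCpu, x1/x2/x3 = parameters 0 thru 2 (the IEMNATIVE_CALL_ARGn_GREG assignments).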
4812 */
4813 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4814 if (cParams > 0)
4815 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
4816 if (cParams > 1)
4817 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
4818 if (cParams > 2)
4819 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
4820
4821 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4822
4823#else
4824# error "port me"
4825#endif
4826
4827 /*
4828 * Check the status code.
4829 */
4830 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
4831
4832 return off;
4833}
4834
4835
4836/**
4837 * Emits the code at the ObsoleteTb label.
4838 */
4839static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4840{
4841 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
4842 if (idxLabel != UINT32_MAX)
4843 {
4844 iemNativeLabelDefine(pReNative, idxLabel, off);
4845
4846 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
4847 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4848 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
4849
4850 /* jump back to the return sequence. */
4851 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4852 }
4853 return off;
4854}
4855
4856
4857/**
4858 * Emits the code at the RaiseGP0 label.
4859 */
4860static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4861{
4862 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
4863 if (idxLabel != UINT32_MAX)
4864 {
4865 iemNativeLabelDefine(pReNative, idxLabel, off);
4866
4867 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
4868 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4869 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
4870
4871 /* jump back to the return sequence. */
4872 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4873 }
4874 return off;
4875}
4876
4877
4878/**
4879 * Emits the code at the ReturnWithFlags label (returns
4880 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
4881 */
4882static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4883{
4884 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
4885 if (idxLabel != UINT32_MAX)
4886 {
4887 iemNativeLabelDefine(pReNative, idxLabel, off);
4888
4889 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
4890
4891 /* jump back to the return sequence. */
4892 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4893 }
4894 return off;
4895}
4896
4897
4898/**
4899 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
4900 */
4901static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4902{
4903 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
4904 if (idxLabel != UINT32_MAX)
4905 {
4906 iemNativeLabelDefine(pReNative, idxLabel, off);
4907
4908 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
4909
4910 /* jump back to the return sequence. */
4911 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4912 }
4913 return off;
4914}
4915
4916
4917/**
4918 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
4919 */
4920static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4921{
4922 /*
4923 * Generate the rc + rcPassUp fiddling code if needed.
4924 */
4925 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4926 if (idxLabel != UINT32_MAX)
4927 {
4928 iemNativeLabelDefine(pReNative, idxLabel, off);
4929
4930 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
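 /* At this point the status code from the failed call is still in the return
    register (eax / w0) and, when instruction counting is enabled, the
    instruction number is in the register loaded before the call, so the moves
    below are ordered to copy each value into its argument register before it
    gets clobbered. */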
4931#ifdef RT_ARCH_AMD64
4932# ifdef RT_OS_WINDOWS
4933# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4934 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
4935# endif
4936 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4937 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
4938# else
4939 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4940 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
4941# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4942 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
4943# endif
4944# endif
4945# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4946 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
4947# endif
4948
4949#else
4950 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
4951 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4952 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
4953#endif
4954
4955 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
4956 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4957 }
4958 return off;
4959}
4960
4961
4962/**
4963 * Emits a standard epilog.
4964 */
4965static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
4966{
4967 *pidxReturnLabel = UINT32_MAX;
4968
4969 /*
4970 * Successful return, so clear the return register (eax, w0).
4971 */
4972 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
4973
4974 /*
4975 * Define label for common return point.
4976 */
4977 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
4978 *pidxReturnLabel = idxReturn;
4979
4980 /*
4981 * Restore registers and return.
4982 */
4983#ifdef RT_ARCH_AMD64
4984 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4985
4986 /* Reposition esp at the r15 restore point. */
4987 pbCodeBuf[off++] = X86_OP_REX_W;
4988 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
4989 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
4990 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
4991
4992 /* Pop non-volatile registers and return */
4993 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
4994 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
4995 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
4996 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
4997 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
4998 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
4999 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5000 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5001# ifdef RT_OS_WINDOWS
5002 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5003 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5004# endif
5005 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5006 pbCodeBuf[off++] = 0xc9; /* leave */
5007 pbCodeBuf[off++] = 0xc3; /* ret */
5008 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5009
5010#elif RT_ARCH_ARM64
5011 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5012
5013 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
5014 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5015 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5016 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5017 IEMNATIVE_FRAME_VAR_SIZE / 8);
5018 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5019 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5020 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5021 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5022 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5023 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5024 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5025 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5026 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5027 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5028 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5029 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5030
5031 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5032 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5033 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5034 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5035
5036 /* retab / ret */
5037# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5038 if (1)
5039 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5040 else
5041# endif
5042 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
5043
5044#else
5045# error "port me"
5046#endif
5047 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5048
5049 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
5050}
5051
5052
5053/**
5054 * Emits a standard prolog.
5055 */
5056static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5057{
5058#ifdef RT_ARCH_AMD64
5059 /*
5060 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
5061 * reserving 64 bytes for stack variables plus 4 non-register argument
5062 * slots. Fixed register assignment: xBX = pVCpu;
5063 *
5064 * Since we always do the same register spilling, we can use the same
5065 * unwind description for all the code.
5066 */
5067 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5068 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
5069 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
5070 pbCodeBuf[off++] = 0x8b;
5071 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
5072 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
5073 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
5074# ifdef RT_OS_WINDOWS
5075 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
5076 pbCodeBuf[off++] = 0x8b;
5077 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
5078 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
5079 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
5080# else
5081 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
5082 pbCodeBuf[off++] = 0x8b;
5083 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
5084# endif
5085 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
5086 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
5087 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
5088 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
5089 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
5090 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
5091 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
5092 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
5093
5094 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
5095 X86_GREG_xSP,
5096 IEMNATIVE_FRAME_ALIGN_SIZE
5097 + IEMNATIVE_FRAME_VAR_SIZE
5098 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
5099 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
5100 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
5101 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
5102 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
5103
5104#elif RT_ARCH_ARM64
5105 /*
5106 * We set up a stack frame exactly like on x86, only we have to push the
5107 * return address ourselves here. We save all non-volatile registers.
5108 */
5109 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5110
5111# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement of libunwind for JIT FDEs. Investigate further, as we've been unable
5112 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
5113 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
5114 * in any way conditional, so just emit this instruction now and hope for the best... */
5115 /* pacibsp */
5116 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
5117# endif
5118
5119 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
5120 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
5121 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5122 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5123 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
5124 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
5125 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5126 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5127 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5128 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5129 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5130 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5131 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5132 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5133 /* Save the BP and LR (ret address) registers at the top of the frame. */
5134 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5135 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5136 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5137 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
5138 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
5139 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
5140
5141 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
5142 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
5143
5144 /* mov r28, r0 */
5145 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
5146 /* mov r27, r1 */
5147 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
5148
5149#else
5150# error "port me"
5151#endif
5152 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5153 return off;
5154}
5155
5156
5157
5158
5159/*********************************************************************************************************************************
5160* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
5161*********************************************************************************************************************************/
5162
5163#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
5164 { \
5165 Assert(pReNative->Core.bmVars == 0); \
5166 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
5167 Assert(pReNative->Core.bmStack == 0); \
5168 pReNative->fMc = (a_fMcFlags); \
5169 pReNative->fCImpl = (a_fCImplFlags); \
5170 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
5171
5172/** We have to get to the end in recompilation mode, as otherwise we won't
5173 * generate code for all the IEM_MC_IF_XXX branches. */
5174#define IEM_MC_END() \
5175 iemNativeVarFreeAll(pReNative); \
5176 } return off
5177
5178
5179
5180/*********************************************************************************************************************************
5181* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
5182*********************************************************************************************************************************/
5183
5184#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
5185 pReNative->fMc = 0; \
5186 pReNative->fCImpl = (a_fFlags); \
5187 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
5188
5189
5190#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
5191 pReNative->fMc = 0; \
5192 pReNative->fCImpl = (a_fFlags); \
5193 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
5194
5195DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5196 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5197 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
5198{
5199 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
5200}
5201
5202
5203#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
5204 pReNative->fMc = 0; \
5205 pReNative->fCImpl = (a_fFlags); \
5206 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5207 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
5208
5209DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5210 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5211 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
5212{
5213 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
5214}
5215
5216
5217#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
5218 pReNative->fMc = 0; \
5219 pReNative->fCImpl = (a_fFlags); \
5220 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5221 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
5222
5223DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5224 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5225 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
5226 uint64_t uArg2)
5227{
5228 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
5229}
5230
5231
5232
5233/*********************************************************************************************************************************
5234* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
5235*********************************************************************************************************************************/
5236
5237/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
5238 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
5239DECL_INLINE_THROW(uint32_t)
5240iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5241{
5242 /*
5243 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
5244 * return with a special status code and make the execution loop deal with
5245 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
5246 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
5247 * could continue w/o interruption, it probably will drop into the
5248 * debugger, so it's not worth the effort of trying to service it here; we
5249 * just lump it in with the handling of the others.
5250 *
5251 * To simplify the code and the register state management even more (wrt
5252 * immediate in the AND operation), we always update the flags and skip the
5253 * conditional jump an extra check would otherwise require.
5254 */
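 /* The emitted code: fetch the EFLAGS shadow copy, branch to the
    ReturnWithFlags exit if TF or any of the debugger related bits are set,
    otherwise clear RF and the interrupt inhibit shadow bits and store the
    result back to CPUMCTX. */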
5255 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
5256 <= UINT32_MAX);
5257 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5258 kIemNativeGstRegUse_ForUpdate);
5259 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
5260 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
5261 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
5262 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
5263 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5264
5265 /* Free but don't flush the EFLAGS register. */
5266 iemNativeRegFreeTmp(pReNative, idxEflReg);
5267
5268 return off;
5269}
5270
5271
5272#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
5273 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5274
5275#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
5276 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
5277 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5278
5279/** Same as iemRegAddToRip64AndFinishingNoFlags. */
5280DECL_INLINE_THROW(uint32_t)
5281iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5282{
5283 /* Allocate a temporary PC register. */
5284 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5285
5286 /* Perform the addition and store the result. */
5287 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
5288 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5289
5290 /* Free but don't flush the PC register. */
5291 iemNativeRegFreeTmp(pReNative, idxPcReg);
5292
5293 return off;
5294}
5295
5296
5297#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
5298 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5299
5300#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
5301 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
5302 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5303
5304/** Same as iemRegAddToEip32AndFinishingNoFlags. */
5305DECL_INLINE_THROW(uint32_t)
5306iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5307{
5308 /* Allocate a temporary PC register. */
5309 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5310
5311 /* Perform the addition and store the result. */
5312 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5313 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5314
5315 /* Free but don't flush the PC register. */
5316 iemNativeRegFreeTmp(pReNative, idxPcReg);
5317
5318 return off;
5319}
5320
5321
5322#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
5323 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5324
5325#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
5326 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
5327 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5328
5329/** Same as iemRegAddToIp16AndFinishingNoFlags. */
5330DECL_INLINE_THROW(uint32_t)
5331iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5332{
5333 /* Allocate a temporary PC register. */
5334 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5335
5336 /* Perform the addition and store the result. */
5337 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5338 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5339 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5340
5341 /* Free but don't flush the PC register. */
5342 iemNativeRegFreeTmp(pReNative, idxPcReg);
5343
5344 return off;
5345}
5346
5347
5348
5349/*********************************************************************************************************************************
5350* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
5351*********************************************************************************************************************************/
5352
5353#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
5354 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5355 (a_enmEffOpSize), pCallEntry->idxInstr)
5356
5357#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5358 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
5359 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5360
5361#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
5362 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5363 IEMMODE_16BIT, pCallEntry->idxInstr)
5364
5365#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
5366 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
5367 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5368
5369#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
5370 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5371 IEMMODE_64BIT, pCallEntry->idxInstr)
5372
5373#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
5374 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
5375 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5376
5377/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
5378 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
5379 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
5380DECL_INLINE_THROW(uint32_t)
5381iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5382 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5383{
5384 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
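 /* In 64-bit mode near branches use either the default 64-bit operand size or,
    with a 0x66 prefix, 16-bit; a 32-bit operand size is not encodable, hence
    only these two cases. */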
5385
5386 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5387 off = iemNativeRegFlushPendingWrites(pReNative, off);
5388
5389 /* Allocate a temporary PC register. */
5390 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5391
5392 /* Perform the addition. */
5393 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
5394
5395 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
5396 {
5397 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5398 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5399 }
5400 else
5401 {
5402 /* Just truncate the result to 16-bit IP. */
5403 Assert(enmEffOpSize == IEMMODE_16BIT);
5404 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5405 }
5406 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5407
5408 /* Free but don't flush the PC register. */
5409 iemNativeRegFreeTmp(pReNative, idxPcReg);
5410
5411 return off;
5412}
5413
5414
5415#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
5416 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5417 (a_enmEffOpSize), pCallEntry->idxInstr)
5418
5419#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5420 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
5421 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5422
5423#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
5424 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5425 IEMMODE_16BIT, pCallEntry->idxInstr)
5426
5427#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
5428 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
5429 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5430
5431#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
5432 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5433 IEMMODE_32BIT, pCallEntry->idxInstr)
5434
5435#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
5436 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
5437 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5438
5439/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
5440 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
5441 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
5442DECL_INLINE_THROW(uint32_t)
5443iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5444 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5445{
5446 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
5447
5448 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5449 off = iemNativeRegFlushPendingWrites(pReNative, off);
5450
5451 /* Allocate a temporary PC register. */
5452 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5453
5454 /* Perform the addition. */
5455 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5456
5457 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
5458 if (enmEffOpSize == IEMMODE_16BIT)
5459 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5460
5461 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
5462 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5463
5464 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5465
5466 /* Free but don't flush the PC register. */
5467 iemNativeRegFreeTmp(pReNative, idxPcReg);
5468
5469 return off;
5470}
5471
5472
5473#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
5474 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
5475
5476#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
5477 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
5478 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5479
5480#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
5481 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
5482
5483#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
5484 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
5485 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5486
5487#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
5488 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
5489
5490#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
5491 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
5492 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5493
5494/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
5495DECL_INLINE_THROW(uint32_t)
5496iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5497 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
5498{
5499 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5500 off = iemNativeRegFlushPendingWrites(pReNative, off);
5501
5502 /* Allocate a temporary PC register. */
5503 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5504
5505 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
5506 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5507 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5508 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5509 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5510
5511 /* Free but don't flush the PC register. */
5512 iemNativeRegFreeTmp(pReNative, idxPcReg);
5513
5514 return off;
5515}
5516
5517
5518
5519/*********************************************************************************************************************************
5520* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
5521*********************************************************************************************************************************/
5522
5523/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
5524#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
5525 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5526
5527/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
5528#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
5529 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5530
5531/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
5532#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
5533 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5534
5535/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
5536 * clears flags. */
5537#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
5538 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
5539 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5540
5541/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
5542 * clears flags. */
5543#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
5544 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
5545 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5546
5547/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
5548 * clears flags. */
5549#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
5550 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
5551 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5552
5553#undef IEM_MC_SET_RIP_U16_AND_FINISH
5554
5555
5556/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
5557#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
5558 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
5559
5560/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
5561#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
5562 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
5563
5564/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
5565 * clears flags. */
5566#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
5567 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
5568 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5569
5570/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
5571 * and clears flags. */
5572#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
5573 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
5574 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5575
5576#undef IEM_MC_SET_RIP_U32_AND_FINISH
5577
5578
5579/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
5580#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
5581 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
5582
5583/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
5584 * and clears flags. */
5585#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
5586 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
5587 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5588
5589#undef IEM_MC_SET_RIP_U64_AND_FINISH
5590
5591
5592/** Same as iemRegRipJumpU16AndFinishNoFlags,
5593 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
5594DECL_INLINE_THROW(uint32_t)
5595iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
5596 uint8_t idxInstr, uint8_t cbVar)
5597{
5598 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
5599 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
5600
5601 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5602 off = iemNativeRegFlushPendingWrites(pReNative, off);
5603
5604 /* Get a register with the new PC loaded from idxVarPc.
5605 Note! This ASSUMES that the high bits of the GPR are zeroed. */
5606 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
5607
5608 /* Check limit (may #GP(0) + exit TB). */
5609 if (!f64Bit)
5610 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5611 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5612 else if (cbVar > sizeof(uint32_t))
5613 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
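 /* Note: for f64Bit with a 16-bit or 32-bit value (cbVar <= 4) neither check is
    emitted, as such values are always within the canonical range. */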
5614
5615 /* Store the result. */
5616 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5617
5618 /** @todo implicitly free the variable? */
5619
5620 return off;
5621}
5622
5623
5624
5625/*********************************************************************************************************************************
5626* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
5627*********************************************************************************************************************************/
5628
5629/**
5630 * Pushes an IEM_MC_IF_XXX onto the condition stack.
5631 *
5632 * @returns Pointer to the condition stack entry on success, NULL on failure
5633 * (too many nestings)
5634 */
5635DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
5636{
5637 uint32_t const idxStack = pReNative->cCondDepth;
5638 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
5639
5640 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
5641 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
5642
5643 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
5644 pEntry->fInElse = false;
5645 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
5646 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
5647
5648 return pEntry;
5649}
5650
5651
5652/**
5653 * Start of the if-block, snapshotting the register and variable state.
5654 */
5655DECL_INLINE_THROW(void)
5656iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
5657{
5658 Assert(offIfBlock != UINT32_MAX);
5659 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5660 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5661 Assert(!pEntry->fInElse);
5662
5663 /* Define the start of the IF block if requested or for disassembly purposes. */
5664 if (idxLabelIf != UINT32_MAX)
5665 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
5666#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5667 else
5668 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
5669#else
5670 RT_NOREF(offIfBlock);
5671#endif
5672
5673 /* Copy the initial state so we can restore it in the 'else' block. */
5674 pEntry->InitialState = pReNative->Core;
5675}
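
/* Rough shape of the code produced by the IEM_MC_IF_XXX / IEM_MC_ELSE /
   IEM_MC_ENDIF emitters below (sketch only):
         <evaluate condition, jump to Else when it is false>
      If:                  ; only defined when requested or for debug info
         <if-block code>
         jmp   EndIf       ; emitted by iemNativeEmitElse
      Else:
         <else-block code>
      EndIf:               ; defined by iemNativeEmitEndIf, which also merges
                           ; the register/variable state of the two branches;
                           ; without an IEM_MC_ELSE the Else label is simply
                           ; defined here as well. */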
5676
5677
5678#define IEM_MC_ELSE() } while (0); \
5679 off = iemNativeEmitElse(pReNative, off); \
5680 do {
5681
5682/** Emits code related to IEM_MC_ELSE. */
5683DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5684{
5685 /* Check sanity and get the conditional stack entry. */
5686 Assert(off != UINT32_MAX);
5687 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5688 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5689 Assert(!pEntry->fInElse);
5690
5691 /* Jump to the endif */
5692 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
5693
5694 /* Define the else label and enter the else part of the condition. */
5695 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5696 pEntry->fInElse = true;
5697
5698 /* Snapshot the core state so we can do a merge at the endif and restore
5699 the snapshot we took at the start of the if-block. */
5700 pEntry->IfFinalState = pReNative->Core;
5701 pReNative->Core = pEntry->InitialState;
5702
5703 return off;
5704}
5705
5706
5707#define IEM_MC_ENDIF() } while (0); \
5708 off = iemNativeEmitEndIf(pReNative, off)
5709
5710/** Emits code related to IEM_MC_ENDIF. */
5711DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5712{
5713 /* Check sanity and get the conditional stack entry. */
5714 Assert(off != UINT32_MAX);
5715 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5716 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5717
5718 /*
5719 * Now we have to find common ground with the core state at the end of the
5720 * if-block (or the initial state when there is no else-block). Use the smallest
5721 * common denominator and just drop anything that isn't the same in both states.
5722 */
5723 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
5724 * which is why we're doing this at the end of the else-block.
5725 * But we'd need more info about the future for that to be worth the effort. */
5726 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
5727 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
5728 {
5729 /* shadow guest stuff first. */
5730 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
5731 if (fGstRegs)
5732 {
5733 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
5734 do
5735 {
5736 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5737 fGstRegs &= ~RT_BIT_64(idxGstReg);
5738
5739 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5740 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
5741 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
5742 {
5743 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
5744 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
5745 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
5746 }
5747 } while (fGstRegs);
5748 }
5749 else
5750 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
5751
5752 /* Check variables next. For now we must require them to be identical
5753 or stuff we can recreate. */
5754 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
5755 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
5756 if (fVars)
5757 {
5758 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
5759 do
5760 {
5761 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
5762 fVars &= ~RT_BIT_32(idxVar);
5763
5764 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
5765 {
5766 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
5767 continue;
5768 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5769 {
5770 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5771 if (idxHstReg != UINT8_MAX)
5772 {
5773 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5774 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5775 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
5776 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5777 }
5778 continue;
5779 }
5780 }
5781 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
5782 continue;
5783
5784 /* Irreconcilable, so drop it. */
5785 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5786 if (idxHstReg != UINT8_MAX)
5787 {
5788 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5789 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5790 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
5791 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5792 }
5793 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
5794 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5795 } while (fVars);
5796 }
5797
5798 /* Finally, check that the host register allocations matches. */
5799 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
5800 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
5801 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
5802 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
5803 }
5804
5805 /*
5806 * Define the endif label and maybe the else one if we're still in the 'if' part.
5807 */
5808 if (!pEntry->fInElse)
5809 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5810 else
5811 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
5812 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
5813
5814 /* Pop the conditional stack. */
5815 pReNative->cCondDepth -= 1;
5816
5817 return off;
5818}
5819
5820
5821#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
5822 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
5823 do {
5824
5825/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
5826DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5827{
5828 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5829
5830 /* Get the eflags. */
5831 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5832 kIemNativeGstRegUse_ReadOnly);
5833
5834 /* Test and jump. */
5835 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5836
5837 /* Free but don't flush the EFlags register. */
5838 iemNativeRegFreeTmp(pReNative, idxEflReg);
5839
5840 /* Make a copy of the core state now as we start the if-block. */
5841 iemNativeCondStartIfBlock(pReNative, off);
5842
5843 return off;
5844}
5845
5846
5847#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
5848 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
5849 do {
5850
5851/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
5852DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5853{
5854 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5855
5856 /* Get the eflags. */
5857 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5858 kIemNativeGstRegUse_ReadOnly);
5859
5860 /* Test and jump. */
5861 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5862
5863 /* Free but don't flush the EFlags register. */
5864 iemNativeRegFreeTmp(pReNative, idxEflReg);
5865
5866 /* Make a copy of the core state now as we start the if-block. */
5867 iemNativeCondStartIfBlock(pReNative, off);
5868
5869 return off;
5870}
5871
5872
5873#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
5874 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
5875 do {
5876
5877/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
5878DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
5879{
5880 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5881
5882 /* Get the eflags. */
5883 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5884 kIemNativeGstRegUse_ReadOnly);
5885
5886 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5887 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5888
5889 /* Test and jump. */
5890 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5891
5892 /* Free but don't flush the EFlags register. */
5893 iemNativeRegFreeTmp(pReNative, idxEflReg);
5894
5895 /* Make a copy of the core state now as we start the if-block. */
5896 iemNativeCondStartIfBlock(pReNative, off);
5897
5898 return off;
5899}
5900
5901
5902#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
5903 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
5904 do {
5905
5906/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
5907DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
5908{
5909 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5910
5911 /* Get the eflags. */
5912 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5913 kIemNativeGstRegUse_ReadOnly);
5914
5915 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5916 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5917
5918 /* Test and jump. */
5919 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5920
5921 /* Free but don't flush the EFlags register. */
5922 iemNativeRegFreeTmp(pReNative, idxEflReg);
5923
5924 /* Make a copy of the core state now as we start the if-block. */
5925 iemNativeCondStartIfBlock(pReNative, off);
5926
5927 return off;
5928}
5929
5930
5931#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
5932 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
5933 do {
5934
5935#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
5936 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
5937 do {
5938
5939/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
5940DECL_INLINE_THROW(uint32_t)
5941iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5942 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
5943{
5944 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5945
5946 /* Get the eflags. */
5947 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5948 kIemNativeGstRegUse_ReadOnly);
5949
5950 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
5951 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
5952
5953 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
5954 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
5955 Assert(iBitNo1 != iBitNo2);
5956
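 /* The test below isolates bit #1 (AND), shifts it onto bit #2's position and
    XORs with EFLAGS, so bit #2 of the temporary is set exactly when the two
    flag bits differ. Example (bits chosen for illustration, SF=bit 7, OF=bit 11):
    eflags=0x880 -> and 0x080 gives 0x080 -> shl 4 gives 0x800 -> xor 0x880
    gives 0x080; bit 11 is clear, i.e. SF == OF. */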
5957#ifdef RT_ARCH_AMD64
5958 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
5959
5960 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5961 if (iBitNo1 > iBitNo2)
5962 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
5963 else
5964 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
5965 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5966
5967#elif defined(RT_ARCH_ARM64)
5968 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5969 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5970
5971 /* and tmpreg, eflreg, #1<<iBitNo1 */
5972 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
5973
5974 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
5975 if (iBitNo1 > iBitNo2)
5976 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5977 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
5978 else
5979 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5980 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
5981
5982 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5983
5984#else
5985# error "Port me"
5986#endif
5987
5988 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
5989 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
5990 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
5991
5992 /* Free but don't flush the EFlags and tmp registers. */
5993 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5994 iemNativeRegFreeTmp(pReNative, idxEflReg);
5995
5996 /* Make a copy of the core state now as we start the if-block. */
5997 iemNativeCondStartIfBlock(pReNative, off);
5998
5999 return off;
6000}
6001
6002
6003#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
6004 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
6005 do {
6006
6007#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
6008 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
6009 do {
6010
6011/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
6012 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
6013DECL_INLINE_THROW(uint32_t)
6014iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
6015 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6016{
6017 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6018
6019 /* We need an if-block label for the inverted variant, as it jumps straight to the if-block when the lone bit is set. */
6020 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
6021 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
6022
6023 /* Get the eflags. */
6024 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6025 kIemNativeGstRegUse_ReadOnly);
6026
6027 /* Translate the flag masks to bit numbers. */
6028 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6029 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6030
6031 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6032 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6033 Assert(iBitNo1 != iBitNo);
6034
6035 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6036 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6037 Assert(iBitNo2 != iBitNo);
6038 Assert(iBitNo2 != iBitNo1);
6039
6040#ifdef RT_ARCH_AMD64
6041 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
6042#elif defined(RT_ARCH_ARM64)
6043 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6044#endif
6045
6046 /* Check for the lone bit first. */
6047 if (!fInverted)
6048 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6049 else
6050 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
6051
6052 /* Then extract and compare the other two bits. */
6053#ifdef RT_ARCH_AMD64
6054 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6055 if (iBitNo1 > iBitNo2)
6056 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6057 else
6058 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6059 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6060
6061#elif defined(RT_ARCH_ARM64)
6062 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6063
6064 /* and tmpreg, eflreg, #1<<iBitNo1 */
6065 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6066
6067 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6068 if (iBitNo1 > iBitNo2)
6069 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6070 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6071 else
6072 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6073 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6074
6075 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6076
6077#else
6078# error "Port me"
6079#endif
6080
6081 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6082 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6083 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6084
6085 /* Free but don't flush the EFlags and tmp registers. */
6086 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6087 iemNativeRegFreeTmp(pReNative, idxEflReg);
6088
6089 /* Make a copy of the core state now as we start the if-block. */
6090 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
6091
6092 return off;
6093}
6094
6095
6096#define IEM_MC_IF_CX_IS_NZ() \
6097 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
6098 do {
6099
6100/** Emits code for IEM_MC_IF_CX_IS_NZ. */
6101DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6102{
6103 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6104
6105 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6106 kIemNativeGstRegUse_ReadOnly);
6107 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6108 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6109
6110 iemNativeCondStartIfBlock(pReNative, off);
6111 return off;
6112}
6113
6114
6115#define IEM_MC_IF_ECX_IS_NZ() \
6116 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
6117 do {
6118
6119#define IEM_MC_IF_RCX_IS_NZ() \
6120 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
6121 do {
6122
6123/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
6124DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
6125{
6126 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6127
6128 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6129 kIemNativeGstRegUse_ReadOnly);
6130 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6131 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6132
6133 iemNativeCondStartIfBlock(pReNative, off);
6134 return off;
6135}
6136
6137
6138#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6139 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
6140 do {
6141
6142#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6143 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
6144 do {
6145
6146/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6147DECL_INLINE_THROW(uint32_t)
6148iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
6149{
6150 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6151
6152 /* We have to load both RCX and EFLAGS before we can start branching,
6153 otherwise we'll end up in the else-block with an inconsistent
6154 register allocator state.
6155 Doing EFLAGS first as it's more likely to be loaded, right? */
6156 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6157 kIemNativeGstRegUse_ReadOnly);
6158 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6159 kIemNativeGstRegUse_ReadOnly);
6160
6161 /** @todo we could reduce this to a single branch instruction by spending a
6162 * temporary register and some setnz stuff. Not sure if loops are
6163 * worth it. */
6164 /* Check CX. */
6165 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6166
6167 /* Check the EFlags bit. */
6168 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6169 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6170 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6171 !fCheckIfSet /*fJmpIfSet*/);
6172
6173 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6174 iemNativeRegFreeTmp(pReNative, idxEflReg);
6175
6176 iemNativeCondStartIfBlock(pReNative, off);
6177 return off;
6178}
6179
6180
6181#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6182 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
6183 do {
6184
6185#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6186 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
6187 do {
6188
6189#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6190 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
6191 do {
6192
6193#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6194 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
6195 do {
6196
6197/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
6198 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
6199 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
6200 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6201DECL_INLINE_THROW(uint32_t)
6202iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6203 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
6204{
6205 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6206
6207 /* We have to load both RCX and EFLAGS before we can start branching,
6208 otherwise we'll end up in the else-block with an inconsistent
6209 register allocator state.
6210 Doing EFLAGS first as it's more likely to be loaded, right? */
6211 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6212 kIemNativeGstRegUse_ReadOnly);
6213 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6214 kIemNativeGstRegUse_ReadOnly);
6215
6216 /** @todo we could reduce this to a single branch instruction by spending a
6217 * temporary register and some setnz stuff. Not sure if loops are
6218 * worth it. */
6219 /* Check RCX/ECX. */
6220 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6221
6222 /* Check the EFlags bit. */
6223 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6224 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6225 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6226 !fCheckIfSet /*fJmpIfSet*/);
6227
6228 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6229 iemNativeRegFreeTmp(pReNative, idxEflReg);
6230
6231 iemNativeCondStartIfBlock(pReNative, off);
6232 return off;
6233}
6234
6235
6236
6237/*********************************************************************************************************************************
6238* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6239*********************************************************************************************************************************/
6240/** Number of hidden arguments for CIMPL calls.
6241 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
6242#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6243# define IEM_CIMPL_HIDDEN_ARGS 3
6244#else
6245# define IEM_CIMPL_HIDDEN_ARGS 2
6246#endif
6247
6248#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
6249 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
6250
6251#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
6252 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
6253
6254#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
6255 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
6256
6257#define IEM_MC_LOCAL(a_Type, a_Name) \
6258 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
6259
6260#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
6261 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
6262
6263
6264/**
6265 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
6266 */
6267DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
6268{
6269 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
6270 return IEM_CIMPL_HIDDEN_ARGS;
6271 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
6272 return 1;
6273 return 0;
6274}
6275
6276
6277/**
6278 * Internal work that allocates a variable with kind set to
6279 * kIemNativeVarKind_Invalid and no current stack allocation.
6280 *
6281 * The kind will either be set by the caller or later when the variable is first
6282 * assigned a value.
6283 */
6284static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6285{
6286 Assert(cbType > 0 && cbType <= 64);
6287 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6288 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6289 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6290 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6291 pReNative->Core.aVars[idxVar].cbVar = cbType;
6292 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6293 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6294 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6295 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6296 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6297 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6298 pReNative->Core.aVars[idxVar].u.uValue = 0;
6299 return idxVar;
6300}
6301
6302
6303/**
6304 * Internal work that allocates an argument variable w/o setting enmKind.
6305 */
6306static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6307{
6308 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6309 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6310 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6311
6312 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6313 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
6314 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6315 return idxVar;
6316}
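
/* Example (names picked for illustration): for a CIMPL call with the usual
   IEM_CIMPL_HIDDEN_ARGS == 2, IEM_MC_ARG(uint16_t, u16Port, 0) lands in
   aidxArgVars[2] and will later be given the third host call argument
   register (g_aidxIemNativeCallRegs[2]), leaving slots 0 and 1 for the
   hidden arguments. */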
6317
6318
6319/**
6320 * Gets the stack slot for a stack variable, allocating one if necessary.
6321 *
6322 * Calling this function implies that the stack slot will contain a valid
6323 * variable value. The caller deals with any register currently assigned to the
6324 * variable, typically by spilling it into the stack slot.
6325 *
6326 * @returns The stack slot number.
6327 * @param pReNative The recompiler state.
6328 * @param idxVar The variable.
6329 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6330 */
6331DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6332{
6333 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6334 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
6335
6336 /* Already got a slot? */
6337 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6338 if (idxStackSlot != UINT8_MAX)
6339 {
6340 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6341 return idxStackSlot;
6342 }
6343
6344 /*
6345 * A single slot is easy to allocate.
6346 * Allocate them from the top end, closest to BP, to reduce the displacement.
6347 */
6348 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
6349 {
6350 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6351 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6352 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6353 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6354 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
6355 return (uint8_t)iSlot;
6356 }
6357
6358 /*
6359 * We need more than one stack slot.
6360 *
6361 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6362 */
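 /* Worked example: a 32 byte variable needs 4 slots, so fBitAllocMask ends up
    as 0xf and fBitAlignMask as 3, i.e. the loop below only considers slot
    indexes that are multiples of 4. */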
6363 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6364 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
6365 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
6366 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
6367 uint32_t bmStack = ~pReNative->Core.bmStack;
6368 while (bmStack != UINT32_MAX)
6369 {
6370/** @todo allocate from the top to reduce BP displacement. */
6371 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
6372 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6373 if (!(iSlot & fBitAlignMask))
6374 {
6375 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
6376 {
6377 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6378 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6379 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
6380 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
6381 return (uint8_t)iSlot;
6382 }
6383 }
6384 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
6385 }
6386 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6387}
6388
6389
6390/**
6391 * Changes the variable to a stack variable.
6392 *
6393 * Currently this is only possible to do the first time the variable is used;
6394 * switching later can be implemented but hasn't been done.
6395 *
6396 * @param pReNative The recompiler state.
6397 * @param idxVar The variable.
6398 * @throws VERR_IEM_VAR_IPE_2
6399 */
6400static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6401{
6402 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6403 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6404 {
6405 /* We could in theory transition from immediate to stack as well, but it
6406 would involve the caller doing work storing the value on the stack. So,
6407 till that's required we only allow transition from invalid. */
6408 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6409 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6410 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6411 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
6412
6413 /* Note! We don't allocate a stack slot here, that's only done when a
6414 slot is actually needed to hold a variable value. */
6415 }
6416}
6417
6418
6419/**
6420 * Sets it to a variable with a constant value.
6421 *
6422 * This does not require stack storage as we know the value and can always
6423 * reload it, unless of course it's referenced.
6424 *
6425 * @param pReNative The recompiler state.
6426 * @param idxVar The variable.
6427 * @param uValue The immediate value.
6428 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6429 */
6430static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6431{
6432 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6433 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
6434 {
6435 /* Only simple transitions for now. */
6436 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6437 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6438 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
6439 }
6440 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6441
6442 pReNative->Core.aVars[idxVar].u.uValue = uValue;
6443 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
6444 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
6445 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
6446}
6447
6448
6449/**
6450 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6451 *
6452 * This does not require stack storage as we know the value and can always
6453 * reload it. Loading is postponed till needed.
6454 *
6455 * @param pReNative The recompiler state.
6456 * @param idxVar The variable.
6457 * @param idxOtherVar The variable to take the (stack) address of.
6458 *
6459 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6460 */
6461static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6462{
6463 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6464 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6465
6466 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6467 {
6468 /* Only simple transitions for now. */
6469 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6470 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6471 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6472 }
6473 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6474
6475 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
6476
6477 /* Update the other variable, ensure it's a stack variable. */
6478 /** @todo handle variables with const values... that'll go boom now. */
6479 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6480 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
6481}
6482
6483
6484/**
6485 * Sets the variable to a reference (pointer) to a guest register reference.
6486 *
6487 * This does not require stack storage as we know the value and can always
6488 * reload it. Loading is postponed till needed.
6489 *
6490 * @param pReNative The recompiler state.
6491 * @param idxVar The variable.
6492 * @param enmRegClass The class of guest registers to reference.
6493 * @param idxReg The register within @a enmRegClass to reference.
6494 *
6495 * @throws VERR_IEM_VAR_IPE_2
6496 */
6497static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6498 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6499{
6500 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6501
6502 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
6503 {
6504 /* Only simple transitions for now. */
6505 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6506 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6507 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
6508 }
6509 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6510
6511 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
6512 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
6513}
6514
6515
6516DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6517{
6518 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6519}
6520
6521
6522DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6523{
6524 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6525
6526 /* Since we're using a generic uint64_t value type, we must truncate it if
6527 the variable is smaller, otherwise we may end up with too large a value when
6528 scaling up an imm8 w/ sign-extension.
6529
6530 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
6531 in the bios, bx=1) when running on arm, because clang expects 16-bit
6532 register parameters to have bits 16 and up set to zero. Instead of
6533 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
6534 CF value in the result. */
6535 switch (cbType)
6536 {
6537 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6538 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6539 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6540 }
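 /* Illustration: cbType == sizeof(uint16_t) with uValue == UINT64_MAX (a
    sign-extended imm8 of 0xff) is truncated to 0xffff here, so the 16-bit
    argument register gets bits 16 and up cleared as clang expects on arm. */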
6541 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6542 return idxVar;
6543}
6544
6545
6546DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6547{
6548 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6549 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6550 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6551 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6552
6553 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6554 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
6555 return idxArgVar;
6556}
6557
6558
6559DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6560{
6561 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6562 /* Don't set to stack now, leave that to the first use as for instance
6563 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6564 return idxVar;
6565}
6566
6567
6568DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6569{
6570 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6571
6572 /* Since we're using a generic uint64_t value type, we must truncate it if
6573 the variable is smaller, otherwise we may end up with too large a value when
6574 scaling up an imm8 w/ sign-extension. */
6575 switch (cbType)
6576 {
6577 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6578 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6579 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6580 }
6581 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6582 return idxVar;
6583}
6584
6585
6586/**
6587 * Releases the variable's register.
6588 *
6589 * The register must have been previously acquired by calling
6590 * iemNativeVarRegisterAcquire(), iemNativeVarRegisterAcquireForGuestReg() or
6591 * iemNativeVarRegisterSetAndAcquire().
6592 */
6593DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6594{
6595 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6596 Assert(pReNative->Core.aVars[idxVar].fRegAcquired);
6597 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6598}
6599
6600
6601/**
6602 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6603 * fixed till we call iemNativeVarRegisterRelease.
6604 *
6605 * @returns The host register number.
6606 * @param pReNative The recompiler state.
6607 * @param idxVar The variable.
6608 * @param poff Pointer to the instruction buffer offset.
6609 * In case a register needs to be freed up or the value
6610 * loaded off the stack.
6611 * @param fInitialized Set if the variable must already have been initialized.
6612 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6613 * the case.
6614 * @param idxRegPref Preferred register number or UINT8_MAX.
6615 */
6616DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6617 bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX)
6618{
6619 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6620 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
6621 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6622
6623 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6624 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6625 {
6626 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
6627 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6628 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6629 return idxReg;
6630 }
6631
6632 /*
6633 * If the kind of variable has not yet been set, default to 'stack'.
6634 */
6635 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
6636 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6637 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
6638 iemNativeVarSetKindToStack(pReNative, idxVar);
6639
6640 /*
6641 * We have to allocate a register for the variable, even if it's a stack one,
6642 * as we don't know if there are modifications being made to it before it's
6643 * finalized (todo: analyze and insert hints about that?).
6644 *
6645 * If we can, we try to get the correct register for argument variables. This
6646 * is assuming that most argument variables are fetched as close as possible
6647 * to the actual call, so that there aren't any interfering hidden calls
6648 * (memory accesses, etc.) in between.
6649 *
6650 * If we cannot, or it's a local variable, we make sure no argument registers
6651 * that will be used by this MC block will be allocated here, and we always
6652 * prefer non-volatile registers to avoid needing to spill stuff for internal
6653 * calls.
6654 */
6655 /** @todo Detect too early argument value fetches and warn about hidden
6656 * calls causing less optimal code to be generated in the python script. */
6657
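 /* In short, the selection order below is: 1) the call register matching the
    variable's argument number, if free; 2) the caller's preferred register, if
    valid and free; 3) any free non-argument register, preferring non-volatile
    ones; 4) otherwise let iemNativeRegAllocFindFree evict something. */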
6658 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6659 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6660 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6661 {
6662 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6663 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6664 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6665 }
6666 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6667 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6668 {
6669 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6670 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6671 & ~pReNative->Core.bmHstRegsWithGstShadow
6672 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6673 & fNotArgsMask;
6674 if (fRegs)
6675 {
6676 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
6677 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6678 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6679 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6680 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6681 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6682 }
6683 else
6684 {
6685 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6686 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6687 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6688 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6689 }
6690 }
6691 else
6692 {
6693 idxReg = idxRegPref;
6694 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6695 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
6696 }
6697 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6698 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6699
6700 /*
6701 * Load it off the stack if we've got a stack slot.
6702 */
6703 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6704 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6705 {
6706 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6707 switch (pReNative->Core.aVars[idxVar].cbVar)
6708 {
6709 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6710 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6711 case 3: AssertFailed(); RT_FALL_THRU();
6712 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6713 default: AssertFailed(); RT_FALL_THRU();
6714 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6715 }
6716 }
6717 else
6718 {
6719 Assert(idxStackSlot == UINT8_MAX);
6720 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6721 }
6722 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6723 return idxReg;
6724}
6725
6726
6727/**
6728 * The value of variable @a idxVar will be written in full to the @a enmGstReg
6729 * guest register.
6730 *
6731 * This function makes sure there is a register for it and sets it to be the
6732 * current shadow copy of @a enmGstReg.
6733 *
6734 * @returns The host register number.
6735 * @param pReNative The recompiler state.
6736 * @param idxVar The variable.
6737 * @param enmGstReg The guest register this variable will be written to
6738 * after this call.
6739 * @param poff Pointer to the instruction buffer offset.
6740 * In case a register needs to be freed up or if the
6741 * variable content needs to be loaded off the stack.
6742 *
6743 * @note We DO NOT expect @a idxVar to be an argument variable,
6744 * because this function can only be used in the commit stage of an
6745 * instruction.
6746 */
6747DECL_HIDDEN_THROW(uint8_t)
6748iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
6749{
6750 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6751 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6752 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
6753 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
6754 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
6755 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
6756 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
6757 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6758
6759 /*
6760 * This shouldn't ever be used for arguments, unless it's in a weird else
6761 * branch that doesn't do any calling and even then it's questionable.
6762 *
6763 * However, in case someone writes crazy wrong MC code and does register
6764 * updates before making calls, just use the regular register allocator to
6765 * ensure we get a register suitable for the intended argument number.
6766 */
6767 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
6768
6769 /*
6770 * If there is already a register for the variable, we transfer/set the
6771 * guest shadow copy assignment to it.
6772 */
6773 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6774 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6775 {
6776 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
6777 {
6778 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
6779 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
6780 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
6781 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
6782 }
6783 else
6784 {
6785 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
6786 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
6787 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
6788 }
6789 /** @todo figure this one out. We need some way of making sure the register isn't
6790 * modified after this point, just in case we start writing crappy MC code. */
6791 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
6792 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6793 return idxReg;
6794 }
6795 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
6796
6797 /*
6798 * Because this is supposed to be the commit stage, we just tag along with the
6799 * temporary register allocator and upgrade it to a variable register.
6800 */
6801 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
6802 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
6803 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
6804 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
6805 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
6806 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6807
6808 /*
6809 * Now we need to load the register value.
6810 */
6811 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
6812 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
6813 else
6814 {
6815 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
6816 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6817 switch (pReNative->Core.aVars[idxVar].cbVar)
6818 {
6819 case sizeof(uint64_t):
6820 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
6821 break;
6822 case sizeof(uint32_t):
6823 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
6824 break;
6825 case sizeof(uint16_t):
6826 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
6827 break;
6828 case sizeof(uint8_t):
6829 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
6830 break;
6831 default:
6832 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6833 }
6834 }
6835
6836 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6837 return idxReg;
6838}
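/*
 * Illustrative note (not part of the generated code): if the variable above is,
 * say, a 32-bit stack variable whose slot happens to resolve to bp-0x28, the
 * reload boils down to iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, -0x28);
 * the other sizes simply pick the matching LoadGprByBp variant, and any size
 * other than 1/2/4/8 bytes triggers the VERR_IEM_VAR_IPE_6 assertion.
 */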
6839
6840
6841/**
6842 * Sets the host register for @a idxVarRc to @a idxReg.
6843 *
6844 * The register must not be allocated. Any guest register shadowing will be
6845 * implicitly dropped by this call.
6846 *
6847 * The variable must not have any register associated with it (causes
6848 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
6849 * implied.
6850 *
6851 * @returns idxReg
6852 * @param pReNative The recompiler state.
6853 * @param idxVar The variable.
6854 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
6855 * @param off For recording in debug info.
6856 *
6857 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
6858 */
6859DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
6860{
6861 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6862 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6863 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6864 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
6865 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
6866
6867 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
6868 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6869
6870 iemNativeVarSetKindToStack(pReNative, idxVar);
6871 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6872
6873 return idxReg;
6874}
6875
6876
6877/**
6878 * A convenient helper function.
6879 */
6880DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6881 uint8_t idxReg, uint32_t *poff)
6882{
6883 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
6884 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6885 return idxReg;
6886}
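/*
 * Usage sketch (illustrative): the typical caller of the two helpers above is
 * the AIMPL call emitter further down in this file, which binds the return
 * register to the result variable right after the call without emitting a copy:
 *
 *      off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
 *      iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
 *
 * The variable is converted to a stack kind and simply takes ownership of
 * IEMNATIVE_CALL_RET_GREG.
 */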
6887
6888
6889/**
6890 * Worker that frees the stack slots for variable @a idxVar if any allocated.
6891 *
6892 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
6893 */
6894DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6895{
6896 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6897 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6898 {
6899 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
6900 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
6901 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
6902 Assert(cSlots > 0);
6903 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
6904 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
6905 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
6906 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6907 }
6908 else
6909 Assert(idxStackSlot == UINT8_MAX);
6910}
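/*
 * Worked example (illustrative only): a hypothetical 16 byte variable that was
 * given stack slot 4 has cSlots = (16 + 7) / 8 = 2 and fAllocMask = 0x3, so the
 * worker above clears bits 0x3 << 4 = 0x30 in Core.bmStack and resets the
 * variable's idxStackSlot to UINT8_MAX.
 */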
6911
6912
6913/**
6914 * Worker that frees a single variable.
6915 *
6916 * ASSUMES that @a idxVar is valid.
6917 */
6918DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6919{
6920 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
6921 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
6922
6923 /* Free the host register first if any assigned. */
6924 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6925 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6926 {
6927 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
6928 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
6929 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6930 }
6931
6932 /* Free argument mapping. */
6933 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6934 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
6935 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
6936
6937 /* Free the stack slots. */
6938 iemNativeVarFreeStackSlots(pReNative, idxVar);
6939
6940 /* Free the actual variable. */
6941 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6942 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6943}
6944
6945
6946/**
6947 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
6948 */
6949DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
6950{
6951 while (bmVars != 0)
6952 {
6953 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
6954 bmVars &= ~RT_BIT_32(idxVar);
6955
6956#if 1 /** @todo optimize by simplifying this later... */
6957 iemNativeVarFreeOneWorker(pReNative, idxVar);
6958#else
6959 /* Only need to free the host register, the rest is done as bulk updates below. */
6960 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6961 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6962 {
6963 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
6964 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
6965 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6966 }
6967#endif
6968 }
6969#if 0 /** @todo optimize by simplifying this later... */
6970 pReNative->Core.bmVars = 0;
6971 pReNative->Core.bmStack = 0;
6972 pReNative->Core.u64ArgVars = UINT64_MAX;
6973#endif
6974}
6975
6976
6977/**
6978 * This is called by IEM_MC_END() to clean up all variables.
6979 */
6980DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
6981{
6982 uint32_t const bmVars = pReNative->Core.bmVars;
6983 if (bmVars != 0)
6984 iemNativeVarFreeAllSlow(pReNative, bmVars);
6985 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
6986 Assert(pReNative->Core.bmStack == 0);
6987}
6988
6989
6990#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
6991
6992/**
6993 * This is called by IEM_MC_FREE_LOCAL.
6994 */
6995DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6996{
6997 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6998 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
6999 iemNativeVarFreeOneWorker(pReNative, idxVar);
7000}
7001
7002
7003#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
7004
7005/**
7006 * This is called by IEM_MC_FREE_ARG.
7007 */
7008DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7009{
7010 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7011 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
7012 iemNativeVarFreeOneWorker(pReNative, idxVar);
7013}
7014
7015
7016#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
7017
7018/**
7019 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
7020 */
7021DECL_INLINE_THROW(uint32_t)
7022iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
7023{
7024 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
7025 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
7026 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7027 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
7028 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
7029
7030 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
7031 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
7032 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
7033 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7034
7035 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
7036
7037 /*
7038 * Special case for immediates.
7039 */
7040 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
7041 {
7042 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7043 {
7044 case sizeof(uint16_t):
7045 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7046 break;
7047 case sizeof(uint32_t):
7048 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7049 break;
7050 default: AssertFailed(); break;
7051 }
7052 }
7053 else
7054 {
7055 /*
7056 * The generic solution for now.
7057 */
7058 /** @todo optimize this by having the python script make sure the source
7059 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
7060 * statement. Then we could just transfer the register assignments. */
7061 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
7062 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
7063 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7064 {
7065 case sizeof(uint16_t):
7066 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
7067 break;
7068 case sizeof(uint32_t):
7069 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
7070 break;
7071 default: AssertFailed(); break;
7072 }
7073 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
7074 iemNativeVarRegisterRelease(pReNative, idxVarDst);
7075 }
7076 return off;
7077}
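/*
 * Illustrative MC fragment (hypothetical, not taken from any particular
 * instruction):
 *
 *      IEM_MC_LOCAL(uint32_t, u32Dst);
 *      IEM_MC_ASSIGN_TO_SMALLER(u32Dst, u64Src);
 *
 * With a stack-kind u64Src this acquires host registers for both variables and
 * emits a 32-bit GPR-to-GPR copy; with an immediate u64Src no code is emitted
 * and u32Dst simply becomes a narrower constant.
 */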
7078
7079
7080
7081/*********************************************************************************************************************************
7082* Emitters for IEM_MC_CALL_CIMPL_XXX *
7083*********************************************************************************************************************************/
7084
7085/**
7086 * Emits code to load a reference to the given guest register into @a idxGprDst.
7087 */
7088DECL_INLINE_THROW(uint32_t)
7089iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7090 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7091{
7092 /*
7093 * Get the offset relative to the CPUMCTX structure.
7094 */
7095 uint32_t offCpumCtx;
7096 switch (enmClass)
7097 {
7098 case kIemNativeGstRegRef_Gpr:
7099 Assert(idxRegInClass < 16);
7100 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7101 break;
7102
7103         case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
7104 Assert(idxRegInClass < 4);
7105 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7106 break;
7107
7108 case kIemNativeGstRegRef_EFlags:
7109 Assert(idxRegInClass == 0);
7110 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7111 break;
7112
7113 case kIemNativeGstRegRef_MxCsr:
7114 Assert(idxRegInClass == 0);
7115 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7116 break;
7117
7118 case kIemNativeGstRegRef_FpuReg:
7119 Assert(idxRegInClass < 8);
7120 AssertFailed(); /** @todo what kind of indexing? */
7121 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7122 break;
7123
7124 case kIemNativeGstRegRef_MReg:
7125 Assert(idxRegInClass < 8);
7126 AssertFailed(); /** @todo what kind of indexing? */
7127 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7128 break;
7129
7130 case kIemNativeGstRegRef_XReg:
7131 Assert(idxRegInClass < 16);
7132 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7133 break;
7134
7135 default:
7136 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7137 }
7138
7139 /*
7140     * Load the address into the destination register.
7141 */
7142#ifdef RT_ARCH_AMD64
7143 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7144
7145#elif defined(RT_ARCH_ARM64)
7146 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7147 Assert(offCpumCtx < 4096);
7148 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7149
7150#else
7151# error "Port me!"
7152#endif
7153
7154 return off;
7155}
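/*
 * Example (illustrative): for kIemNativeGstRegRef_Gpr with idxRegInClass = 3
 * (rBX), offCpumCtx is RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3]).  On AMD64 the
 * emitted LEA computes pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx,
 * while on ARM64 it is a single ADD of the 12-bit immediate offCpumCtx to
 * IEMNATIVE_REG_FIXED_PCPUMCTX.
 */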
7156
7157
7158/**
7159 * Common code for CIMPL and AIMPL calls.
7160 *
7161 * These are calls that use argument variables and such.  They should not be
7162 * confused with internal calls required to implement an MC operation,
7163 * like a TLB load and similar.
7164 *
7165 * Upon return all that is left to do is to load any hidden arguments and
7166 * perform the call. All argument variables are freed.
7167 *
7168 * @returns New code buffer offset; throws VBox status code on error.
7169 * @param pReNative The native recompile state.
7170 * @param off The code buffer offset.
7171 * @param   cArgs           The total number of arguments (includes hidden
7172 * count).
7173 * @param cHiddenArgs The number of hidden arguments. The hidden
7174 * arguments must not have any variable declared for
7175 * them, whereas all the regular arguments must
7176 * (tstIEMCheckMc ensures this).
7177 */
7178DECL_HIDDEN_THROW(uint32_t)
7179iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7180{
7181#ifdef VBOX_STRICT
7182 /*
7183 * Assert sanity.
7184 */
7185 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7186 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7187 for (unsigned i = 0; i < cHiddenArgs; i++)
7188 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7189 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7190 {
7191 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7192 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7193 }
7194 iemNativeRegAssertSanity(pReNative);
7195#endif
7196
7197 /*
7198 * Before we do anything else, go over variables that are referenced and
7199 * make sure they are not in a register.
7200 */
7201 uint32_t bmVars = pReNative->Core.bmVars;
7202 if (bmVars)
7203 {
7204 do
7205 {
7206 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7207 bmVars &= ~RT_BIT_32(idxVar);
7208
7209 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7210 {
7211 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7212 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7213 {
7214 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7215 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7216 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7217 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7218 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7219
7220 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7221 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7222 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7223 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7224 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7225 }
7226 }
7227 } while (bmVars != 0);
7228#if 0 //def VBOX_STRICT
7229 iemNativeRegAssertSanity(pReNative);
7230#endif
7231 }
7232
7233 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7234
7235 /*
7236 * First, go over the host registers that will be used for arguments and make
7237 * sure they either hold the desired argument or are free.
7238 */
7239 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7240 {
7241 for (uint32_t i = 0; i < cRegArgs; i++)
7242 {
7243 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7244 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7245 {
7246 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7247 {
7248 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7249 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7250 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
7251 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7252 if (uArgNo == i)
7253                     { /* perfect */ }
7254 /* The variable allocator logic should make sure this is impossible,
7255 except for when the return register is used as a parameter (ARM,
7256 but not x86). */
7257#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7258 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7259 {
7260# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7261# error "Implement this"
7262# endif
7263 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7264 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7265 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7266 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7267 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7268 }
7269#endif
7270 else
7271 {
7272 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7273
7274 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
7275 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7276 else
7277 {
7278 /* just free it, can be reloaded if used again */
7279 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7280 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7281 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7282 }
7283 }
7284 }
7285 else
7286 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7287 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7288 }
7289 }
7290#if 0 //def VBOX_STRICT
7291 iemNativeRegAssertSanity(pReNative);
7292#endif
7293 }
7294
7295 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7296
7297#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7298 /*
7299 * If there are any stack arguments, make sure they are in their place as well.
7300 *
7301 * We can use IEMNATIVE_CALL_ARG0_GREG as temporary register since we'll (or
7302     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7303     * the caller) will be loading it later and it must be free (see the first loop).
7304 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7305 {
7306 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7307 {
7308 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7309 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7310 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7311 {
7312 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7313 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
7314 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
7315 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7316 }
7317 else
7318 {
7319 /* Use ARG0 as temp for stuff we need registers for. */
7320 switch (pReNative->Core.aVars[idxVar].enmKind)
7321 {
7322 case kIemNativeVarKind_Stack:
7323 {
7324 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7325 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7326 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7327 iemNativeStackCalcBpDisp(idxStackSlot));
7328 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7329 continue;
7330 }
7331
7332 case kIemNativeVarKind_Immediate:
7333 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
7334 continue;
7335
7336 case kIemNativeVarKind_VarRef:
7337 {
7338 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
7339 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7340 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
7341 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7342 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7343 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7344 {
7345 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7346 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7347 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7348 }
7349 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7350 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7351 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7352 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7353 continue;
7354 }
7355
7356 case kIemNativeVarKind_GstRegRef:
7357 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
7358 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
7359 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
7360 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7361 continue;
7362
7363 case kIemNativeVarKind_Invalid:
7364 case kIemNativeVarKind_End:
7365 break;
7366 }
7367 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7368 }
7369 }
7370# if 0 //def VBOX_STRICT
7371 iemNativeRegAssertSanity(pReNative);
7372# endif
7373 }
7374#else
7375 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
7376#endif
7377
7378 /*
7379 * Make sure the argument variables are loaded into their respective registers.
7380 *
7381 * We can optimize this by ASSUMING that any register allocations are for
7382     * registers that have already been loaded and are ready.  The previous step
7383 * saw to that.
7384 */
7385 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
7386 {
7387 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7388 {
7389 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7390 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7391 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
7392 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
7393 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
7394 else
7395 {
7396 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7397 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7398 {
7399 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
7400 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
7401 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
7402 | RT_BIT_32(idxArgReg);
7403 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
7404 }
7405 else
7406 {
7407 /* Use ARG0 as temp for stuff we need registers for. */
7408 switch (pReNative->Core.aVars[idxVar].enmKind)
7409 {
7410 case kIemNativeVarKind_Stack:
7411 {
7412 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7413 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7414 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
7415 continue;
7416 }
7417
7418 case kIemNativeVarKind_Immediate:
7419 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
7420 continue;
7421
7422 case kIemNativeVarKind_VarRef:
7423 {
7424 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
7425 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7426 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
7427 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7428 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7429 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7430 {
7431 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7432 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7433 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7434 }
7435 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7436 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7437 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
7438 continue;
7439 }
7440
7441 case kIemNativeVarKind_GstRegRef:
7442 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
7443 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
7444 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
7445 continue;
7446
7447 case kIemNativeVarKind_Invalid:
7448 case kIemNativeVarKind_End:
7449 break;
7450 }
7451 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7452 }
7453 }
7454 }
7455#if 0 //def VBOX_STRICT
7456 iemNativeRegAssertSanity(pReNative);
7457#endif
7458 }
7459#ifdef VBOX_STRICT
7460 else
7461 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7462 {
7463 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
7464 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
7465 }
7466#endif
7467
7468 /*
7469 * Free all argument variables (simplified).
7470 * Their lifetime always expires with the call they are for.
7471 */
7472 /** @todo Make the python script check that arguments aren't used after
7473 * IEM_MC_CALL_XXXX. */
7474     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends
7475      *        requiring an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call,
7476      *        typically with an argument value.  There is also some FPU stuff. */
7477 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
7478 {
7479 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7480 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7481
7482 /* no need to free registers: */
7483 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
7484 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
7485 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
7486 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
7487 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
7488 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
7489
7490 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
7491 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7492 iemNativeVarFreeStackSlots(pReNative, idxVar);
7493 }
7494 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7495
7496 /*
7497 * Flush volatile registers as we make the call.
7498 */
7499 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
7500
7501 return off;
7502}
7503
7504
7505/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
7506DECL_HIDDEN_THROW(uint32_t)
7507iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
7508 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
7509
7510{
7511 /*
7512 * Do all the call setup and cleanup.
7513 */
7514 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
7515
7516 /*
7517 * Load the two or three hidden arguments.
7518 */
7519#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7520 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7521 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7522 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
7523#else
7524 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7525 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
7526#endif
7527
7528 /*
7529 * Make the call and check the return code.
7530 *
7531 * Shadow PC copies are always flushed here, other stuff depends on flags.
7532     * Segment and general purpose registers are explicitly flushed via the
7533 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
7534 * macros.
7535 */
7536 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
7537#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7538 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7539#endif
7540 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
7541 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
7542 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
7543 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
7544
7545 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7546}
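/*
 * Illustrative argument layout for the common (non-strict-rc) build above,
 * assuming the default calling convention macros:
 *
 *      IEMNATIVE_CALL_ARG0_GREG = pVCpu      (hidden argument 0)
 *      IEMNATIVE_CALL_ARG1_GREG = cbInstr    (hidden argument 1)
 *      IEMNATIVE_CALL_ARG2_GREG onwards      = the recompiled MC arguments
 *
 * which is why the per-arity emitters below assert their variables at
 * N + IEM_CIMPL_HIDDEN_ARGS.
 */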
7547
7548
7549#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7550 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
7551
7552/** Emits code for IEM_MC_CALL_CIMPL_1. */
7553DECL_INLINE_THROW(uint32_t)
7554iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7555 uintptr_t pfnCImpl, uint8_t idxArg0)
7556{
7557 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7558 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
7559}
7560
7561
7562#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7563 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
7564
7565/** Emits code for IEM_MC_CALL_CIMPL_2. */
7566DECL_INLINE_THROW(uint32_t)
7567iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7568 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
7569{
7570 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7571 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7572 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
7573}
7574
7575
7576#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7577 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7578 (uintptr_t)a_pfnCImpl, a0, a1, a2)
7579
7580/** Emits code for IEM_MC_CALL_CIMPL_3. */
7581DECL_INLINE_THROW(uint32_t)
7582iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7583 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7584{
7585 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7586 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7587 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7588 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
7589}
7590
7591
7592#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
7593 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7594 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
7595
7596/** Emits code for IEM_MC_CALL_CIMPL_4. */
7597DECL_INLINE_THROW(uint32_t)
7598iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7599 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7600{
7601 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7602 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7603 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7604 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7605 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
7606}
7607
7608
7609#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
7610 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7611 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
7612
7613/** Emits code for IEM_MC_CALL_CIMPL_5. */
7614DECL_INLINE_THROW(uint32_t)
7615iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7616 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
7617{
7618 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7619 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7620 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7621 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7622 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
7623 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
7624}
7625
7626
7627/** Recompiler debugging: Flush guest register shadow copies. */
7628#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
7629
7630
7631
7632/*********************************************************************************************************************************
7633* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
7634*********************************************************************************************************************************/
7635
7636/**
7637 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
7638 */
7639DECL_INLINE_THROW(uint32_t)
7640iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7641 uintptr_t pfnAImpl, uint8_t cArgs)
7642{
7643 if (idxVarRc != UINT8_MAX)
7644 {
7645 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
7646 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
7647 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
7648 }
7649
7650 /*
7651 * Do all the call setup and cleanup.
7652 */
7653 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
7654
7655 /*
7656 * Make the call and update the return code variable if we've got one.
7657 */
7658 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
7659 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
7660 {
7661pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
7662 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
7663 }
7664
7665 return off;
7666}
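/*
 * Rough sketch (illustrative only, exact registers depend on the host calling
 * convention) of what e.g. an IEM_MC_CALL_AIMPL_2 statement is recompiled into
 * on AMD64 when both arguments already sit in host registers:
 *
 *      mov     rdi/rcx, <arg0>     ; IEMNATIVE_CALL_ARG0_GREG
 *      mov     rsi/rdx, <arg1>     ; IEMNATIVE_CALL_ARG1_GREG
 *      call    pfnAImpl
 *      ; a_rc then takes ownership of IEMNATIVE_CALL_RET_GREG (eax/rax).
 */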
7667
7668
7669
7670#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
7671 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
7672
7673#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
7674 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
7675
7676/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
7677DECL_INLINE_THROW(uint32_t)
7678iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
7679{
7680 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
7681}
7682
7683
7684#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
7685 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
7686
7687#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
7688 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
7689
7690/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
7691DECL_INLINE_THROW(uint32_t)
7692iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
7693{
7694 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7695 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
7696}
7697
7698
7699#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
7700 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
7701
7702#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
7703 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
7704
7705/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
7706DECL_INLINE_THROW(uint32_t)
7707iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7708 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
7709{
7710 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7711 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7712 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
7713}
7714
7715
7716#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
7717 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
7718
7719#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
7720 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
7721
7722/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
7723DECL_INLINE_THROW(uint32_t)
7724iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7725 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7726{
7727 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7728 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7729 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7730 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
7731}
7732
7733
7734#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
7735 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7736
7737#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
7738 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7739
7740/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
7741DECL_INLINE_THROW(uint32_t)
7742iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7743 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7744{
7745 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7746 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7747 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7748 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
7749 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
7750}
7751
7752
7753
7754/*********************************************************************************************************************************
7755* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
7756*********************************************************************************************************************************/
7757
7758#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
7759 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
7760
7761#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7762 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
7763
7764#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7765 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
7766
7767#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7768 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
7769
7770
7771/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
7772 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
7773DECL_INLINE_THROW(uint32_t)
7774iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
7775{
7776 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7777 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7778 Assert(iGRegEx < 20);
7779
7780 /* Same discussion as in iemNativeEmitFetchGregU16 */
7781 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7782 kIemNativeGstRegUse_ReadOnly);
7783
7784 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7785 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7786
7787 /* The value is zero-extended to the full 64-bit host register width. */
7788 if (iGRegEx < 16)
7789 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7790 else
7791 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7792
7793 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7794 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7795 return off;
7796}
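/*
 * Note on the iGRegEx encoding used by the byte sized fetch/store emitters:
 * values 0..15 select the low byte (AL..R15L) of the corresponding GPR, while
 * 16..19 select the legacy high byte registers AH, CH, DH and BH of GPRs 0..3,
 * hence the "iGRegEx & 15" indexing and the Gpr8Hi load path above.
 */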
7797
7798
7799#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7800 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
7801
7802#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7803 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
7804
7805#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7806 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
7807
7808/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
7809DECL_INLINE_THROW(uint32_t)
7810iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
7811{
7812 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7813 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
7814 Assert(iGRegEx < 20);
7815
7816 /* Same discussion as in iemNativeEmitFetchGregU16 */
7817 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7818 kIemNativeGstRegUse_ReadOnly);
7819
7820 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7821 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7822
7823 if (iGRegEx < 16)
7824 {
7825 switch (cbSignExtended)
7826 {
7827 case sizeof(uint16_t):
7828 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7829 break;
7830 case sizeof(uint32_t):
7831 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7832 break;
7833 case sizeof(uint64_t):
7834 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7835 break;
7836 default: AssertFailed(); break;
7837 }
7838 }
7839 else
7840 {
7841 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7842 switch (cbSignExtended)
7843 {
7844 case sizeof(uint16_t):
7845 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7846 break;
7847 case sizeof(uint32_t):
7848 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7849 break;
7850 case sizeof(uint64_t):
7851 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7852 break;
7853 default: AssertFailed(); break;
7854 }
7855 }
7856
7857 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7858 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7859 return off;
7860}
7861
7862
7863
7864#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
7865 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
7866
7867#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
7868 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7869
7870#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
7871 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7872
7873/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
7874DECL_INLINE_THROW(uint32_t)
7875iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
7876{
7877 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7878 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7879 Assert(iGReg < 16);
7880
7881 /*
7882 * We can either just load the low 16-bit of the GPR into a host register
7883 * for the variable, or we can do so via a shadow copy host register. The
7884 * latter will avoid having to reload it if it's being stored later, but
7885 * will waste a host register if it isn't touched again. Since we don't
7886     * know what's going to happen, we choose the latter for now.
7887 */
7888 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7889 kIemNativeGstRegUse_ReadOnly);
7890
7891 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7892 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7893 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7894 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7895
7896 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7897 return off;
7898}
7899
7900
7901#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
7902 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7903
7904#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
7905 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7906
7907/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
7908DECL_INLINE_THROW(uint32_t)
7909iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
7910{
7911 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7912 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
7913 Assert(iGReg < 16);
7914
7915 /*
7916 * We can either just load the low 16-bit of the GPR into a host register
7917 * for the variable, or we can do so via a shadow copy host register. The
7918 * latter will avoid having to reload it if it's being stored later, but
7919 * will waste a host register if it isn't touched again. Since we don't
7920     * know what's going to happen, we choose the latter for now.
7921 */
7922 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7923 kIemNativeGstRegUse_ReadOnly);
7924
7925 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7926 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7927 if (cbSignExtended == sizeof(uint32_t))
7928 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7929 else
7930 {
7931 Assert(cbSignExtended == sizeof(uint64_t));
7932 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7933 }
7934 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7935
7936 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7937 return off;
7938}
7939
7940
7941#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
7942 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
7943
7944#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
7945 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
7946
7947/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
7948DECL_INLINE_THROW(uint32_t)
7949iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
7950{
7951 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7952 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
7953 Assert(iGReg < 16);
7954
7955 /*
7956     * We can either just load the low 32-bit of the GPR into a host register
7957 * for the variable, or we can do so via a shadow copy host register. The
7958 * latter will avoid having to reload it if it's being stored later, but
7959 * will waste a host register if it isn't touched again. Since we don't
7960     * know what's going to happen, we choose the latter for now.
7961 */
7962 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7963 kIemNativeGstRegUse_ReadOnly);
7964
7965 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7966 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7967 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
7968 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7969
7970 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7971 return off;
7972}
7973
7974
7975#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
7976 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
7977
7978/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
7979DECL_INLINE_THROW(uint32_t)
7980iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
7981{
7982 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7983 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
7984 Assert(iGReg < 16);
7985
7986 /*
7987 * We can either just load the low 32-bit of the GPR into a host register
7988 * for the variable, or we can do so via a shadow copy host register. The
7989 * latter will avoid having to reload it if it's being stored later, but
7990 * will waste a host register if it isn't touched again. Since we don't
7991     * know what's going to happen, we choose the latter for now.
7992 */
7993 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7994 kIemNativeGstRegUse_ReadOnly);
7995
7996 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7997 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7998 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
7999 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8000
8001 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8002 return off;
8003}
8004
8005
8006#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
8007 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8008
8009#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
8010 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8011
8012/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
8013 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
8014DECL_INLINE_THROW(uint32_t)
8015iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8016{
8017 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8018 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8019 Assert(iGReg < 16);
8020
8021 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8022 kIemNativeGstRegUse_ReadOnly);
8023
8024 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8025 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8026 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
8027 /** @todo name the register a shadow one already? */
8028 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8029
8030 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8031 return off;
8032}
8033
8034
8035
8036/*********************************************************************************************************************************
8037* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
8038*********************************************************************************************************************************/
8039
8040#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
8041 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
8042
8043/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
8044DECL_INLINE_THROW(uint32_t)
8045iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
8046{
8047 Assert(iGRegEx < 20);
8048 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8049 kIemNativeGstRegUse_ForUpdate);
8050#ifdef RT_ARCH_AMD64
8051 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8052
8053 /* To the lowest byte of the register: mov r8, imm8 */
8054 if (iGRegEx < 16)
8055 {
8056 if (idxGstTmpReg >= 8)
8057 pbCodeBuf[off++] = X86_OP_REX_B;
8058 else if (idxGstTmpReg >= 4)
8059 pbCodeBuf[off++] = X86_OP_REX;
8060 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8061 pbCodeBuf[off++] = u8Value;
8062 }
8063 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
8064 else if (idxGstTmpReg < 4)
8065 {
8066 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
8067 pbCodeBuf[off++] = u8Value;
8068 }
8069 else
8070 {
8071 /* ror reg64, 8 */
8072 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8073 pbCodeBuf[off++] = 0xc1;
8074 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8075 pbCodeBuf[off++] = 8;
8076
8077 /* mov reg8, imm8 */
8078 if (idxGstTmpReg >= 8)
8079 pbCodeBuf[off++] = X86_OP_REX_B;
8080 else if (idxGstTmpReg >= 4)
8081 pbCodeBuf[off++] = X86_OP_REX;
8082 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8083 pbCodeBuf[off++] = u8Value;
8084
8085 /* rol reg64, 8 */
8086 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8087 pbCodeBuf[off++] = 0xc1;
8088 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8089 pbCodeBuf[off++] = 8;
8090 }
8091
8092#elif defined(RT_ARCH_ARM64)
8093 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
8094 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8095 if (iGRegEx < 16)
8096 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
8097 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
8098 else
8099 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
8100 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
8101 iemNativeRegFreeTmp(pReNative, idxImmReg);
8102
8103#else
8104# error "Port me!"
8105#endif
8106
8107 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8108
8109 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8110
8111 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8112 return off;
8113}
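/*
 * Worked AMD64 example (illustrative): storing 0x12 into AH (iGRegEx = 16) when
 * the guest rAX shadow happens to live in host register r8 takes the rotate
 * path above and produces roughly:
 *
 *      ror r8, 8           ; 49 c1 c8 08
 *      mov r8b, 0x12       ; 41 b0 12
 *      rol r8, 8           ; 49 c1 c0 08
 *
 * followed by the store of the full 64-bit value back into cpum.GstCtx.
 */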
8114
8115
8116#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
8117 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
8118
8119/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
8120DECL_INLINE_THROW(uint32_t)
8121iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
8122{
8123 Assert(iGRegEx < 20);
8124 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8125
8126 /*
8127     * If it's a constant value (unlikely), we treat this as an
8128     * IEM_MC_STORE_GREG_U8_CONST statement.
8129 */
8130 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8131 { /* likely */ }
8132 else
8133 {
8134 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8135 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8136 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8137 }
8138
8139 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8140 kIemNativeGstRegUse_ForUpdate);
8141 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8142
8143#ifdef RT_ARCH_AMD64
8144 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
8145 if (iGRegEx < 16)
8146 {
8147 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
8148 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8149 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8150        else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8151 pbCodeBuf[off++] = X86_OP_REX;
8152 pbCodeBuf[off++] = 0x8a;
8153 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8154 }
8155    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; if not, we rotate. */
8156 else if (idxGstTmpReg < 4 && idxVarReg < 4)
8157 {
8158 /** @todo test this. */
8159 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
8160 pbCodeBuf[off++] = 0x8a;
8161 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
8162 }
8163 else
8164 {
8165 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
8166
8167 /* ror reg64, 8 */
8168 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8169 pbCodeBuf[off++] = 0xc1;
8170 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8171 pbCodeBuf[off++] = 8;
8172
8173 /* mov reg8, reg8(r/m) */
8174 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8175 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8176        else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8177 pbCodeBuf[off++] = X86_OP_REX;
8178 pbCodeBuf[off++] = 0x8a;
8179 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8180
8181 /* rol reg64, 8 */
8182 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8183 pbCodeBuf[off++] = 0xc1;
8184 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8185 pbCodeBuf[off++] = 8;
8186 }
8187
8188#elif defined(RT_ARCH_ARM64)
8189 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
8190 or
8191 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
8192 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8193 if (iGRegEx < 16)
8194 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
8195 else
8196 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
8197
8198#else
8199# error "Port me!"
8200#endif
8201 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8202
8203 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8204
8205 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8206 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8207 return off;
8208}
8209
8210
8211
8212#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
8213 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
8214
8215/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
8216DECL_INLINE_THROW(uint32_t)
8217iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
8218{
8219 Assert(iGReg < 16);
8220 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8221 kIemNativeGstRegUse_ForUpdate);
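    /* Both code paths below only write bits 15:0 of the host register and leave
       bits 63:16 untouched, which is why the guest register is allocated
       ForUpdate rather than ForFullWrite. */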
8222#ifdef RT_ARCH_AMD64
8223 /* mov reg16, imm16 */
8224 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8225 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8226 if (idxGstTmpReg >= 8)
8227 pbCodeBuf[off++] = X86_OP_REX_B;
8228 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
8229 pbCodeBuf[off++] = RT_BYTE1(uValue);
8230 pbCodeBuf[off++] = RT_BYTE2(uValue);
8231
8232#elif defined(RT_ARCH_ARM64)
8233 /* movk xdst, #uValue, lsl #0 */
8234 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8235 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
8236
8237#else
8238# error "Port me!"
8239#endif
8240
8241 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8242
8243 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8244 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8245 return off;
8246}
8247
8248
8249#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
8250 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
8251
8252/** Emits code for IEM_MC_STORE_GREG_U16. */
8253DECL_INLINE_THROW(uint32_t)
8254iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8255{
8256 Assert(iGReg < 16);
8257 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8258
8259 /*
8260     * If it's a constant value (unlikely) we treat this as an
8261 * IEM_MC_STORE_GREG_U16_CONST statement.
8262 */
8263 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8264 { /* likely */ }
8265 else
8266 {
8267 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8268 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8269 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8270 }
8271
8272 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8273 kIemNativeGstRegUse_ForUpdate);
8274
8275#ifdef RT_ARCH_AMD64
8276 /* mov reg16, reg16 or [mem16] */
8277 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8278 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8279 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8280 {
8281 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
8282 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
8283 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
8284 pbCodeBuf[off++] = 0x8b;
8285 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
8286 }
8287 else
8288 {
8289 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
8290 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8291 if (idxGstTmpReg >= 8)
8292 pbCodeBuf[off++] = X86_OP_REX_R;
8293 pbCodeBuf[off++] = 0x8b;
8294 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8295 }
8296
8297#elif defined(RT_ARCH_ARM64)
8298 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
8299 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8300 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8301 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
8302 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8303
8304#else
8305# error "Port me!"
8306#endif
8307
8308 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8309
8310 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8311 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8312 return off;
8313}
8314
8315
8316#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
8317 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
8318
8319/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
8320DECL_INLINE_THROW(uint32_t)
8321iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
8322{
8323 Assert(iGReg < 16);
8324 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8325 kIemNativeGstRegUse_ForFullWrite);
8326 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
8327 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8328 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8329 return off;
8330}
8331
8332
8333#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
8334 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
8335
8336/** Emits code for IEM_MC_STORE_GREG_U32. */
8337DECL_INLINE_THROW(uint32_t)
8338iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8339{
8340 Assert(iGReg < 16);
8341 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8342
8343 /*
8344     * If it's a constant value (unlikely) we treat this as an
8345 * IEM_MC_STORE_GREG_U32_CONST statement.
8346 */
8347 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8348 { /* likely */ }
8349 else
8350 {
8351 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8352 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8353 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8354 }
8355
8356 /*
8357     * For the rest we allocate a guest register for the variable and write
8358 * it to the CPUMCTX structure.
8359 */
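    /* Note: we deliberately store all 64 bits here.  The variable's register is
       expected to be zero extended (32-bit operations clear bits 63:32 on both
       host architectures), which the strict-build check below asserts, so this
       also gives us the architectural zero extension of a 32-bit GPR write. */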
8360 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
8361 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8362#ifdef VBOX_STRICT
8363 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
8364#endif
8365 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8366 return off;
8367}
8368
8369
8370#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
8371 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
8372
8373/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
8374DECL_INLINE_THROW(uint32_t)
8375iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
8376{
8377 Assert(iGReg < 16);
8378 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8379 kIemNativeGstRegUse_ForFullWrite);
8380 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
8381 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8382 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8383 return off;
8384}
8385
8386
8387#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
8388 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
8389
8390/** Emits code for IEM_MC_STORE_GREG_U64. */
8391DECL_INLINE_THROW(uint32_t)
8392iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8393{
8394 Assert(iGReg < 16);
8395 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8396
8397 /*
8398     * If it's a constant value (unlikely) we treat this as an
8399 * IEM_MC_STORE_GREG_U64_CONST statement.
8400 */
8401 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8402 { /* likely */ }
8403 else
8404 {
8405 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8406 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8407 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
8408 }
8409
8410 /*
8411     * For the rest we allocate a guest register for the variable and write
8412 * it to the CPUMCTX structure.
8413 */
8414 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
8415 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8416 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8417 return off;
8418}
8419
8420
8421#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
8422 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
8423
8424/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
8425DECL_INLINE_THROW(uint32_t)
8426iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
8427{
8428 Assert(iGReg < 16);
8429 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8430 kIemNativeGstRegUse_ForUpdate);
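    /* A 32-bit register-to-itself move zero-extends to 64 bits on both AMD64
       and ARM64, which is all that is needed to clear bits 63:32. */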
8431 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
8432 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8433 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8434 return off;
8435}
8436
8437
8438/*********************************************************************************************************************************
8439* General purpose register manipulation (add, sub). *
8440*********************************************************************************************************************************/
8441
8442#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
8443 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
8444
8445/** Emits code for IEM_MC_ADD_GREG_U16. */
8446DECL_INLINE_THROW(uint32_t)
8447iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
8448{
8449 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8450 kIemNativeGstRegUse_ForUpdate);
8451
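    /* Only bits 15:0 of the guest register may change here.  The AMD64 path
       below relies on the operand size prefix (a 16-bit inc/add leaves bits
       63:16 alone), while the ARM64 path adds into a temporary register and
       merges the low 16 bits back with BFI. */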
8452#ifdef RT_ARCH_AMD64
8453 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8454 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8455 if (idxGstTmpReg >= 8)
8456 pbCodeBuf[off++] = X86_OP_REX_B;
8457 if (uAddend == 1)
8458 {
8459 pbCodeBuf[off++] = 0xff; /* inc */
8460 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8461 }
8462 else
8463 {
8464 pbCodeBuf[off++] = 0x81;
8465 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8466 pbCodeBuf[off++] = uAddend;
8467 pbCodeBuf[off++] = 0;
8468 }
8469
8470#else
8471 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8472 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8473
8474    /* add tmp, gstgrp, uAddend */
8475 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
8476
8477 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
8478 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
8479
8480 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8481#endif
8482
8483 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8484
8485 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8486
8487 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8488 return off;
8489}
8490
8491
8492#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
8493 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
8494
8495#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
8496 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
8497
8498/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
8499DECL_INLINE_THROW(uint32_t)
8500iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
8501{
8502 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8503 kIemNativeGstRegUse_ForUpdate);
8504
8505#ifdef RT_ARCH_AMD64
8506 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8507 if (f64Bit)
8508 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
8509 else if (idxGstTmpReg >= 8)
8510 pbCodeBuf[off++] = X86_OP_REX_B;
8511 if (uAddend == 1)
8512 {
8513 pbCodeBuf[off++] = 0xff; /* inc */
8514 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8515 }
8516 else if (uAddend < 128)
8517 {
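        /* Opcode 0x83 sign-extends its imm8, hence the < 128 restriction on the addend. */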
8518 pbCodeBuf[off++] = 0x83; /* add */
8519 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8520 pbCodeBuf[off++] = RT_BYTE1(uAddend);
8521 }
8522 else
8523 {
8524 pbCodeBuf[off++] = 0x81; /* add */
8525 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8526 pbCodeBuf[off++] = RT_BYTE1(uAddend);
8527 pbCodeBuf[off++] = 0;
8528 pbCodeBuf[off++] = 0;
8529 pbCodeBuf[off++] = 0;
8530 }
8531
8532#else
8533    /* add gstgrp, gstgrp, uAddend */
8534 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8535 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
8536
8537#endif
8538
8539 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8540
8541 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8542
8543 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8544 return off;
8545}
8546
8547
8548
8549#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
8550 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
8551
8552/** Emits code for IEM_MC_SUB_GREG_U16. */
8553DECL_INLINE_THROW(uint32_t)
8554iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
8555{
8556 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8557 kIemNativeGstRegUse_ForUpdate);
8558
8559#ifdef RT_ARCH_AMD64
8560 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8561 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8562 if (idxGstTmpReg >= 8)
8563 pbCodeBuf[off++] = X86_OP_REX_B;
8564 if (uSubtrahend == 1)
8565 {
8566 pbCodeBuf[off++] = 0xff; /* dec */
8567 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8568 }
8569 else
8570 {
8571 pbCodeBuf[off++] = 0x81;
8572 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8573 pbCodeBuf[off++] = uSubtrahend;
8574 pbCodeBuf[off++] = 0;
8575 }
8576
8577#else
8578 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8579 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8580
8581 /* sub tmp, gstgrp, uSubtrahend */
8582 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
8583
8584 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
8585 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
8586
8587 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8588#endif
8589
8590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8591
8592 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8593
8594 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8595 return off;
8596}
8597
8598
8599#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
8600 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
8601
8602#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
8603 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
8604
8605/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
8606DECL_INLINE_THROW(uint32_t)
8607iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
8608{
8609 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8610 kIemNativeGstRegUse_ForUpdate);
8611
8612#ifdef RT_ARCH_AMD64
8613 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8614 if (f64Bit)
8615 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
8616 else if (idxGstTmpReg >= 8)
8617 pbCodeBuf[off++] = X86_OP_REX_B;
8618 if (uSubtrahend == 1)
8619 {
8620 pbCodeBuf[off++] = 0xff; /* dec */
8621 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8622 }
8623 else if (uSubtrahend < 128)
8624 {
8625 pbCodeBuf[off++] = 0x83; /* sub */
8626 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8627 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8628 }
8629 else
8630 {
8631 pbCodeBuf[off++] = 0x81; /* sub */
8632 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8633 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8634 pbCodeBuf[off++] = 0;
8635 pbCodeBuf[off++] = 0;
8636 pbCodeBuf[off++] = 0;
8637 }
8638
8639#else
8640 /* sub tmp, gstgrp, uSubtrahend */
8641 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8642 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
8643
8644#endif
8645
8646 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8647
8648 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8649
8650 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8651 return off;
8652}
8653
8654
8655
8656/*********************************************************************************************************************************
8657* EFLAGS *
8658*********************************************************************************************************************************/
8659
8660#define IEM_MC_FETCH_EFLAGS(a_EFlags) \
8661 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
8662
8663/** Handles IEM_MC_FETCH_EFLAGS. */
8664DECL_INLINE_THROW(uint32_t)
8665iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
8666{
8667 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
8668 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
8669
8670 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
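    /* Mark the variable's register as shadowing guest EFLAGS so that later
       EFLAGS reads in this TB can, while the shadow remains valid, be served
       from it instead of being reloaded from CPUMCTX. */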
8671 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8672 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8673 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
8674 return off;
8675}
8676
8677
8678#define IEM_MC_COMMIT_EFLAGS(a_EFlags) \
8679 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
8680
8681/** Handles IEM_MC_COMMIT_EFLAGS. */
8682DECL_INLINE_THROW(uint32_t)
8683iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
8684{
8685 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
8686 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
8687
8688 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
8689
8690#ifdef VBOX_STRICT
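    /* Sanity check the value being committed: break if the must-be-one bit
       (RA1) is clear or if any of the reserved must-be-zero bits are set. */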
8691 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
8692 off = iemNativeEmitJnzToFixed(pReNative, off, 1);
8693 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
8694
8695 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
8696 off = iemNativeEmitJzToFixed(pReNative, off, 1);
8697 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
8698#endif
8699
8700 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8701 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
8702 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
8703 return off;
8704}
8705
8706
8707
8708/*********************************************************************************************************************************
8709* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
8710*********************************************************************************************************************************/
8711
8712#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
8713 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
8714
8715#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
8716 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
8717
8718#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
8719 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
8720
8721
8722/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
8723 * IEM_MC_FETCH_SREG_ZX_U64. */
8724DECL_INLINE_THROW(uint32_t)
8725iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
8726{
8727 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8728 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
8729 Assert(iSReg < X86_SREG_COUNT);
8730
8731 /*
8732     * For now, we will not create a shadow copy of a selector. The rationale
8733     * is that since we do not recompile the popping and loading of segment
8734     * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
8735     * pushing and moving to registers, there is only a small chance that the
8736     * shadow copy will be accessed again before the register is reloaded. One
8737     * scenario would be nested calls in 16-bit code, but I doubt it's worth
8738     * the extra register pressure atm.
8739     *
8740     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
8741     * and iemNativeVarRegisterAcquire for a load scenario. We've only got the
8742     * store scenario covered at present (r160730).
8743 */
8744 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8745 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8746 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
8747 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8748 return off;
8749}
8750
8751
8752
8753/*********************************************************************************************************************************
8754* Register references. *
8755*********************************************************************************************************************************/
8756
8757#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
8758 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
8759
8760#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
8761 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
8762
8763/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
8764DECL_INLINE_THROW(uint32_t)
8765iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
8766{
8767 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8768 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8769 Assert(iGRegEx < 20);
8770
8771 if (iGRegEx < 16)
8772 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8773 else
8774 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
8775
8776 /* If we've delayed writing back the register value, flush it now. */
8777 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8778
8779 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8780 if (!fConst)
8781 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
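    /* (A writable reference means the user of it may modify the register
       directly in CPUMCTX, which would leave any host register shadow copy
       stale, hence the flush above.) */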
8782
8783 return off;
8784}
8785
8786#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
8787 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
8788
8789#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
8790 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
8791
8792#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
8793 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
8794
8795#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
8796 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
8797
8798#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
8799 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
8800
8801#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
8802 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
8803
8804#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
8805 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
8806
8807#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
8808 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
8809
8810#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
8811 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
8812
8813#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
8814 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
8815
8816/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
8817DECL_INLINE_THROW(uint32_t)
8818iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
8819{
8820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8821 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8822 Assert(iGReg < 16);
8823
8824 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
8825
8826 /* If we've delayed writing back the register value, flush it now. */
8827 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
8828
8829 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8830 if (!fConst)
8831 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
8832
8833 return off;
8834}
8835
8836
8837#define IEM_MC_REF_EFLAGS(a_pEFlags) \
8838 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
8839
8840/** Handles IEM_MC_REF_EFLAGS. */
8841DECL_INLINE_THROW(uint32_t)
8842iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
8843{
8844 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8845 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8846
8847 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
8848
8849 /* If we've delayed writing back the register value, flush it now. */
8850 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
8851
8852 /* If there is a shadow copy of guest EFLAGS, flush it now. */
8853 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
8854
8855 return off;
8856}
8857
8858
8859/*********************************************************************************************************************************
8860* Effective Address Calculation *
8861*********************************************************************************************************************************/
8862#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
8863 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
8864
8865/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
8866 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
8867DECL_INLINE_THROW(uint32_t)
8868iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8869 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
8870{
8871 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
8872
8873 /*
8874 * Handle the disp16 form with no registers first.
8875 *
8876 * Convert to an immediate value, as that'll delay the register allocation
8877 * and assignment till the memory access / call / whatever and we can use
8878 * a more appropriate register (or none at all).
8879 */
8880 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
8881 {
8882 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
8883 return off;
8884 }
8885
8886    /* Determine the displacement. */
8887 uint16_t u16EffAddr;
8888 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
8889 {
8890 case 0: u16EffAddr = 0; break;
8891 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
8892 case 2: u16EffAddr = u16Disp; break;
8893 default: AssertFailedStmt(u16EffAddr = 0);
8894 }
8895
8896 /* Determine the registers involved. */
8897 uint8_t idxGstRegBase;
8898 uint8_t idxGstRegIndex;
8899 switch (bRm & X86_MODRM_RM_MASK)
8900 {
8901 case 0:
8902 idxGstRegBase = X86_GREG_xBX;
8903 idxGstRegIndex = X86_GREG_xSI;
8904 break;
8905 case 1:
8906 idxGstRegBase = X86_GREG_xBX;
8907 idxGstRegIndex = X86_GREG_xDI;
8908 break;
8909 case 2:
8910 idxGstRegBase = X86_GREG_xBP;
8911 idxGstRegIndex = X86_GREG_xSI;
8912 break;
8913 case 3:
8914 idxGstRegBase = X86_GREG_xBP;
8915 idxGstRegIndex = X86_GREG_xDI;
8916 break;
8917 case 4:
8918 idxGstRegBase = X86_GREG_xSI;
8919 idxGstRegIndex = UINT8_MAX;
8920 break;
8921 case 5:
8922 idxGstRegBase = X86_GREG_xDI;
8923 idxGstRegIndex = UINT8_MAX;
8924 break;
8925 case 6:
8926 idxGstRegBase = X86_GREG_xBP;
8927 idxGstRegIndex = UINT8_MAX;
8928 break;
8929#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
8930 default:
8931#endif
8932 case 7:
8933 idxGstRegBase = X86_GREG_xBX;
8934 idxGstRegIndex = UINT8_MAX;
8935 break;
8936 }
8937
8938 /*
8939 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
8940 */
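    /* Note: every path below ends by zero extending from 16 bits (the 16-bit
       gpr load / iemNativeEmitClear16UpGpr on AMD64, uxth on ARM64), which is
       what provides the modulo 64KiB wrap-around of 16-bit addressing. */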
8941 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
8942 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
8943 kIemNativeGstRegUse_ReadOnly);
8944 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
8945 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
8946 kIemNativeGstRegUse_ReadOnly)
8947 : UINT8_MAX;
8948#ifdef RT_ARCH_AMD64
8949 if (idxRegIndex == UINT8_MAX)
8950 {
8951 if (u16EffAddr == 0)
8952 {
8953            /* movzx ret, base */
8954 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
8955 }
8956 else
8957 {
8958 /* lea ret32, [base64 + disp32] */
8959 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
8960 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8961 if (idxRegRet >= 8 || idxRegBase >= 8)
8962 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
8963 pbCodeBuf[off++] = 0x8d;
8964 if (idxRegBase != X86_GREG_x12 /*SIB*/)
8965 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
8966 else
8967 {
8968 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
8969 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
8970 }
8971 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
8972 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
8973 pbCodeBuf[off++] = 0;
8974 pbCodeBuf[off++] = 0;
8975 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8976
8977 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
8978 }
8979 }
8980 else
8981 {
8982 /* lea ret32, [index64 + base64 (+ disp32)] */
8983 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
8984 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8985 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
8986 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
8987 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
8988 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
8989 pbCodeBuf[off++] = 0x8d;
8990 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
8991 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
8992 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
8993 if (bMod == X86_MOD_MEM4)
8994 {
8995 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
8996 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
8997 pbCodeBuf[off++] = 0;
8998 pbCodeBuf[off++] = 0;
8999 }
9000 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9001 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9002 }
9003
9004#elif defined(RT_ARCH_ARM64)
9005 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9006 if (u16EffAddr == 0)
9007 {
9008 if (idxRegIndex == UINT8_MAX)
9009 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
9010 else
9011 {
9012 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
9013 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9014 }
9015 }
9016 else
9017 {
9018 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
9019 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
9020 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
9021 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9022 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
9023 else
9024 {
9025 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
9026 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9027 }
9028 if (idxRegIndex != UINT8_MAX)
9029 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
9030 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9031 }
9032
9033#else
9034# error "port me"
9035#endif
9036
9037 if (idxRegIndex != UINT8_MAX)
9038 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9039 iemNativeRegFreeTmp(pReNative, idxRegBase);
9040 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9041 return off;
9042}
9043
9044
9045#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
9046 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
9047
9048/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
9049 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
9050DECL_INLINE_THROW(uint32_t)
9051iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9052 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
9053{
9054 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9055
9056 /*
9057 * Handle the disp32 form with no registers first.
9058 *
9059 * Convert to an immediate value, as that'll delay the register allocation
9060 * and assignment till the memory access / call / whatever and we can use
9061 * a more appropriate register (or none at all).
9062 */
9063 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9064 {
9065 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
9066 return off;
9067 }
9068
9069    /* Calculate the fixed displacement (more on this further down for the SIB.B=4 and SIB.B=5 cases). */
9070 uint32_t u32EffAddr = 0;
9071 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9072 {
9073 case 0: break;
9074 case 1: u32EffAddr = (int8_t)u32Disp; break;
9075 case 2: u32EffAddr = u32Disp; break;
9076 default: AssertFailed();
9077 }
9078
9079 /* Get the register (or SIB) value. */
9080 uint8_t idxGstRegBase = UINT8_MAX;
9081 uint8_t idxGstRegIndex = UINT8_MAX;
9082 uint8_t cShiftIndex = 0;
9083 switch (bRm & X86_MODRM_RM_MASK)
9084 {
9085 case 0: idxGstRegBase = X86_GREG_xAX; break;
9086 case 1: idxGstRegBase = X86_GREG_xCX; break;
9087 case 2: idxGstRegBase = X86_GREG_xDX; break;
9088 case 3: idxGstRegBase = X86_GREG_xBX; break;
9089 case 4: /* SIB */
9090 {
9091            /* index /w scaling. */
9092 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9093 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9094 {
9095 case 0: idxGstRegIndex = X86_GREG_xAX; break;
9096 case 1: idxGstRegIndex = X86_GREG_xCX; break;
9097 case 2: idxGstRegIndex = X86_GREG_xDX; break;
9098 case 3: idxGstRegIndex = X86_GREG_xBX; break;
9099 case 4: cShiftIndex = 0; /*no index*/ break;
9100 case 5: idxGstRegIndex = X86_GREG_xBP; break;
9101 case 6: idxGstRegIndex = X86_GREG_xSI; break;
9102 case 7: idxGstRegIndex = X86_GREG_xDI; break;
9103 }
9104
9105 /* base */
9106 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
9107 {
9108 case 0: idxGstRegBase = X86_GREG_xAX; break;
9109 case 1: idxGstRegBase = X86_GREG_xCX; break;
9110 case 2: idxGstRegBase = X86_GREG_xDX; break;
9111 case 3: idxGstRegBase = X86_GREG_xBX; break;
9112 case 4:
9113 idxGstRegBase = X86_GREG_xSP;
9114 u32EffAddr += uSibAndRspOffset >> 8;
9115 break;
9116 case 5:
9117 if ((bRm & X86_MODRM_MOD_MASK) != 0)
9118 idxGstRegBase = X86_GREG_xBP;
9119 else
9120 {
9121 Assert(u32EffAddr == 0);
9122 u32EffAddr = u32Disp;
9123 }
9124 break;
9125 case 6: idxGstRegBase = X86_GREG_xSI; break;
9126 case 7: idxGstRegBase = X86_GREG_xDI; break;
9127 }
9128 break;
9129 }
9130 case 5: idxGstRegBase = X86_GREG_xBP; break;
9131 case 6: idxGstRegBase = X86_GREG_xSI; break;
9132 case 7: idxGstRegBase = X86_GREG_xDI; break;
9133 }
9134
9135 /*
9136 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
9137 * the start of the function.
9138 */
9139 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9140 {
9141 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
9142 return off;
9143 }
9144
9145 /*
9146 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9147 */
9148 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9149 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9150 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9151 kIemNativeGstRegUse_ReadOnly);
9152 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9153 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9154 kIemNativeGstRegUse_ReadOnly);
9155
9156 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9157 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9158 {
9159 idxRegBase = idxRegIndex;
9160 idxRegIndex = UINT8_MAX;
9161 }
9162
9163#ifdef RT_ARCH_AMD64
9164 if (idxRegIndex == UINT8_MAX)
9165 {
9166 if (u32EffAddr == 0)
9167 {
9168 /* mov ret, base */
9169 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9170 }
9171 else
9172 {
9173 /* lea ret32, [base64 + disp32] */
9174 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9175 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9176 if (idxRegRet >= 8 || idxRegBase >= 8)
9177 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9178 pbCodeBuf[off++] = 0x8d;
9179 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9180 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9181 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9182 else
9183 {
9184 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9185 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9186 }
9187 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9188 if (bMod == X86_MOD_MEM4)
9189 {
9190 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9191 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9192 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9193 }
9194 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9195 }
9196 }
9197 else
9198 {
9199 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9200 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9201 if (idxRegBase == UINT8_MAX)
9202 {
9203 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
9204 if (idxRegRet >= 8 || idxRegIndex >= 8)
9205 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9206 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9207 pbCodeBuf[off++] = 0x8d;
9208 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9209 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9210 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9211 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9212 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9213 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9214 }
9215 else
9216 {
9217 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9218 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9219 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9220 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9221 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9222 pbCodeBuf[off++] = 0x8d;
9223 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9224 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9225 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9226 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9227 if (bMod != X86_MOD_MEM0)
9228 {
9229 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9230 if (bMod == X86_MOD_MEM4)
9231 {
9232 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9233 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9234 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9235 }
9236 }
9237 }
9238 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9239 }
9240
9241#elif defined(RT_ARCH_ARM64)
9242 if (u32EffAddr == 0)
9243 {
9244 if (idxRegIndex == UINT8_MAX)
9245 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9246 else if (idxRegBase == UINT8_MAX)
9247 {
9248 if (cShiftIndex == 0)
9249 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
9250 else
9251 {
9252 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9253 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
9254 }
9255 }
9256 else
9257 {
9258 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9259 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
9260 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9261 }
9262 }
9263 else
9264 {
9265 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
9266 {
9267 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9268 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
9269 }
9270 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
9271 {
9272 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9273 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9274 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
9275 }
9276 else
9277 {
9278 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
9279 if (idxRegBase != UINT8_MAX)
9280 {
9281 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9282 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9283 }
9284 }
9285 if (idxRegIndex != UINT8_MAX)
9286 {
9287 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9288 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
9289 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9290 }
9291 }
9292
9293#else
9294# error "port me"
9295#endif
9296
9297 if (idxRegIndex != UINT8_MAX)
9298 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9299 if (idxRegBase != UINT8_MAX)
9300 iemNativeRegFreeTmp(pReNative, idxRegBase);
9301 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9302 return off;
9303}
9304
9305
9306#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9307 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9308 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
9309
9310#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9311 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9312 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
9313
9314#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9315 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9316 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
9317
9318/**
9319 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
9320 *
9321 * @returns New off.
9322 * @param   pReNative           The native recompile state.
9323 * @param   off                 The code buffer offset.
9324 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
9325 * bit 4 to REX.X. The two bits are part of the
9326 * REG sub-field, which isn't needed in this
9327 * function.
9328 * @param uSibAndRspOffset Two parts:
9329 * - The first 8 bits make up the SIB byte.
9330 * - The next 8 bits are the fixed RSP/ESP offset
9331 * in case of a pop [xSP].
9332 * @param u32Disp The displacement byte/word/dword, if any.
9333 * @param cbInstr The size of the fully decoded instruction. Used
9334 * for RIP relative addressing.
9335 * @param idxVarRet The result variable number.
9336 * @param f64Bit Whether to use a 64-bit or 32-bit address size
9337 * when calculating the address.
9338 *
9339 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
9340 */
9341DECL_INLINE_THROW(uint32_t)
9342iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
9343 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
9344{
9345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9346
9347 /*
9348 * Special case the rip + disp32 form first.
9349 */
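    /* Note: RIP relative addressing is relative to the end of the instruction,
       which is why cbInstr is added on top of the displacement below. */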
9350 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9351 {
9352 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9353 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
9354 kIemNativeGstRegUse_ReadOnly);
9355#ifdef RT_ARCH_AMD64
9356 if (f64Bit)
9357 {
9358 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
9359 if ((int32_t)offFinalDisp == offFinalDisp)
9360 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
9361 else
9362 {
9363 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
9364 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
9365 }
9366 }
9367 else
9368 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
9369
9370#elif defined(RT_ARCH_ARM64)
9371 if (f64Bit)
9372 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
9373 (int64_t)(int32_t)u32Disp + cbInstr);
9374 else
9375 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
9376 (int32_t)u32Disp + cbInstr);
9377
9378#else
9379# error "Port me!"
9380#endif
9381 iemNativeRegFreeTmp(pReNative, idxRegPc);
9382 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9383 return off;
9384 }
9385
9386    /* Calculate the fixed displacement (more on this further down for the SIB.B=4 and SIB.B=5 cases). */
9387 int64_t i64EffAddr = 0;
9388 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9389 {
9390 case 0: break;
9391 case 1: i64EffAddr = (int8_t)u32Disp; break;
9392 case 2: i64EffAddr = (int32_t)u32Disp; break;
9393 default: AssertFailed();
9394 }
9395
9396 /* Get the register (or SIB) value. */
9397 uint8_t idxGstRegBase = UINT8_MAX;
9398 uint8_t idxGstRegIndex = UINT8_MAX;
9399 uint8_t cShiftIndex = 0;
9400 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
9401 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
9402 else /* SIB: */
9403 {
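        /* Purely illustrative example of the decoding below: bRmEx=0x94 (mod=2,
           rm=4/SIB, REX.X set, REX.B clear) with SIB byte 0x8c (scale=2, index=1,
           base=4) gives index = R9 (1 + 8) shifted left by 2 and base = RSP with
           the extra RSP offset from bits 15:8 of uSibAndRspOffset, plus disp32. */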
9404        /* index /w scaling. */
9405 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9406 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9407 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
9408 if (idxGstRegIndex == 4)
9409 {
9410 /* no index */
9411 cShiftIndex = 0;
9412 idxGstRegIndex = UINT8_MAX;
9413 }
9414
9415 /* base */
9416 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
9417 if (idxGstRegBase == 4)
9418 {
9419 /* pop [rsp] hack */
9420 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
9421 }
9422 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
9423 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
9424 {
9425 /* mod=0 and base=5 -> disp32, no base reg. */
9426 Assert(i64EffAddr == 0);
9427 i64EffAddr = (int32_t)u32Disp;
9428 idxGstRegBase = UINT8_MAX;
9429 }
9430 }
9431
9432 /*
9433 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
9434 * the start of the function.
9435 */
9436 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9437 {
9438 if (f64Bit)
9439 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
9440 else
9441 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
9442 return off;
9443 }
9444
9445 /*
9446 * Now emit code that calculates:
9447 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9448 * or if !f64Bit:
9449 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9450 */
9451 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9452 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9453 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9454 kIemNativeGstRegUse_ReadOnly);
9455 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9456 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9457 kIemNativeGstRegUse_ReadOnly);
9458
9459 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9460 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9461 {
9462 idxRegBase = idxRegIndex;
9463 idxRegIndex = UINT8_MAX;
9464 }
9465
9466#ifdef RT_ARCH_AMD64
9467 uint8_t bFinalAdj;
9468 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
9469 bFinalAdj = 0; /* likely */
9470 else
9471 {
9472 /* pop [rsp] with a problematic disp32 value. Split out the
9473 RSP offset and add it separately afterwards (bFinalAdj). */
9474 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
9475 Assert(idxGstRegBase == X86_GREG_xSP);
9476 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
9477 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
9478 Assert(bFinalAdj != 0);
9479 i64EffAddr -= bFinalAdj;
9480 Assert((int32_t)i64EffAddr == i64EffAddr);
9481 }
9482 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
9483//pReNative->pInstrBuf[off++] = 0xcc;
9484
9485 if (idxRegIndex == UINT8_MAX)
9486 {
9487 if (u32EffAddr == 0)
9488 {
9489 /* mov ret, base */
9490 if (f64Bit)
9491 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
9492 else
9493 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9494 }
9495 else
9496 {
9497 /* lea ret, [base + disp32] */
9498 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9499 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9500 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
9501 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9502 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9503 | (f64Bit ? X86_OP_REX_W : 0);
9504 pbCodeBuf[off++] = 0x8d;
9505 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9506 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9507 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9508 else
9509 {
9510 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9511 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9512 }
9513 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9514 if (bMod == X86_MOD_MEM4)
9515 {
9516 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9517 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9518 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9519 }
9520 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9521 }
9522 }
9523 else
9524 {
9525 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9526 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9527 if (idxRegBase == UINT8_MAX)
9528 {
9529 /* lea ret, [(index64 << cShiftIndex) + disp32] */
9530 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
9531 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9532 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
9533 | (f64Bit ? X86_OP_REX_W : 0);
9534 pbCodeBuf[off++] = 0x8d;
9535 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9536 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9537 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9538 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9539 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9540 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9541 }
9542 else
9543 {
9544 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9545 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9546 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9547 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9548 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
9549 | (f64Bit ? X86_OP_REX_W : 0);
9550 pbCodeBuf[off++] = 0x8d;
9551 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9552 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9553 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9554 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9555 if (bMod != X86_MOD_MEM0)
9556 {
9557 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9558 if (bMod == X86_MOD_MEM4)
9559 {
9560 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9561 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9562 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9563 }
9564 }
9565 }
9566 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9567 }
9568
9569 if (!bFinalAdj)
9570 { /* likely */ }
9571 else
9572 {
9573 Assert(f64Bit);
9574 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
9575 }
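    /*
     * Illustrative note (not from the original source): the emitter above picks
     * the shortest LEA form it can.  For example, with idxRegRet=RAX,
     * idxRegBase=R12 and u32EffAddr=0x10 the base+disp path would produce the
     * bytes 49 8D 44 24 10, i.e. "lea rax, [r12+0x10]".  R12 (like RSP) cannot
     * be encoded directly in ModRM.rm and therefore needs the extra SIB byte,
     * which is exactly the X86_GREG_x12 special case handled above; the 0x10
     * displacement fits in a signed byte, so X86_MOD_MEM1 is used.
     */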
9576
9577#elif defined(RT_ARCH_ARM64)
9578 if (i64EffAddr == 0)
9579 {
9580 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9581 if (idxRegIndex == UINT8_MAX)
9582 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
9583 else if (idxRegBase != UINT8_MAX)
9584 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
9585 f64Bit, false /*fSetFlags*/, cShiftIndex);
9586 else
9587 {
9588 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
9589 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
9590 }
9591 }
9592 else
9593 {
9594 if (f64Bit)
9595 { /* likely */ }
9596 else
9597 i64EffAddr = (int32_t)i64EffAddr;
9598
9599 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
9600 {
9601 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9602 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
9603 }
9604 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
9605 {
9606 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9607 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
9608 }
9609 else
9610 {
9611 if (f64Bit)
9612 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
9613 else
9614 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
9615 if (idxRegBase != UINT8_MAX)
9616 {
9617 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9618 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
9619 }
9620 }
9621 if (idxRegIndex != UINT8_MAX)
9622 {
9623 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9624 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
9625 f64Bit, false /*fSetFlags*/, cShiftIndex);
9626 }
9627 }
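    /*
     * Illustrative note (not from the original source): on ARM64 the code above
     * prefers a single ADD/SUB with a 12-bit unsigned immediate whenever the
     * displacement allows it, e.g. i64EffAddr=0x20 with a base register becomes
     * one "add xRet, xBase, #0x20".  Only when the displacement does not fit
     * the 12-bit immediate (or its negated form) does it fall back to
     * materialising the constant via iemNativeEmitLoadGprImm64 and then adding
     * the base and index registers to it.
     */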
9628
9629#else
9630# error "port me"
9631#endif
9632
9633 if (idxRegIndex != UINT8_MAX)
9634 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9635 if (idxRegBase != UINT8_MAX)
9636 iemNativeRegFreeTmp(pReNative, idxRegBase);
9637 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9638 return off;
9639}
9640
9641
9642
9643
9644/*********************************************************************************************************************************
9645* Memory fetches and stores common *
9646*********************************************************************************************************************************/
9647
9648typedef enum IEMNATIVEMITMEMOP
9649{
9650 kIemNativeEmitMemOp_Store = 0,
9651 kIemNativeEmitMemOp_Fetch,
9652 kIemNativeEmitMemOp_Fetch_Zx_U16,
9653 kIemNativeEmitMemOp_Fetch_Zx_U32,
9654 kIemNativeEmitMemOp_Fetch_Zx_U64,
9655 kIemNativeEmitMemOp_Fetch_Sx_U16,
9656 kIemNativeEmitMemOp_Fetch_Sx_U32,
9657 kIemNativeEmitMemOp_Fetch_Sx_U64
9658} IEMNATIVEMITMEMOP;
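/* Illustrative note (not from the original source): the _Zx_/_Sx_ values encode
   the extension applied after the fetch.  E.g. kIemNativeEmitMemOp_Fetch_Sx_U64
   with cbMem=4 corresponds to IEM_MC_FETCH_MEM_U32_SX_U64, i.e. load a 32-bit
   value and sign-extend it to 64 bits, and uses a dedicated _Sx_ helper; the
   _Zx_ variants can reuse the plain fetch helpers since the result is simply
   zero-extended. */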
9659
9660/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
9661 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
9662 * (with iSegReg = UINT8_MAX). */
9663DECL_INLINE_THROW(uint32_t)
9664iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
9665 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
9666 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
9667{
9668 /*
9669 * Assert sanity.
9670 */
9671 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
9672 Assert( enmOp != kIemNativeEmitMemOp_Store
9673 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
9674 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
9675 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9676 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
9677 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
9678 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9679 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9680 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
9681 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9682#ifdef VBOX_STRICT
9683 if (iSegReg == UINT8_MAX)
9684 {
9685 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9686 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9687 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9688 switch (cbMem)
9689 {
9690 case 1:
9691 Assert( pfnFunction
9692 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
9693 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9694 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9695 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9696 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9697 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
9698 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
9699 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
9700 : UINT64_C(0xc000b000a0009000) ));
9701 break;
9702 case 2:
9703 Assert( pfnFunction
9704 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
9705 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
9706 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
9707 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
9708 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
9709 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
9710 : UINT64_C(0xc000b000a0009000) ));
9711 break;
9712 case 4:
9713 Assert( pfnFunction
9714 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
9715 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
9716 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
9717 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
9718 : UINT64_C(0xc000b000a0009000) ));
9719 break;
9720 case 8:
9721 Assert( pfnFunction
9722 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
9723 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
9724 : UINT64_C(0xc000b000a0009000) ));
9725 break;
9726 }
9727 }
9728 else
9729 {
9730 Assert(iSegReg < 6);
9731 switch (cbMem)
9732 {
9733 case 1:
9734 Assert( pfnFunction
9735 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
9736 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
9737 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
9738 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
9739 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
9740 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
9741 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
9742 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
9743 : UINT64_C(0xc000b000a0009000) ));
9744 break;
9745 case 2:
9746 Assert( pfnFunction
9747 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
9748 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
9749 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
9750 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
9751 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
9752 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
9753 : UINT64_C(0xc000b000a0009000) ));
9754 break;
9755 case 4:
9756 Assert( pfnFunction
9757 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
9758 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
9759 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
9760 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
9761 : UINT64_C(0xc000b000a0009000) ));
9762 break;
9763 case 8:
9764 Assert( pfnFunction
9765 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
9766 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
9767 : UINT64_C(0xc000b000a0009000) ));
9768 break;
9769 }
9770 }
9771#endif
9772
9773#ifdef VBOX_STRICT
9774 /*
9775 * Check that the fExec flags we've got make sense.
9776 */
9777 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9778#endif
9779
9780 /*
9781 * To keep things simple we have to commit any pending writes first as we
9782 * may end up making calls.
9783 */
9784 /** @todo we could postpone this till we make the call and reload the
9785 * registers after returning from the call. Not sure if that's sensible or
9786 * not, though. */
9787 off = iemNativeRegFlushPendingWrites(pReNative, off);
9788
9789 /*
9790 * Move/spill/flush stuff out of call-volatile registers.
9791 * This is the easy way out. We could contain this to the tlb-miss branch
9792 * by saving and restoring active stuff here.
9793 */
9794 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9795 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9796
9797 /*
9798 * Define labels and allocate the result register (trying for the return
9799 * register if we can).
9800 */
9801 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9802 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
9803 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9804 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX /* special case value storing below */
9805 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9806 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
9807 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
9808
9809 /*
9810 * First we try to go via the TLB.
9811 */
9812//pReNative->pInstrBuf[off++] = 0xcc;
9813 /** @todo later. */
9814 RT_NOREF(fAlignMask, cbMem);
9815
9816 /*
9817 * Call helper to do the fetching.
9818 * We flush all guest register shadow copies here.
9819 */
9820 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
9821
9822#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9823 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9824#else
9825 RT_NOREF(idxInstr);
9826#endif
9827
9828 uint8_t idxRegArgValue;
9829 if (iSegReg == UINT8_MAX)
9830 idxRegArgValue = IEMNATIVE_CALL_ARG2_GREG;
9831 else
9832 {
9833 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
9834 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9835 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
9836
9837 idxRegArgValue = IEMNATIVE_CALL_ARG3_GREG;
9838 }
9839
9840 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
9841 if (enmOp == kIemNativeEmitMemOp_Store)
9842 {
9843 if (pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate)
9844 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArgValue, pReNative->Core.aVars[idxVarValue].u.uValue);
9845 else
9846 {
9847 uint8_t const idxRegVarValue = pReNative->Core.aVars[idxVarValue].idxReg;
9848 if (idxRegVarValue < RT_ELEMENTS(pReNative->Core.aHstRegs))
9849 {
9850 Assert(!(RT_BIT_32(idxRegVarValue) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
9851 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArgValue, idxRegVarValue);
9852 }
9853 else
9854 {
9855 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarValue].idxStackSlot;
9856 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9857 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArgValue, iemNativeStackCalcBpDisp(idxStackSlot));
9858 }
9859 }
9860 }
9861
9862 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
9863 if (pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate)
9864 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
9865 pReNative->Core.aVars[idxVarGCPtrMem].u.uValue + offDisp);
9866 else
9867 {
9868 uint8_t const idxRegVarGCPtrMem = pReNative->Core.aVars[idxVarGCPtrMem].idxReg;
9869 if (idxRegVarGCPtrMem < RT_ELEMENTS(pReNative->Core.aHstRegs))
9870 {
9871 Assert(!(RT_BIT_32(idxRegVarGCPtrMem) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
9872 if (!offDisp)
9873 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem);
9874 else
9875 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem, offDisp);
9876 }
9877 else
9878 {
9879 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarGCPtrMem].idxStackSlot;
9880 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9881 AssertFailed(); /** @todo This was probably caused by iemNativeRegMoveAndFreeAndFlushAtCall above. Improve... */
9882 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, iemNativeStackCalcBpDisp(idxStackSlot));
9883 if (offDisp)
9884 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offDisp);
9885 }
9886 }
9887
9888 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9889 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9890
9891 /* Done setting up parameters, make the call. */
9892 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9893
9894 /*
9895 * Put the result in the right register if this is a fetch.
9896 */
9897 if (enmOp != kIemNativeEmitMemOp_Store)
9898 {
9899 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
9900 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
9901 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
9902 iemNativeVarRegisterRelease(pReNative, idxVarValue);
9903 }
9904
9905 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9906
9907 return off;
9908}
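/* Illustrative note (not from the original source): judging from the argument
   setup above, the TLB-miss path of a flat 32-bit store boils down to a plain
   helper call of the shape
       iemNativeHlpMemFlatStoreDataU32(pVCpu, GCPtrMem, u32Value);
   while the segmented variant also passes the segment register index:
       iemNativeHlpMemStoreDataU32(pVCpu, GCPtrMem, iSegReg, u32Value);
   i.e. pVCpu in ARG0, GCPtrMem in ARG1 and the remaining values in ARG2/ARG3. */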
9909
9910
9911
9912/*********************************************************************************************************************************
9913* Memory fetches (IEM_MEM_FETCH_XXX). *
9914*********************************************************************************************************************************/
9915
9916/* 8-bit segmented: */
9917#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
9918 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
9919 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
9920 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
9921
9922#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
9923 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
9924 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
9925 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
9926
9927#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
9928 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9929 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
9930 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
9931
9932#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9933 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9934 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
9935 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
9936
9937#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
9938 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
9939 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
9940 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
9941
9942#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
9943 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9944 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
9945 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
9946
9947#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9948 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9949 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
9950 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
9951
9952/* 16-bit segmented: */
9953#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
9954 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
9955 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
9956 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
9957
9958#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
9959 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
9960 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
9961 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
9962
9963#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
9964 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9965 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
9966 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
9967
9968#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9969 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9970 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
9971 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
9972
9973#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
9974 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9975 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
9976 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
9977
9978#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9979 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9980 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
9981 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
9982
9983
9984/* 32-bit segmented: */
9985#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
9986 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9987 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
9988 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
9989
9990#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
9991 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9992 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
9993 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
9994
9995#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9996 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9997 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
9998 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
9999
10000#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10001 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10002 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10003 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
10004
10005
10006/* 64-bit segmented: */
10007#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10008 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10009 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
10010 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
10011
10012
10013
10014/* 8-bit flat: */
10015#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
10016 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
10017 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
10018 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10019
10020#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
10021 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10022 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
10023 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10024
10025#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
10026 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10027 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10028 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10029
10030#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
10031 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10032 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10033 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10034
10035#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
10036 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10037 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
10038 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
10039
10040#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
10041 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10042 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10043 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
10044
10045#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
10046 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10047 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10048 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
10049
10050
10051/* 16-bit flat: */
10052#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
10053 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10054 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10055 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10056
10057#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
10058 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10059 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10060 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
10061
10062#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
10063 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10064 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10065 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10066
10067#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
10068 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10069 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10070 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10071
10072#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
10073 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10074 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10075 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
10076
10077#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
10078 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10079 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10080 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
10081
10082/* 32-bit flat: */
10083#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
10084 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10085 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10086 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
10087
10088#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
10089 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10090 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10091 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
10092
10093#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
10094 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10095 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10096 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
10097
10098#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
10099 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10100 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10101 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
10102
10103/* 64-bit flat: */
10104#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
10105 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10106 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
10107 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
10108
10109
10110
10111/*********************************************************************************************************************************
10112* Memory stores (IEM_MEM_STORE_XXX). *
10113*********************************************************************************************************************************/
10114
10115#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
10116 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
10117 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
10118 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
10119
10120#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
10121 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
10122 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
10123 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
10124
10125#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
10126 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
10127 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
10128 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
10129
10130#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
10131 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
10132 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
10133 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
10134
10135
10136#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
10137 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
10138 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
10139 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
10140
10141#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
10142 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
10143 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
10144 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
10145
10146#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
10147 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
10148 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
10149 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
10150
10151#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
10152 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
10153 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
10154 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
10155
10156
10157#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
10158 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10159 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
10160
10161#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
10162 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10163 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
10164
10165#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
10166 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10167 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
10168
10169#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
10170 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10171 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
10172
10173
10174#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
10175 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10176 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
10177
10178#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
10179 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10180 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
10181
10182#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
10183 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10184 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
10185
10186#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
10187 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10188 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
10189
10190/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
10191 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
10192DECL_INLINE_THROW(uint32_t)
10193iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
10194 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
10195{
10196 /*
10197 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
10198 * to do the grunt work.
10199 */
10200 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
10201 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
10202 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
10203 pfnFunction, idxInstr);
10204 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
10205 return off;
10206}
10207
10208
10209
10210/*********************************************************************************************************************************
10211* Stack Accesses. *
10212*********************************************************************************************************************************/
10213/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
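/* Illustrative note (not from the original source): the four bytes packed here
   are (value width, flat stack width, segment-register flag, 0).  So
   RT_MAKE_U32_FROM_U8(16, 32, 0, 0) used by IEM_MC_FLAT32_PUSH_U16 below means
   "push a 16-bit value on a flat 32-bit stack", and a non-zero RT_BYTE2() is
   what the strict checks in iemNativeEmitStackPush use to tell the flat
   variants from the segmented ones. */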
10214#define IEM_MC_PUSH_U16(a_u16Value) \
10215 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
10216 (uintptr_t)iemNativeHlpStackPushU16, pCallEntry->idxInstr)
10217#define IEM_MC_PUSH_U32(a_u32Value) \
10218 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
10219 (uintptr_t)iemNativeHlpStackPushU32, pCallEntry->idxInstr)
10220#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
10221 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
10222 (uintptr_t)iemNativeHlpStackPushU32SReg, pCallEntry->idxInstr)
10223#define IEM_MC_PUSH_U64(a_u64Value) \
10224 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
10225 (uintptr_t)iemNativeHlpStackPushU64, pCallEntry->idxInstr)
10226
10227#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
10228 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
10229 (uintptr_t)iemNativeHlpStackFlat32PushU16, pCallEntry->idxInstr)
10230#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
10231 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
10232 (uintptr_t)iemNativeHlpStackFlat32PushU32, pCallEntry->idxInstr)
10233#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
10234 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
10235 (uintptr_t)iemNativeHlpStackFlat32PushU32SReg, pCallEntry->idxInstr)
10236
10237#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
10238 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
10239 (uintptr_t)iemNativeHlpStackFlat64PushU16, pCallEntry->idxInstr)
10240#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
10241 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
10242 (uintptr_t)iemNativeHlpStackFlat64PushU64, pCallEntry->idxInstr)
10243
10244/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
10245DECL_INLINE_THROW(uint32_t)
10246iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
10247 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
10248{
10249 /*
10250 * Assert sanity.
10251 */
10252 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
10253#ifdef VBOX_STRICT
10254 if (RT_BYTE2(cBitsVarAndFlat) != 0)
10255 {
10256 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10257 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10258 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10259 Assert( pfnFunction
10260 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU16
10261 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32
10262 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32SReg
10263                                   : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU16
10264 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU64
10265 : UINT64_C(0xc000b000a0009000) ));
10266 }
10267 else
10268 Assert( pfnFunction
10269 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU16
10270 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU32
10271 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackPushU32SReg
10272 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU64
10273 : UINT64_C(0xc000b000a0009000) ));
10274#endif
10275
10276#ifdef VBOX_STRICT
10277 /*
10278 * Check that the fExec flags we've got make sense.
10279 */
10280 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10281#endif
10282
10283 /*
10284 * To keep things simple we have to commit any pending writes first as we
10285 * may end up making calls.
10286 */
10287 /** @todo we could postpone this till we make the call and reload the
10288 * registers after returning from the call. Not sure if that's sensible or
10289 * not, though. */
10290 off = iemNativeRegFlushPendingWrites(pReNative, off);
10291
10292 /*
10293 * Move/spill/flush stuff out of call-volatile registers, keeping whatever
10294 * idxVarValue might be occupying.
10295 *
10296 * This is the easy way out. We could contain this to the tlb-miss branch
10297 * by saving and restoring active stuff here.
10298 */
10299 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10300 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarValue));
10301
10302 /* For now, flush any shadow copy of the xSP register. */
10303 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
10304
10305 /*
10306      * Define labels for the TLB miss and done cases (no result register
10307      * needs to be allocated for a push).
10308 */
10309 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10310 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10311 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10312
10313 /*
10314 * First we try to go via the TLB.
10315 */
10316//pReNative->pInstrBuf[off++] = 0xcc;
10317 /** @todo later. */
10318 RT_NOREF(cBitsVarAndFlat);
10319
10320 /*
10321      * Call helper to do the pushing.
10322 */
10323 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10324
10325#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10326 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10327#else
10328 RT_NOREF(idxInstr);
10329#endif
10330
10331 /* IEMNATIVE_CALL_ARG1_GREG = idxVarValue (first) */
10332 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarValue,
10333 0 /*offAddend*/, true /*fVarAllowInVolatileReg*/);
10334
10335 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10336 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10337
10338 /* Done setting up parameters, make the call. */
10339 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10340
10341     /* The value variable is implicitly flushed. */
10342 iemNativeVarFreeLocal(pReNative, idxVarValue);
10343
10344 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10345
10346 return off;
10347}
10348
10349
10350
10351/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
10352#define IEM_MC_POP_GREG_U16(a_iGReg) \
10353 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
10354 (uintptr_t)iemNativeHlpStackPopGRegU16, pCallEntry->idxInstr)
10355#define IEM_MC_POP_GREG_U32(a_iGReg) \
10356 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
10357 (uintptr_t)iemNativeHlpStackPopGRegU32, pCallEntry->idxInstr)
10358#define IEM_MC_POP_GREG_U64(a_iGReg) \
10359 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
10360 (uintptr_t)iemNativeHlpStackPopGRegU64, pCallEntry->idxInstr)
10361
10362#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
10363 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
10364 (uintptr_t)iemNativeHlpStackFlat32PopGRegU16, pCallEntry->idxInstr)
10365#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
10366 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
10367 (uintptr_t)iemNativeHlpStackFlat32PopGRegU32, pCallEntry->idxInstr)
10368
10369#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
10370 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
10371 (uintptr_t)iemNativeHlpStackFlat64PopGRegU16, pCallEntry->idxInstr)
10372#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
10373 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
10374 (uintptr_t)iemNativeHlpStackFlat64PopGRegU64, pCallEntry->idxInstr)
10375
10376/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
10377DECL_INLINE_THROW(uint32_t)
10378iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
10379 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
10380{
10381 /*
10382 * Assert sanity.
10383 */
10384 Assert(idxGReg < 16);
10385#ifdef VBOX_STRICT
10386 if (RT_BYTE2(cBitsVarAndFlat) != 0)
10387 {
10388 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10389 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10390 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10391 Assert( pfnFunction
10392 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU16
10393 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU32
10394                                   : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU16
10395 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU64
10396 : UINT64_C(0xc000b000a0009000) ));
10397 }
10398 else
10399 Assert( pfnFunction
10400 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU16
10401 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU32
10402 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU64
10403 : UINT64_C(0xc000b000a0009000) ));
10404#endif
10405
10406#ifdef VBOX_STRICT
10407 /*
10408 * Check that the fExec flags we've got make sense.
10409 */
10410 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10411#endif
10412
10413 /*
10414 * To keep things simple we have to commit any pending writes first as we
10415 * may end up making calls.
10416 */
10417 /** @todo we could postpone this till we make the call and reload the
10418 * registers after returning from the call. Not sure if that's sensible or
10419 * not, though. */
10420 off = iemNativeRegFlushPendingWrites(pReNative, off);
10421
10422 /*
10423 * Move/spill/flush stuff out of call-volatile registers.
10424 * This is the easy way out. We could contain this to the tlb-miss branch
10425 * by saving and restoring active stuff here.
10426 */
10427 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10428 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
10429
10430     /* For now, flush any shadow copy of the guest register that is about
10431 to be popped and the xSP register. */
10432 iemNativeRegFlushGuestShadows(pReNative,
10433 RT_BIT_64(IEMNATIVEGSTREG_GPR(idxGReg)) | RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
10434
10435 /*
10436      * Define labels for the TLB miss and done cases (no result register
10437      * needs to be allocated; the helper call does all the work).
10438 */
10439 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10440 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10441 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10442
10443 /*
10444 * First we try to go via the TLB.
10445 */
10446//pReNative->pInstrBuf[off++] = 0xcc;
10447 /** @todo later. */
10448 RT_NOREF(cBitsVarAndFlat);
10449
10450 /*
10451 * Call helper to do the popping.
10452 */
10453 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10454
10455#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10456 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10457#else
10458 RT_NOREF(idxInstr);
10459#endif
10460
10461 /* IEMNATIVE_CALL_ARG1_GREG = iGReg */
10462 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxGReg);
10463
10464 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10465 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10466
10467 /* Done setting up parameters, make the call. */
10468 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10469
10470 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10471
10472 return off;
10473}
10474
10475
10476
10477/*********************************************************************************************************************************
10478* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
10479*********************************************************************************************************************************/
10480
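/* Illustrative note (not from the original source): the fAlignMask arguments
   below follow the "size minus one" convention, e.g. sizeof(uint32_t) - 1 = 3
   for a naturally aligned 32-bit mapping.  Presumably these are the low address
   bits the (not yet implemented) inlined TLB path will check for alignment
   purposes before falling back to the mapping helpers. */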
10481#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10482 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10483 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10484 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
10485
10486#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10487 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10488 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10489 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
10490
10491#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10492 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10493 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
10494 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
10495
10496
10497#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10498 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10499 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10500 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
10501
10502#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10503 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10504 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10505 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
10506
10507#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10508 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10509 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10510 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
10511
10512#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10513 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
10514 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10515 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
10516
10517
10518#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10519 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10520 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10521 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
10522
10523#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10524 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10525 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10526 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
10527
10528#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10529 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10530 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10531 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
10532
10533#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10534 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
10535 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10536 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
10537
10538
10539#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10540 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10541 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10542 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
10543
10544#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10545 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10546 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10547 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
10548
10549#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10550 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10551 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10552 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
10553
10554#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10555 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
10556 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10557 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
10558
10559
10560#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10561 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
10562 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10563 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
10564
10565#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10566 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
10567 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
10568 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
10569
10570
10571#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10572 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
10573 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10574 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
10575
10576#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10577 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
10578 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10579 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
10580
10581#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10582 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
10583 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10584 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
10585
10586
10587
10588#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
10589 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10590 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10591 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
10592
10593#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
10594 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10595 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10596 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
10597
10598#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
10599 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10600 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
10601 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
10602
10603
10604#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
10605 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10606 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10607 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
10608
10609#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
10610 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10611 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10612 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
10613
10614#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
10615 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10616 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10617 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
10618
10619#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
10620 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
10621 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10622 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
10623
10624
10625#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
10626 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10627 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10628 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
10629
10630#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
10631 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10632 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10633 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
10634
10635#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
10636 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10637 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10638 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
10639
10640#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
10641 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
10642 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10643 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
10644
10645
10646#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
10647 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10648 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10649 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
10650
10651#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
10652 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10653 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10654 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
10655
10656#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
10657 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10658 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10659 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
10660
10661#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
10662 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
10663 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10664 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
10665
10666
10667#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
10668 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
10669 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10670 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
10671
10672#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
10673 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
10674 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
10675 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
10676
10677
10678#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
10679 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
10680 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10681 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
10682
10683#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
10684 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
10685 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10686 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
10687
10688#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
10689 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
10690 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10691 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
10692
10693
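/**
 * Common emitter worker for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 * macros above (the flat variants pass UINT8_MAX as iSegReg).
 *
 * For now this always emits a call to the given pfnFunction helper, storing the
 * returned host pointer in the idxVarMem variable and the unmap info in
 * idxVarUnmapInfo; the inlined TLB lookup is still a todo below.
 *
 * As a rough illustration, IEM_MC_MEM_FLAT_MAP_U32_RW above lands here with
 * cbMem=sizeof(uint32_t), fAccess=READ|WRITE and
 * pfnFunction=iemNativeHlpMemFlatMapDataU32Rw.
 */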
10694DECL_INLINE_THROW(uint32_t)
10695iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
10696 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
10697 uintptr_t pfnFunction, uint8_t idxInstr)
10698{
10699 /*
10700 * Assert sanity.
10701 */
10702 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
10703 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
10704 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
10705 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10706
10707 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
10708 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
10709 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
10710 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10711
10712 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
10713 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
10714 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
10715 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10716
10717 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
10718
10719 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
10720
10721#ifdef VBOX_STRICT
10722# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
10723 ( ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
10724 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
10725 : ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_READ \
10726 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
10727
10728 if (iSegReg == UINT8_MAX)
10729 {
10730 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10731 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10732 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10733 switch (cbMem)
10734 {
10735 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
10736 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
10737 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
10738 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
10739 case 10:
10740 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
10741 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
10742 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
10743 break;
10744 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
10745# if 0
10746 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
10747 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
10748# endif
10749 default: AssertFailed(); break;
10750 }
10751 }
10752 else
10753 {
10754 Assert(iSegReg < 6);
10755 switch (cbMem)
10756 {
10757 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
10758 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
10759 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
10760 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
10761 case 10:
10762 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
10763 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
10764 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
10765 break;
10766 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
10767# if 0
10768 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU256)); break;
10769 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU512)); break;
10770# endif
10771 default: AssertFailed(); break;
10772 }
10773 }
10774# undef IEM_MAP_HLP_FN
10775#endif
10776
10777#ifdef VBOX_STRICT
10778 /*
10779 * Check that the fExec flags we've got make sense.
10780 */
10781 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10782#endif
10783
10784 /*
10785 * To keep things simple we have to commit any pending writes first as we
10786 * may end up making calls.
10787 */
10788 /** @todo we could postpone this till we make the call and reload the
10789 * registers after returning from the call. Not sure if that's sensible or
10790 * not, though. */
10791 off = iemNativeRegFlushPendingWrites(pReNative, off);
10792
10793 /*
10794 * Move/spill/flush stuff out of call-volatile registers.
10795 * This is the easy way out. We could contain this to the tlb-miss branch
10796 * by saving and restoring active stuff here.
10797 */
10798 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10799 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
10800
10801 /*
10802 * Define labels and allocate the result register (trying for the return
10803 * register if we can - which we of course can, given the above call).
10804 */
10805 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10806 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10807 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10808 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
10809 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
10810 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
10811
10812 /*
10813 * First we try to go via the TLB.
10814 */
10815//pReNative->pInstrBuf[off++] = 0xcc;
10816 /** @todo later. */
10817 RT_NOREF(fAccess, fAlignMask, cbMem);
10818
10819 /*
10820 * Call helper to do the fetching.
10821 * We flush all guest register shadow copies here.
10822 */
10823 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10824
10825#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10826 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10827#else
10828 RT_NOREF(idxInstr);
10829#endif
10830
10831 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
10832 if (iSegReg != UINT8_MAX)
10833 {
10834 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
10835 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
10836 }
10837
10838 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem */
10839 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem);
10840
10841 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo */
10842 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
10843 off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo, true /*fFlushShadows*/);
10844
10845 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10846 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10847
10848 /* Done setting up parameters, make the call. */
10849 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10850
10851 /*
10852 * Put the result in the right register.
10853 */
10854 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
10855 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
10856 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
10857 iemNativeVarRegisterRelease(pReNative, idxVarMem);
10858
10859 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10860
10861 return off;
10862}
10863
10864
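/*
 * The commit+unmap MCs below pair up with the mapping MCs above.  Purely as a
 * hypothetical illustration (variable names made up), a decoded MC block would
 * use them along these lines:
 *
 *      IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
 *      // ... update *pu32Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */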
10865#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
10866 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, \
10867 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
10868
10869#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
10870 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_WRITE, \
10871 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
10872
10873#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
10874 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ, \
10875 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
10876
10877DECL_INLINE_THROW(uint32_t)
10878iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
10879 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
10880{
10881 /*
10882 * Assert sanity.
10883 */
10884 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
10885 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
10886 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
10887 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
10888#ifdef VBOX_STRICT
10889 switch (fAccess & IEM_ACCESS_TYPE_MASK)
10890 {
10891 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
10892 case IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
10893 case IEM_ACCESS_TYPE_READ: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
10894 default: AssertFailed();
10895 }
10896#else
10897 RT_NOREF(fAccess);
10898#endif
10899
10900 /*
10901 * To keep things simple we have to commit any pending writes first as we
10902 * may end up making calls (there shouldn't be any at this point, so this
10903 * is just for consistency).
10904 */
10905 /** @todo we could postpone this till we make the call and reload the
10906 * registers after returning from the call. Not sure if that's sensible or
10907 * not, though. */
10908 off = iemNativeRegFlushPendingWrites(pReNative, off);
10909
10910 /*
10911 * Move/spill/flush stuff out of call-volatile registers.
10912 *
10913 * We exclude any register holding the bUnmapInfo variable, as we'll be
10914 * checking it after returning from the call and will free it afterwards.
10915 */
10916 /** @todo save+restore active registers and maybe guest shadows in miss
10917 * scenario. */
10918 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
10919
10920 /*
10921 * If the value in idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
10922 * to call the unmap helper function.
10923 *
10924 * The likelihood of it being zero is higher than for the TLB hit when doing
10925 * the mapping, as a TLB miss for a well aligned and unproblematic memory
10926 * access should also end up with a mapping that won't need special unmapping.
10927 */
10928 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
10929 * should speed up things for the pure interpreter as well when TLBs
10930 * are enabled. */
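 /* In plain C the code emitted below amounts to roughly (illustration only):
        if (bUnmapInfo != 0)
            pfnFunction(pVCpu, bUnmapInfo);
    i.e. the helper call is skipped entirely when no unmapping is needed. */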
10931#ifdef RT_ARCH_AMD64
10932 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
10933 {
10934 /* test byte [rbp - xxx], 0ffh */
10935 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
10936 pbCodeBuf[off++] = 0xf6;
10937 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
10938 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
10939 pbCodeBuf[off++] = 0xff;
10940 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10941 }
10942 else
10943#endif
10944 {
10945 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
10946 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
10947 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
10948 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
10949 }
10950 uint32_t const offJmpFixup = off;
10951 off = iemNativeEmitJzToFixed(pReNative, off, 0);
10952
10953 /*
10954 * Call the unmap helper function.
10955 */
10956#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
10957 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10958#else
10959 RT_NOREF(idxInstr);
10960#endif
10961
10962 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
10963 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo);
10964
10965 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10966 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10967
10968 /* Done setting up parameters, make the call. */
10969 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10970
10971 /* The bUnmapInfo variable is implicitly freed by these MCs. */
10972 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
10973
10974 /*
10975 * Done, just fixup the jump for the non-call case.
10976 */
10977 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
10978
10979 return off;
10980}
10981
10982
10983
10984/*********************************************************************************************************************************
10985* State and Exceptions *
10986*********************************************************************************************************************************/
10987
10988#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
10989#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
10990
10991#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
10992#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
10993#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
10994
10995#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
10996#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
10997#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
10998
10999
11000DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
11001{
11002 /** @todo this needs a lot more work later. */
11003 RT_NOREF(pReNative, fForChange);
11004 return off;
11005}
11006
11007
11008
11009/*********************************************************************************************************************************
11010* Builtin functions *
11011*********************************************************************************************************************************/
11012
11013/**
11014 * Built-in function that calls a C-implementation function taking zero arguments.
11015 */
11016static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
11017{
11018 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
11019 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
11020 uint64_t const fGstShwFlush = pCallEntry->auParams[2];
11021 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
11022}
11023
11024
11025/**
11026 * Built-in function that checks for pending interrupts that can be delivered or
11027 * forced action flags.
11028 *
11029 * This triggers after the completion of an instruction, so EIP is already at
11030 * the next instruction. If an IRQ or important FF is pending, this will return
11031 * a non-zero status that stops TB execution.
11032 */
11033static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
11034{
11035 RT_NOREF(pCallEntry);
11036
11037 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
11038 and I'm too lazy to create a 'Fixed' version of that one. */
11039 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
11040 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
11041
11042 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
11043
11044 /* Again, we need to load the extended EFLAGS before we actually need them
11045 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
11046 loaded them inside the check, as the shadow state would not be correct
11047 when the code branches before the load. Ditto PC. */
11048 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
11049 kIemNativeGstRegUse_ReadOnly);
11050
11051 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
11052
11053 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11054
11055 /*
11056 * Start by checking the local forced actions of the EMT we're on for IRQs
11057 * and other FFs that need servicing.
11058 */
11059 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
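 /* Roughly, the checks emitted below amount to (illustration only):
        if (pVCpu->fLocalForcedActions & <relevant VMCPU_FF mask>)
        {
            if (more than just APIC/PIC FFs are set)                -> ReturnBreak
            else if (EFLAGS.IF && no interrupt inhibit shadow)      -> ReturnBreak
            else if (EFLAGS.IF && uRipInhibitInt != PC)             -> ReturnBreak
        }
        if (pVM->fGlobalForcedActions & VM_FF_ALL_MASK)             -> ReturnBreak
    where ReturnBreak means exiting the TB with VINF_IEM_REEXEC_BREAK. */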
11060 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
11061 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
11062 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
11063 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
11064 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
11065 | VMCPU_FF_TLB_FLUSH
11066 | VMCPU_FF_UNHALT ),
11067 true /*fSetFlags*/);
11068 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
11069 uint32_t const offFixupJumpToVmCheck1 = off;
11070 off = iemNativeEmitJzToFixed(pReNative, off, 0);
11071
11072 /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
11073 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
11074 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
11075 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
11076 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
11077 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
11078
11079 /* So, it's only interrupt related FFs and we need to see if IRQs are being
11080 suppressed by the CPU or not. */
11081 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
11082 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
11083 idxLabelReturnBreak);
11084
11085 /* We've got shadow flags set, so we must check that the PC they are valid
11086 for matches our current PC value. */
11087 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
11088 * a register. */
11089 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
11090 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
11091
11092 /*
11093 * Now check the force flags of the VM.
11094 */
11095 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
11096 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
11097 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
11098 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
11099 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
11100 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
11101
11102 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
11103
11104 /*
11105 * We're good, no IRQs or FFs pending.
11106 */
11107 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11108 iemNativeRegFreeTmp(pReNative, idxEflReg);
11109 iemNativeRegFreeTmp(pReNative, idxPcReg);
11110
11111 return off;
11112}
11113
11114
11115/**
11116 * Built-in function that checks if IEMCPU::fExec has the expected value.
11117 */
11118static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
11119{
11120 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
11121 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11122
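 /* In effect (illustration only):
        if ((pVCpu->iem.s.fExec & IEMTB_F_KEY_MASK) != (fExpectedExec & IEMTB_F_KEY_MASK))
            return VINF_IEM_REEXEC_BREAK; */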
11123 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
11124 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
11125 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
11126 kIemNativeLabelType_ReturnBreak);
11127 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11128 return off;
11129}
11130
11131
11132/**
11133 * Sets idxTbCurInstr in preparation for raising an exception.
11134 */
11135/** @todo Optimize this, so we don't set the same value more than once. Just
11136 * needs some tracking. */
11137#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11138# define BODY_SET_CUR_INSTR() \
11139 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr))
11140#else
11141# define BODY_SET_CUR_INSTR() ((void)0)
11142#endif
11143
11144
11145/**
11146 * Macro that emits the 16/32-bit CS.LIM check.
11147 */
11148#define BODY_CHECK_CS_LIM(a_cbInstr) \
11149 off = iemNativeEmitBltInCheckCsLim(pReNative, off, (a_cbInstr))
11150
11151DECL_FORCE_INLINE(uint32_t)
11152iemNativeEmitBltInCheckCsLim(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
11153{
11154 Assert(cbInstr > 0);
11155 Assert(cbInstr < 16);
11156
11157 /*
11158 * We need CS.LIM and RIP here. When cbInstr is larger than 1, we also need
11159 * a temporary register for calculating the last address of the instruction.
11160 *
11161 * The calculation and comparisons are 32-bit. We ASSUME that the incoming
11162 * RIP isn't totally invalid, i.e. that any jump/call/ret/iret instruction
11163 * that last updated EIP here checked it already, and that we're therefore
11164 * safe in the 32-bit wrap-around scenario to only check that the last byte
11165 * is within CS.LIM. In the case of instruction-by-instruction advancing
11166 * up to an EIP wrap-around, we know that CS.LIM is 4G-1 because the limit
11167 * must be using 4KB granularity and the previous instruction was fine.
11168 */
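 /* I.e. what gets emitted is roughly (illustration):
        if ((uint32_t)(Pc + cbInstr - 1) > CS.LIM)
            goto RaiseGp0;
    with the cbInstr == 1 case comparing Pc against CS.LIM directly. */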
11169 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
11170 kIemNativeGstRegUse_ReadOnly);
11171 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
11172 kIemNativeGstRegUse_ReadOnly);
11173#ifdef RT_ARCH_AMD64
11174 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11175#elif defined(RT_ARCH_ARM64)
11176 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11177#else
11178# error "Port me"
11179#endif
11180
11181 if (cbInstr != 1)
11182 {
11183 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
11184
11185 /*
11186 * 1. idxRegTmp = idxRegPc + cbInstr - 1;
11187 * 2. if idxRegTmp > idxRegCsLim then raise #GP(0).
11188 */
11189#ifdef RT_ARCH_AMD64
11190 /* 1. lea tmp32, [Pc + cbInstr - 1] */
11191 if (idxRegTmp >= 8 || idxRegPc >= 8)
11192 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegPc < 8 ? 0 : X86_OP_REX_B);
11193 pbCodeBuf[off++] = 0x8d;
11194 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, idxRegTmp & 7, idxRegPc & 7);
11195 if ((idxRegPc & 7) == X86_GREG_xSP)
11196 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegPc & 7, 4 /*no index*/, 0);
11197 pbCodeBuf[off++] = cbInstr - 1;
11198
11199 /* 2. cmp tmp32(r), CsLim(r/m). */
11200 if (idxRegTmp >= 8 || idxRegCsLim >= 8)
11201 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
11202 pbCodeBuf[off++] = 0x3b;
11203 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegTmp & 7, idxRegCsLim & 7);
11204
11205#elif defined(RT_ARCH_ARM64)
11206 /* 1. add tmp32, Pc, #cbInstr-1 */
11207 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegTmp, idxRegPc, cbInstr - 1, false /*f64Bit*/);
11208 /* 2. cmp tmp32, CsLim */
11209 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegTmp, idxRegCsLim,
11210 false /*f64Bit*/, true /*fSetFlags*/);
11211
11212#endif
11213 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11214 }
11215 else
11216 {
11217 /*
11218 * Here we can skip step 1 and compare PC and CS.LIM directly.
11219 */
11220#ifdef RT_ARCH_AMD64
11221 /* 2. cmp eip(r), CsLim(r/m). */
11222 if (idxRegPc >= 8 || idxRegCsLim >= 8)
11223 pbCodeBuf[off++] = (idxRegPc < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
11224 pbCodeBuf[off++] = 0x3b;
11225 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegPc & 7, idxRegCsLim & 7);
11226
11227#elif defined(RT_ARCH_ARM64)
11228 /* 2. cmp Pc, CsLim */
11229 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegPc, idxRegCsLim,
11230 false /*f64Bit*/, true /*fSetFlags*/);
11231
11232#endif
11233 }
11234
11235 /* 3. Jump if greater. */
11236 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
11237
11238 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
11239 iemNativeRegFreeTmp(pReNative, idxRegPc);
11240 return off;
11241}
11242
11243
11244/**
11245 * Macro that implements opcode (re-)checking.
11246 */
11247#define BODY_CHECK_OPCODES_DISABLED(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
11248 off = iemNativeEmitBltInCheckOpcodes(pReNative, off, (a_pTb), (a_idxRange), (a_offRange))
11249
11250DECL_FORCE_INLINE(uint32_t)
11251iemNativeEmitBltInCheckOpcodes(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint16_t offRange)
11252{
11253 Assert(idxRange < pTb->cRanges && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges));
11254 Assert(offRange < pTb->aRanges[idxRange].cbOpcodes);
11255
11256 uint32_t const idxLabelObsoleteTb = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ObsoleteTb);
11257
11258 /*
11259 * Where to start and how much to compare.
11260 *
11261 * Looking at the ranges produced when r160746 was running a DOS VM with TB
11262 * logging, the ranges can be anything from 1 byte to at least 0x197 bytes,
11263 * with the 6, 5, 4, 7, 8, 40, 3, 2, 9 and 10 being the top 10 in the sample.
11264 *
11265 * The top 10 for the early boot phase of a 64-bit debian 9.4 VM: 5, 9, 8,
11266 * 12, 10, 11, 6, 13, 15 and 16. Max 0x359 bytes. Same revision as above.
11267 */
11268 uint16_t offPage = pTb->aRanges[idxRange].offPhysPage + offRange;
11269 uint16_t cbLeft = pTb->aRanges[idxRange].cbOpcodes - offRange;
11270 uint8_t const *pbOpcodes = &pTb->pabOpcodes[pTb->aRanges[idxRange].offOpcodes];
11271 uint32_t offConsolidatedJump = UINT32_MAX;
11272
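 /* What we emit below is, in effect (illustration only):
        if (memcmp(&pVCpu->iem.s.pbInstrBuf[offPage], pbOpcodes, cbLeft) != 0)
            goto ObsoleteTb;
    just unrolled and specialized according to cbLeft and the page offset. */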
11273#ifdef RT_ARCH_AMD64
11274 /* AMD64/x86 offers a bunch of options. Smaller stuff can be completely
11275 inlined, while for larger blocks we use REPE CMPS. */
11276# define CHECK_OPCODES_CMP_IMMXX(a_idxReg, a_bOpcode) /* cost: 3 bytes */ do { \
11277 pbCodeBuf[off++] = a_bOpcode; \
11278 Assert(offPage < 127); \
11279 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 7, a_idxReg); \
11280 pbCodeBuf[off++] = RT_BYTE1(offPage); \
11281 } while (0)
11282
11283# define CHECK_OPCODES_CMP_JMP() /* cost: 7 bytes first time, then 2 bytes */ do { \
11284 if (offConsolidatedJump != UINT32_MAX) \
11285 { \
11286 int32_t const offDisp = (int32_t)offConsolidatedJump - (int32_t)(off + 2); \
11287 Assert(offDisp >= -128); \
11288 pbCodeBuf[off++] = 0x75; /* jnz near */ \
11289 pbCodeBuf[off++] = (uint8_t)offDisp; \
11290 } \
11291 else \
11292 { \
11293 pbCodeBuf[off++] = 0x74; /* jz near +5 */ \
11294 pbCodeBuf[off++] = 0x05; \
11295 offConsolidatedJump = off; \
11296 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */ \
11297 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_Rel32, -4); \
11298 pbCodeBuf[off++] = 0x00; \
11299 pbCodeBuf[off++] = 0x00; \
11300 pbCodeBuf[off++] = 0x00; \
11301 pbCodeBuf[off++] = 0x00; \
11302 } \
11303 } while (0)
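 /* Note: the first mismatch check emits a "jz +5" over a 5 byte "jmp rel32" to the
    ObsoleteTb label and remembers its location; subsequent checks only need a short
    "jnz" back to that jmp, which is why they cost just 2 bytes. */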
11304
11305# define CHECK_OPCODES_CMP_IMM32(a_idxReg) /* cost: 3+4+2 = 9 */ do { \
11306 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
11307 pbCodeBuf[off++] = *pbOpcodes++; \
11308 pbCodeBuf[off++] = *pbOpcodes++; \
11309 pbCodeBuf[off++] = *pbOpcodes++; \
11310 pbCodeBuf[off++] = *pbOpcodes++; \
11311 cbLeft -= 4; \
11312 offPage += 4; \
11313 CHECK_OPCODES_CMP_JMP(); \
11314 } while (0)
11315
11316# define CHECK_OPCODES_CMP_IMM16(a_idxReg) /* cost: 1+3+2+2 = 8 */ do { \
11317 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; \
11318 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
11319 pbCodeBuf[off++] = *pbOpcodes++; \
11320 pbCodeBuf[off++] = *pbOpcodes++; \
11321 cbLeft -= 2; \
11322 offPage += 2; \
11323 CHECK_OPCODES_CMP_JMP(); \
11324 } while (0)
11325
11326# define CHECK_OPCODES_CMP_IMM8(a_idxReg) /* cost: 3+1+2 = 6 */ do { \
11327 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x80); \
11328 pbCodeBuf[off++] = *pbOpcodes++; \
11329 cbLeft -= 1; \
11330 offPage += 1; \
11331 CHECK_OPCODES_CMP_JMP(); \
11332 } while (0)
11333
11334# define CHECK_OPCODES_CMPSX(a_bOpcode, a_cbToSubtract, a_bPrefix) /* cost: 2+2 = 4 */ do { \
11335 if (a_bPrefix) \
11336 pbCodeBuf[off++] = (a_bPrefix); \
11337 pbCodeBuf[off++] = (a_bOpcode); \
11338 CHECK_OPCODES_CMP_JMP(); \
11339 cbLeft -= (a_cbToSubtract); \
11340 } while (0)
11341
11342# define CHECK_OPCODES_ECX_IMM(a_uValue) /* cost: 5 */ do { \
11343 pbCodeBuf[off++] = 0xb8 + X86_GREG_xCX; \
11344 pbCodeBuf[off++] = RT_BYTE1(a_uValue); \
11345 pbCodeBuf[off++] = RT_BYTE2(a_uValue); \
11346 pbCodeBuf[off++] = RT_BYTE3(a_uValue); \
11347 pbCodeBuf[off++] = RT_BYTE4(a_uValue); \
11348 } while (0)
11349
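 /* For example, with cbLeft == 7 the inline path below ends up emitting one dword,
    one word and one byte immediate compare (the alignment prologue only kicks in
    for cbLeft > 8). */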
11350 if (cbLeft <= 24)
11351 {
11352 uint8_t const idxRegTmp = iemNativeRegAllocTmpEx(pReNative, &off,
11353 ( RT_BIT_32(X86_GREG_xAX)
11354 | RT_BIT_32(X86_GREG_xCX)
11355 | RT_BIT_32(X86_GREG_xDX)
11356 | RT_BIT_32(X86_GREG_xBX)
11357 | RT_BIT_32(X86_GREG_xSI)
11358 | RT_BIT_32(X86_GREG_xDI))
11359 & ~IEMNATIVE_REG_FIXED_MASK); /* pick reg not requiring rex prefix */
11360 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf));
11361 if (offPage >= 128 - cbLeft)
11362 {
11363 off = iemNativeEmitAddGprImm(pReNative, off, idxRegTmp, offPage & ~(uint16_t)3);
11364 offPage &= 3;
11365 }
11366
11367 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 14 + 54 + 8 + 6 /* = 87 */);
11368
11369 if (cbLeft > 8)
11370 switch (offPage & 3)
11371 {
11372 case 0:
11373 break;
11374 case 1: /* cost: 6 + 8 = 14 */
11375 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
11376 RT_FALL_THRU();
11377 case 2: /* cost: 8 */
11378 CHECK_OPCODES_CMP_IMM16(idxRegTmp);
11379 break;
11380 case 3: /* cost: 6 */
11381 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
11382 break;
11383 }
11384
11385 while (cbLeft >= 4)
11386 CHECK_OPCODES_CMP_IMM32(idxRegTmp); /* max iteration: 24/4 = 6; --> cost: 6 * 9 = 54 */
11387
11388 if (cbLeft >= 2)
11389 CHECK_OPCODES_CMP_IMM16(idxRegTmp); /* cost: 8 */
11390 if (cbLeft)
11391 CHECK_OPCODES_CMP_IMM8(idxRegTmp); /* cost: 6 */
11392
11393 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11394 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11395 }
11396 else
11397 {
11398 /* RDI = &pbInstrBuf[offPage] */
11399 uint8_t const idxRegDi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xDI));
11400 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegDi, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
11401 if (offPage != 0)
11402 off = iemNativeEmitAddGprImm(pReNative, off, idxRegDi, offPage);
11403
11404 /* RSI = pbOpcodes */
11405 uint8_t const idxRegSi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xSI));
11406 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegSi, (uintptr_t)pbOpcodes);
11407
11408 /* RCX = counts. */
11409 uint8_t const idxRegCx = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xCX));
11410
11411 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 10 + 5 + 5 + 3 + 4 + 3 /*= 35*/);
11412
11413 /** @todo profile and optimize this further. Maybe an idea to align by
11414 * offPage if the two cannot be reconciled. */
11415 /* Align by the page offset, so that at least one of the accesses is naturally aligned. */
11416 switch (offPage & 7) /* max cost: 10 */
11417 {
11418 case 0:
11419 break;
11420 case 1: /* cost: 3+4+3 = 10 */
11421 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
11422 RT_FALL_THRU();
11423 case 2: /* cost: 4+3 = 7 */
11424 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
11425 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
11426 break;
11427 case 3: /* cost: 3+3 = 6 */
11428 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
11429 RT_FALL_THRU();
11430 case 4: /* cost: 3 */
11431 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
11432 break;
11433 case 5: /* cost: 3+4 = 7 */
11434 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
11435 RT_FALL_THRU();
11436 case 6: /* cost: 4 */
11437 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
11438 break;
11439 case 7: /* cost: 3 */
11440 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
11441 break;
11442 }
11443
11444 /* Compare qwords: */
11445 uint32_t const cQWords = cbLeft >> 3;
11446 CHECK_OPCODES_ECX_IMM(cQWords); /* cost: 5 */
11447
11448 pbCodeBuf[off++] = X86_OP_PRF_REPZ; /* cost: 5 */
11449 CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_REX_W);
11450 cbLeft &= 7;
11451
11452 if (cbLeft & 4)
11453 CHECK_OPCODES_CMPSX(0xa7, 0, 0); /* cost: 3 */
11454 if (cbLeft & 2)
11455 CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_PRF_SIZE_OP); /* cost: 4 */
11456 if (cbLeft & 1)
11457 CHECK_OPCODES_CMPSX(0xa6, 0, 0); /* cost: 3 */
11458
11459 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11460 iemNativeRegFreeTmp(pReNative, idxRegCx);
11461 iemNativeRegFreeTmp(pReNative, idxRegSi);
11462 iemNativeRegFreeTmp(pReNative, idxRegDi);
11463 }
11464
11465#elif defined(RT_ARCH_ARM64)
11466 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
11467 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
11468# if 0
11469
11470 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11471 /** @todo continue here */
11472# else
11473 AssertReleaseFailed();
11474 RT_NOREF(pReNative, off, pTb, idxRange, offRange);
11475# endif
11476 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11477#else
11478# error "Port me"
11479#endif
11480 return off;
11481}
11482
11483
11484#ifdef BODY_CHECK_CS_LIM
11485/**
11486 * Built-in function that checks that the EIP/IP + uParam0 is within CS.LIM,
11487 * raising a \#GP(0) if this isn't the case.
11488 */
11489static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
11490{
11491 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11492 BODY_SET_CUR_INSTR();
11493 BODY_CHECK_CS_LIM(cbInstr);
11494 return off;
11495}
11496#endif
11497
11498
11499#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_CS_LIM)
11500/**
11501 * Built-in function for re-checking opcodes and CS.LIM after an instruction
11502 * that may have modified them.
11503 */
11504static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
11505{
11506 PCIEMTB const pTb = pReNative->pTbOrg;
11507 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11508 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11509 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
11510 BODY_SET_CUR_INSTR();
11511 BODY_CHECK_CS_LIM(cbInstr);
11512 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
11513 return off;
11514}
11515#endif
11516
11517
11518#if defined(BODY_CHECK_OPCODES)
11519/**
11520 * Built-in function for re-checking opcodes after an instruction that may have
11521 * modified them.
11522 */
11523static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
11524{
11525 PCIEMTB const pTb = pReNative->pTbOrg;
11526 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11527 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11528 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
11529 BODY_SET_CUR_INSTR();
11530 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
11531 return off;
11532}
11533#endif
11534
11535
11536#if defined(BODY_CHECK_OPCODES) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
11537/**
11538 * Built-in function for re-checking opcodes and considering the need for CS.LIM
11539 * checking after an instruction that may have modified them.
11540 */
11541static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
11542{
11543 PCIEMTB const pTb = pReNative->pTbOrg;
11544 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11545 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11546 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
11547 BODY_SET_CUR_INSTR();
11548 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
11549 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
11550 return off;
11551}
11552#endif
11553
11554
11555/*
11556 * Post-branching checkers.
11557 */
11558
11559#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
11560/**
11561 * Built-in function for checking CS.LIM, checking the PC and checking opcodes
11562 * after conditional branching within the same page.
11563 *
11564 * @see iemThreadedFunc_BltIn_CheckPcAndOpcodes
11565 */
11566static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
11567{
11568 PCIEMTB const pTb = pReNative->pTbOrg;
11569 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11570 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11571 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
11572 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
11573 BODY_SET_CUR_INSTR();
11574 BODY_CHECK_CS_LIM(cbInstr);
11575 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, cbInstr);
11576 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
11577 //LogFunc(("okay\n"));
11578 return off;
11579}
11580#endif
11581
11582
11583#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH)
11584/**
11585 * Built-in function for checking the PC and checking opcodes after conditional
11586 * branching within the same page.
11587 *
11588 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
11589 */
11590static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
11591{
11592 PCIEMTB const pTb = pReNative->pTbOrg;
11593 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11594 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11595 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
11596 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
11597 BODY_SET_CUR_INSTR();
11598 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, cbInstr);
11599 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
11600 //LogFunc(("okay\n"));
11601 return off;
11602}
11603#endif
11604
11605
11606#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
11607/**
11608 * Built-in function for checking the PC and checking opcodes and considering
11609 * the need for CS.LIM checking after conditional branching within the same
11610 * page.
11611 *
11612 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
11613 */
11614static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
11615{
11616 PCIEMTB const pTb = pReNative->pTbOrg;
11617 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11618 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11619 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
11620 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
11621 BODY_SET_CUR_INSTR();
11622 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
11623 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, cbInstr);
11624 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
11625 //LogFunc(("okay\n"));
11626 return off;
11627}
11628#endif
11629
11630
11631#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
11632/**
11633 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
11634 * transitioning to a different code page.
11635 *
11636 * The code page transition can either be natural, going over onto the next page
11637 * (with the instruction starting at page offset zero), or happen by means of branching.
11638 *
11639 * @see iemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
11640 */
11641static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
11642{
11643 PCIEMTB const pTb = pReNative->pTbOrg;
11644 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11645 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11646 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
11647 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
11648 BODY_SET_CUR_INSTR();
11649 BODY_CHECK_CS_LIM(cbInstr);
11650 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
11651 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
11652 //LogFunc(("okay\n"));
11653 return off;
11654}
11655#endif
11656
11657
11658#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH)
11659/**
11660 * Built-in function for loading TLB and checking opcodes when transitioning to
11661 * a different code page.
11662 *
11663 * The code page transition can either be natural, going over onto the next page
11664 * (with the instruction starting at page offset zero), or happen by means of branching.
11665 *
11666 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
11667 */
11668static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
11669{
11670 PCIEMTB const pTb = pReNative->pTbOrg;
11671 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11672 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11673 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
11674 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
11675 BODY_SET_CUR_INSTR();
11676 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
11677 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
11678 //LogFunc(("okay\n"));
11679 return off;
11680}
11681#endif
11682
11683
11684#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
11685/**
11686 * Built-in function for loading TLB and checking opcodes and considering the
11687 * need for CS.LIM checking when transitioning to a different code page.
11688 *
11689 * The code page transition can either be natural, going over onto the next page
11690 * (with the instruction starting at page offset zero), or happen by means of branching.
11691 *
11692 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
11693 */
11694static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
11695{
11696 PCIEMTB const pTb = pReNative->pTbOrg;
11697 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11698 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11699 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
11700 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
11701 BODY_SET_CUR_INSTR();
11702 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
11703 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
11704 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
11705 //LogFunc(("okay\n"));
11706 return off;
11707}
11708#endif
11709
11710
11711
11712/*
11713 * Natural page crossing checkers.
11714 */
11715
11716#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
11717/**
11718 * Built-in function for checking CS.LIM, loading TLB and checking opcodes on
11719 * both pages when transitioning to a different code page.
11720 *
11721 * This is used when the previous instruction requires revalidation of opcode
11722 * bytes and the current instruction straddles a page boundary with opcode bytes
11723 * in both the old and new page.
11724 *
11725 * @see iemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
11726 */
11727static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
11728{
11729 PCIEMTB const pTb = pReNative->pTbOrg;
11730 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11731 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
11732 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
11733 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
11734 uint32_t const idxRange2 = idxRange1 + 1;
11735 BODY_SET_CUR_INSTR();
11736 BODY_CHECK_CS_LIM(cbInstr);
11737 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
11738 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
11739 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
11740 return off;
11741}
11742#endif
11743
11744
11745#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
11746/**
11747 * Built-in function for loading TLB and checking opcodes on both pages when
11748 * transitioning to a different code page.
11749 *
11750 * This is used when the previous instruction requires revalidation of opcode
11751 * bytes and the current instruction straddles a page boundary with opcode bytes
11752 * in both the old and new page.
11753 *
11754 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
11755 */
11756static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
11757{
11758 PCIEMTB const pTb = pReNative->pTbOrg;
11759 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11760 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
11761 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
11762 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
11763 uint32_t const idxRange2 = idxRange1 + 1;
11764 BODY_SET_CUR_INSTR();
11765 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
11766 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
11767 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
11768 return off;
11769}
11770#endif
11771
11772
11773#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
11774/**
11775 * Built-in function for loading TLB and checking opcodes on both pages and
11776 * considering the need for CS.LIM checking when transitioning to a different
11777 * code page.
11778 *
11779 * This is used when the previous instruction requires revalidation of opcode
11780 * bytes and the current instruction straddles a page boundary with opcode bytes
11781 * in both the old and new page.
11782 *
11783 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
11784 */
11785static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
11786{
11787 PCIEMTB const pTb = pReNative->pTbOrg;
11788 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11789 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
11790 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
11791 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
11792 uint32_t const idxRange2 = idxRange1 + 1;
11793 BODY_SET_CUR_INSTR();
11794 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
11795 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
11796 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
11797 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
11798 return off;
11799}
11800#endif
11801
11802
11803#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
11804/**
11805 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
11806 * advancing naturally to a different code page.
11807 *
11808 * Only opcodes on the new page are checked.
11809 *
11810 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
11811 */
11812static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
11813{
11814 PCIEMTB const pTb = pReNative->pTbOrg;
11815 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11816 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
11817 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
11818 //uint32_t const offRange1 = (uint32_t)uParam2;
11819 uint32_t const idxRange2 = idxRange1 + 1;
11820 BODY_SET_CUR_INSTR();
11821 BODY_CHECK_CS_LIM(cbInstr);
11822 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
11823 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
11824 return off;
11825}
11826#endif
11827
11828
11829#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
11830/**
11831 * Built-in function for loading TLB and checking opcodes when advancing
11832 * naturally to a different code page.
11833 *
11834 * Only opcodes on the new page are checked.
11835 *
11836 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
11837 */
11838static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
11839{
11840 PCIEMTB const pTb = pReNative->pTbOrg;
11841 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11842 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
11843 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
11844 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
11845 uint32_t const idxRange2 = idxRange1 + 1;
11846 BODY_SET_CUR_INSTR();
11847 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
11848 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
11849 return off;
11850}
11851#endif
11852
11853
11854#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
11855/**
11856 * Built-in function for loading TLB and checking opcodes and considering the
11857 * need for CS.LIM checking when advancing naturally to a different code page.
11858 *
11859 * Only opcodes on the new page are checked.
11860 *
11861 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
11862 */
11863static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
11864{
11865 PCIEMTB const pTb = pReNative->pTbOrg;
11866 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11867 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
11868 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
11869 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
11870 uint32_t const idxRange2 = idxRange1 + 1;
11871 BODY_SET_CUR_INSTR();
11872 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
11873 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
11874 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
11875 return off;
11876}
11877#endif
11878
11879
11880#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
11881/**
11882 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
11883 * advancing naturally to a different code page with first instr at byte 0.
11884 *
11885 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
11886 */
11887static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
11888{
11889 PCIEMTB const pTb = pReNative->pTbOrg;
11890 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11891 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11892 BODY_SET_CUR_INSTR();
11893 BODY_CHECK_CS_LIM(cbInstr);
11894 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
11895 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
11896 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
11897 return off;
11898}
11899#endif
11900
11901
11902#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
11903/**
11904 * Built-in function for loading TLB and checking opcodes when advancing
11905 * naturally to a different code page with first instr at byte 0.
11906 *
11907 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
11908 */
11909static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
11910{
11911 PCIEMTB const pTb = pReNative->pTbOrg;
11912 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11913 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11914 BODY_SET_CUR_INSTR();
11915 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
11916 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
11917 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
11918 return off;
11919}
11920#endif
11921
11922
11923#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
11924/**
11925 * Built-in function for loading TLB and checking opcodes and considering the
11926 * need for CS.LIM checking when advancing naturally to a different code page
11927 * with first instr at byte 0.
11928 *
11929 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
11930 */
11931static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
11932{
11933 PCIEMTB const pTb = pReNative->pTbOrg;
11934 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
11935 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
11936 BODY_SET_CUR_INSTR();
11937 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
11938 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
11939 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
11940 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
11941 return off;
11942}
11943#endif
11944
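/*
 * Note: the CheckOpcodesOnNext/NewPage built-ins above all appear to decode
 * their operands the same way: the low 32 bits of pCallEntry->auParams[0]
 * carry the instruction length (cbInstr), the high 32 bits carry cbStartPage
 * where applicable, and auParams[1] holds the opcode range index.  This is
 * how the threaded call entries seem to be packed by the threaded recompiler.
 */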
11945
11946/*********************************************************************************************************************************
11947* The native code generator functions for each MC block. *
11948*********************************************************************************************************************************/
11949
11950
11951/*
11952 * Include g_apfnIemNativeRecompileFunctions and associated functions.
11953 *
11954 * This should probably live in its own file later, but let's see what the
11955 * compile times turn out to be first.
11956 */
11957#include "IEMNativeFunctions.cpp.h"
11958
11959
11960
11961/*********************************************************************************************************************************
11962* Recompiler Core. *
11963*********************************************************************************************************************************/
11964
11965
11966/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
11967static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
11968{
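    /* The opcode bytes are supplied up front via DISInstrWithPrefetchedBytes,
       so this callback just zero-pads whatever extra bytes the formatter asks
       for and reports that no further data is available. */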
11969 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
11970 pDis->cbCachedInstr += cbMaxRead;
11971 RT_NOREF(cbMinRead);
11972 return VERR_NO_DATA;
11973}
11974
11975
11976/**
11977 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
11978 * @returns pszBuf.
11979 * @param fFlags The flags.
11980 * @param pszBuf The output buffer.
11981 * @param cbBuf The output buffer size. At least 32 bytes.
11982 */
11983DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
11984{
11985 Assert(cbBuf >= 32);
11986 static RTSTRTUPLE const s_aModes[] =
11987 {
11988 /* [00] = */ { RT_STR_TUPLE("16BIT") },
11989 /* [01] = */ { RT_STR_TUPLE("32BIT") },
11990 /* [02] = */ { RT_STR_TUPLE("!2!") },
11991 /* [03] = */ { RT_STR_TUPLE("!3!") },
11992 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
11993 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
11994 /* [06] = */ { RT_STR_TUPLE("!6!") },
11995 /* [07] = */ { RT_STR_TUPLE("!7!") },
11996 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
11997 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
11998 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
11999 /* [0b] = */ { RT_STR_TUPLE("!b!") },
12000 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
12001 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
12002 /* [0e] = */ { RT_STR_TUPLE("!e!") },
12003 /* [0f] = */ { RT_STR_TUPLE("!f!") },
12004 /* [10] = */ { RT_STR_TUPLE("!10!") },
12005 /* [11] = */ { RT_STR_TUPLE("!11!") },
12006 /* [12] = */ { RT_STR_TUPLE("!12!") },
12007 /* [13] = */ { RT_STR_TUPLE("!13!") },
12008 /* [14] = */ { RT_STR_TUPLE("!14!") },
12009 /* [15] = */ { RT_STR_TUPLE("!15!") },
12010 /* [16] = */ { RT_STR_TUPLE("!16!") },
12011 /* [17] = */ { RT_STR_TUPLE("!17!") },
12012 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
12013 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
12014 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
12015 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
12016 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
12017 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
12018 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
12019 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
12020 };
12021 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
12022 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
12023 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
12024
12025 pszBuf[off++] = ' ';
12026 pszBuf[off++] = 'C';
12027 pszBuf[off++] = 'P';
12028 pszBuf[off++] = 'L';
12029 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
12030 Assert(off < 32);
12031
12032 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
12033
12034 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
12035 {
12036 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
12037 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
12038 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
12039 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
12040 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
12041 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
12042 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
12043 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
12044 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
12045 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
12046 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
12047 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
12048 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
12049 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
12050 };
12051 if (fFlags)
12052 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
12053 if (s_aFlags[i].fFlag & fFlags)
12054 {
12055 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
12056 pszBuf[off++] = ' ';
12057 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
12058 off += s_aFlags[i].cchName;
12059 fFlags &= ~s_aFlags[i].fFlag;
12060 if (!fFlags)
12061 break;
12062 }
12063 pszBuf[off] = '\0';
12064
12065 return pszBuf;
12066}
12067
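/*
 * Illustrative usage sketch (the Log statement and buffer size below are
 * merely examples, not taken from this file):
 *
 *      char szBuf[64];
 *      Log(("TB flags: %s\n", iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf))));
 *
 * For a flat 32-bit ring-3 native TB this would print something along the
 * lines of "32BIT_FLAT CPL3 TYPE_NATIVE": the mode token, the CPL, and the
 * names of whatever IEM_F_XXX / IEMTB_F_XXX flags happen to be set.
 */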
12068
12069DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
12070{
12071 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
12072
12073 char szDisBuf[512];
12074 DISSTATE Dis;
12075 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
12076 uint32_t const cNative = pTb->Native.cInstructions;
12077 uint32_t offNative = 0;
12078#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12079 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
12080#endif
12081 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
12082 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
12083 : DISCPUMODE_64BIT;
12084#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12085 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
12086#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12087 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
12088#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12089# error "Port me"
12090#else
12091 csh hDisasm = ~(size_t)0;
12092# if defined(RT_ARCH_AMD64)
12093 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
12094# elif defined(RT_ARCH_ARM64)
12095 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
12096# else
12097# error "Port me"
12098# endif
12099 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
12100#endif
12101
12102 /*
12103 * Print TB info.
12104 */
12105 pHlp->pfnPrintf(pHlp,
12106 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
12107 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
12108 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
12109 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
12110#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12111 if (pDbgInfo && pDbgInfo->cEntries > 1)
12112 {
12113 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
12114
12115 /*
12116 * This disassembly is driven by the debug info, which follows the native
12117 * code and indicates where the next guest instruction starts, where
12118 * labels are, and other such things.
12119 */
12120 uint32_t idxThreadedCall = 0;
12121 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
12122 uint8_t idxRange = UINT8_MAX;
12123 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
12124 uint32_t offRange = 0;
12125 uint32_t offOpcodes = 0;
12126 uint32_t const cbOpcodes = pTb->cbOpcodes;
12127 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
12128 uint32_t const cDbgEntries = pDbgInfo->cEntries;
12129 uint32_t iDbgEntry = 1;
12130 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
12131
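        /* offDbgNativeNext tracks the native code offset at which the next
           batch of debug info entries takes effect; the loop below interleaves
           those entries with the native disassembly. */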
12132 while (offNative < cNative)
12133 {
12134 /* If we're at or have passed the point where the next chunk of debug
12135 info starts, process it. */
12136 if (offDbgNativeNext <= offNative)
12137 {
12138 offDbgNativeNext = UINT32_MAX;
12139 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
12140 {
12141 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
12142 {
12143 case kIemTbDbgEntryType_GuestInstruction:
12144 {
12145 /* Did the exec flag change? */
12146 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
12147 {
12148 pHlp->pfnPrintf(pHlp,
12149 " fExec change %#08x -> %#08x %s\n",
12150 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
12151 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
12152 szDisBuf, sizeof(szDisBuf)));
12153 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
12154 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
12155 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
12156 : DISCPUMODE_64BIT;
12157 }
12158
12159 /* New opcode range? We need to cope with a spurious debug info entry here for cases
12160 where the compilation was aborted before the opcode was recorded and the actual
12161 instruction was translated to a threaded call. This may happen when we run out
12162 of ranges, or when some complicated interrupts/FFs are found to be pending or
12163 similar. So, we just deal with it here rather than in the compiler code as it
12164 is a lot simpler to do here. */
12165 if ( idxRange == UINT8_MAX
12166 || idxRange >= cRanges
12167 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
12168 {
12169 idxRange += 1;
12170 if (idxRange < cRanges)
12171 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
12172 else
12173 continue;
12174 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
12175 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
12176 + (pTb->aRanges[idxRange].idxPhysPage == 0
12177 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
12178 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
12179 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
12180 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
12181 pTb->aRanges[idxRange].idxPhysPage);
12182 GCPhysPc += offRange;
12183 }
12184
12185 /* Disassemble the instruction. */
12186 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
12187 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offRange, 15);
12188 uint32_t cbInstr = 1;
12189 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
12190 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
12191 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
12192 if (RT_SUCCESS(rc))
12193 {
12194 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12195 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12196 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12197 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12198
12199 static unsigned const s_offMarker = 55;
12200 static char const s_szMarker[] = " ; <--- guest";
12201 if (cch < s_offMarker)
12202 {
12203 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
12204 cch = s_offMarker;
12205 }
12206 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
12207 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
12208
12209 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
12210 }
12211 else
12212 {
12213 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
12214 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
12215 cbInstr = 1;
12216 }
12217 GCPhysPc += cbInstr;
12218 offOpcodes += cbInstr;
12219 offRange += cbInstr;
12220 continue;
12221 }
12222
12223 case kIemTbDbgEntryType_ThreadedCall:
12224 pHlp->pfnPrintf(pHlp,
12225 " Call #%u to %s (%u args) - %s\n",
12226 idxThreadedCall,
12227 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
12228 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
12229 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
12230 idxThreadedCall++;
12231 continue;
12232
12233 case kIemTbDbgEntryType_GuestRegShadowing:
12234 {
12235 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
12236 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
12237 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
12238 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
12239 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
12240 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
12241 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
12242 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
12243 else
12244 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
12245 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
12246 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
12247 continue;
12248 }
12249
12250 case kIemTbDbgEntryType_Label:
12251 {
12252 const char *pszName = "what_the_fudge";
12253 const char *pszComment = "";
12254 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
12255 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
12256 {
12257 case kIemNativeLabelType_Return:
12258 pszName = "Return";
12259 break;
12260 case kIemNativeLabelType_ReturnBreak:
12261 pszName = "ReturnBreak";
12262 break;
12263 case kIemNativeLabelType_ReturnWithFlags:
12264 pszName = "ReturnWithFlags";
12265 break;
12266 case kIemNativeLabelType_NonZeroRetOrPassUp:
12267 pszName = "NonZeroRetOrPassUp";
12268 break;
12269 case kIemNativeLabelType_RaiseGp0:
12270 pszName = "RaiseGp0";
12271 break;
12272 case kIemNativeLabelType_ObsoleteTb:
12273 pszName = "ObsoleteTb";
12274 break;
12275 case kIemNativeLabelType_If:
12276 pszName = "If";
12277 fNumbered = true;
12278 break;
12279 case kIemNativeLabelType_Else:
12280 pszName = "Else";
12281 fNumbered = true;
12282 pszComment = " ; regs state restored pre-if-block";
12283 break;
12284 case kIemNativeLabelType_Endif:
12285 pszName = "Endif";
12286 fNumbered = true;
12287 break;
12288 case kIemNativeLabelType_CheckIrq:
12289 pszName = "CheckIrq_CheckVM";
12290 fNumbered = true;
12291 break;
12292 case kIemNativeLabelType_TlbMiss:
12293 pszName = "TlbMiss";
12294 fNumbered = true;
12295 break;
12296 case kIemNativeLabelType_TlbDone:
12297 pszName = "TlbDone";
12298 fNumbered = true;
12299 break;
12300 case kIemNativeLabelType_Invalid:
12301 case kIemNativeLabelType_End:
12302 break;
12303 }
12304 if (fNumbered)
12305 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
12306 else
12307 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
12308 continue;
12309 }
12310
12311 case kIemTbDbgEntryType_NativeOffset:
12312 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
12313 Assert(offDbgNativeNext > offNative);
12314 break;
12315
12316 default:
12317 AssertFailed();
12318 }
12319 iDbgEntry++;
12320 break;
12321 }
12322 }
12323
12324 /*
12325 * Disassemble the next native instruction.
12326 */
12327 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
12328# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
12329 uint32_t cbInstr = sizeof(paNative[0]);
12330 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
12331 if (RT_SUCCESS(rc))
12332 {
12333# if defined(RT_ARCH_AMD64)
12334 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
12335 {
12336 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
12337 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
12338 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
12339 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
12340 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
12341 uInfo & 0x8000 ? "recompiled" : "todo");
12342 else
12343 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
12344 }
12345 else
12346# endif
12347 {
12348# ifdef RT_ARCH_AMD64
12349 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12350 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12351 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12352 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12353# elif defined(RT_ARCH_ARM64)
12354 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
12355 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12356 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12357# else
12358# error "Port me"
12359# endif
12360 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
12361 }
12362 }
12363 else
12364 {
12365# if defined(RT_ARCH_AMD64)
12366 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
12367 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
12368# elif defined(RT_ARCH_ARM64)
12369 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
12370# else
12371# error "Port me"
12372# endif
12373 cbInstr = sizeof(paNative[0]);
12374 }
12375 offNative += cbInstr / sizeof(paNative[0]);
12376
12377# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12378 cs_insn *pInstr;
12379 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
12380 (uintptr_t)pNativeCur, 1, &pInstr);
12381 if (cInstrs > 0)
12382 {
12383 Assert(cInstrs == 1);
12384# if defined(RT_ARCH_AMD64)
12385 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
12386 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
12387# else
12388 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
12389 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
12390# endif
12391 offNative += pInstr->size / sizeof(*pNativeCur);
12392 cs_free(pInstr, cInstrs);
12393 }
12394 else
12395 {
12396# if defined(RT_ARCH_AMD64)
12397 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
12398 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
12399# else
12400 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
12401# endif
12402 offNative++;
12403 }
12404# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12405 }
12406 }
12407 else
12408#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
12409 {
12410 /*
12411 * No debug info, just disassemble the x86 code and then the native code.
12412 *
12413 * First the guest code:
12414 */
12415 for (unsigned i = 0; i < pTb->cRanges; i++)
12416 {
12417 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
12418 + (pTb->aRanges[i].idxPhysPage == 0
12419 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
12420 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
12421 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
12422 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
12423 unsigned off = pTb->aRanges[i].offOpcodes;
12424 /** @todo this ain't working when crossing pages! */
12425 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
12426 while (off < cbOpcodes)
12427 {
12428 uint32_t cbInstr = 1;
12429 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
12430 &pTb->pabOpcodes[off], cbOpcodes - off,
12431 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
12432 if (RT_SUCCESS(rc))
12433 {
12434 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12435 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12436 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12437 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12438 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
12439 GCPhysPc += cbInstr;
12440 off += cbInstr;
12441 }
12442 else
12443 {
12444 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
12445 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
12446 break;
12447 }
12448 }
12449 }
12450
12451 /*
12452 * Then the native code:
12453 */
12454 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
12455 while (offNative < cNative)
12456 {
12457 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
12458# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
12459 uint32_t cbInstr = sizeof(paNative[0]);
12460 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
12461 if (RT_SUCCESS(rc))
12462 {
12463# if defined(RT_ARCH_AMD64)
12464 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
12465 {
12466 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
12467 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
12468 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
12469 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
12470 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
12471 uInfo & 0x8000 ? "recompiled" : "todo");
12472 else
12473 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
12474 }
12475 else
12476# endif
12477 {
12478# ifdef RT_ARCH_AMD64
12479 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12480 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12481 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12482 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12483# elif defined(RT_ARCH_ARM64)
12484 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
12485 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12486 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12487# else
12488# error "Port me"
12489# endif
12490 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
12491 }
12492 }
12493 else
12494 {
12495# if defined(RT_ARCH_AMD64)
12496 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
12497 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
12498# else
12499 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
12500# endif
12501 cbInstr = sizeof(paNative[0]);
12502 }
12503 offNative += cbInstr / sizeof(paNative[0]);
12504
12505# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12506 cs_insn *pInstr;
12507 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
12508 (uintptr_t)pNativeCur, 1, &pInstr);
12509 if (cInstrs > 0)
12510 {
12511 Assert(cInstrs == 1);
12512# if defined(RT_ARCH_AMD64)
12513 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
12514 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
12515# else
12516 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
12517 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
12518# endif
12519 offNative += pInstr->size / sizeof(*pNativeCur);
12520 cs_free(pInstr, cInstrs);
12521 }
12522 else
12523 {
12524# if defined(RT_ARCH_AMD64)
12525 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
12526 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
12527# else
12528 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
12529# endif
12530 offNative++;
12531 }
12532# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12533 }
12534 }
12535
12536#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
12537 /* Cleanup. */
12538 cs_close(&hDisasm);
12539#endif
12540}
12541
12542
12543/**
12544 * Recompiles the given threaded TB into a native one.
12545 *
12546 * In case of failure, the translation block will be returned as-is.
12547 *
12548 * @returns pTb.
12549 * @param pVCpu The cross context virtual CPU structure of the calling
12550 * thread.
12551 * @param pTb The threaded translation block to recompile to native.
12552 */
12553DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
12554{
12555 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
12556
12557 /*
12558 * The first time through, we allocate the recompiler state; the other times
12559 * we just need to reset it before using it again.
12560 */
12561 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
12562 if (RT_LIKELY(pReNative))
12563 iemNativeReInit(pReNative, pTb);
12564 else
12565 {
12566 pReNative = iemNativeInit(pVCpu, pTb);
12567 AssertReturn(pReNative, pTb);
12568 }
12569
12570 /*
12571 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
12572 * so that we can abort if an error happens.
12573 */
12574 uint32_t cCallsLeft = pTb->Thrd.cCalls;
12575#ifdef LOG_ENABLED
12576 uint32_t const cCallsOrg = cCallsLeft;
12577#endif
12578 uint32_t off = 0;
12579 int rc = VINF_SUCCESS;
12580 IEMNATIVE_TRY_SETJMP(pReNative, rc)
12581 {
12582 /*
12583 * Emit prolog code (fixed).
12584 */
12585 off = iemNativeEmitProlog(pReNative, off);
12586
12587 /*
12588 * Convert the calls to native code.
12589 */
12590#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12591 int32_t iGstInstr = -1;
12592#endif
12593#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
12594 uint32_t cThreadedCalls = 0;
12595 uint32_t cRecompiledCalls = 0;
12596#endif
12597 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
12598 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
12599 while (cCallsLeft-- > 0)
12600 {
12601 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
12602
12603 /*
12604 * Debug info and assembly markup.
12605 */
12606 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
12607 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
12608#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12609 iemNativeDbgInfoAddNativeOffset(pReNative, off);
12610 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
12611 {
12612 if (iGstInstr < (int32_t)pTb->cInstructions)
12613 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
12614 else
12615 Assert(iGstInstr == pTb->cInstructions);
12616 iGstInstr = pCallEntry->idxInstr;
12617 }
12618 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
12619#endif
12620#if defined(VBOX_STRICT)
12621 off = iemNativeEmitMarker(pReNative, off,
12622 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
12623 pCallEntry->enmFunction));
12624#endif
12625#if defined(VBOX_STRICT)
12626 iemNativeRegAssertSanity(pReNative);
12627#endif
12628
12629 /*
12630 * Actual work.
12631 */
12632 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
12633 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)"));
12634 if (pfnRecom) /** @todo stats on this. */
12635 {
12636 off = pfnRecom(pReNative, off, pCallEntry);
12637 STAM_REL_STATS({cRecompiledCalls++;});
12638 }
12639 else
12640 {
12641 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
12642 STAM_REL_STATS({cThreadedCalls++;});
12643 }
12644 Assert(off <= pReNative->cInstrBufAlloc);
12645 Assert(pReNative->cCondDepth == 0);
12646
12647 /*
12648 * Advance.
12649 */
12650 pCallEntry++;
12651 }
12652
12653 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
12654 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
12655 if (!cThreadedCalls)
12656 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
12657
12658 /*
12659 * Emit the epilog code.
12660 */
12661 uint32_t idxReturnLabel;
12662 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
12663
12664 /*
12665 * Generate special jump labels.
12666 */
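        /* bmLabelTypes is a bitmap of the label types that were actually
           requested while recompiling, so only the tail code that is really
           needed gets emitted here. */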
12667 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
12668 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
12669 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
12670 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
12671 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
12672 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
12673 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
12674 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
12675 }
12676 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
12677 {
12678 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
12679 return pTb;
12680 }
12681 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
12682 Assert(off <= pReNative->cInstrBufAlloc);
12683
12684 /*
12685 * Make sure all labels have been defined.
12686 */
12687 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
12688#ifdef VBOX_STRICT
12689 uint32_t const cLabels = pReNative->cLabels;
12690 for (uint32_t i = 0; i < cLabels; i++)
12691 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
12692#endif
12693
12694 /*
12695 * Allocate executable memory, copy over the code we've generated.
12696 */
12697 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
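    /* Process any delayed TB frees first, presumably so the executable memory
       they still hold becomes available for the allocation below. */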
12698 if (pTbAllocator->pDelayedFreeHead)
12699 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
12700
12701 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
12702 AssertReturn(paFinalInstrBuf, pTb);
12703 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
12704
12705 /*
12706 * Apply fixups.
12707 */
12708 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
12709 uint32_t const cFixups = pReNative->cFixups;
12710 for (uint32_t i = 0; i < cFixups; i++)
12711 {
12712 Assert(paFixups[i].off < off);
12713 Assert(paFixups[i].idxLabel < cLabels);
12714 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
12715 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
12716 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
12717 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
12718 switch (paFixups[i].enmType)
12719 {
12720#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
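            /* A 32-bit relative displacement on x86/AMD64: the value written is the
               label offset minus the fixup offset (both in bytes of native code)
               plus offAddend, which presumably compensates for rel32 being encoded
               relative to the end of the instruction rather than to the immediate
               field itself. */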
12721 case kIemNativeFixupType_Rel32:
12722 Assert(paFixups[i].off + 4 <= off);
12723 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12724 continue;
12725
12726#elif defined(RT_ARCH_ARM64)
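            /* Patches the imm26 field (bits 0..25) of an A64 B/BL instruction; the
               displacement is counted in 32-bit instructions.  The assert below is
               tighter than the architectural +/-32M instruction reach. */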
12727 case kIemNativeFixupType_RelImm26At0:
12728 {
12729 Assert(paFixups[i].off < off);
12730 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12731 Assert(offDisp >= -262144 && offDisp < 262144);
12732 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
12733 continue;
12734 }
12735
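            /* Patches the imm19 field (bits 5..23) used by A64 conditional branches
               and CBZ/CBNZ; displacement in instructions. */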
12736 case kIemNativeFixupType_RelImm19At5:
12737 {
12738 Assert(paFixups[i].off < off);
12739 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12740 Assert(offDisp >= -262144 && offDisp < 262144);
12741 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
12742 continue;
12743 }
12744
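            /* Patches the imm14 field (bits 5..18) used by A64 TBZ/TBNZ;
               displacement in instructions, hence the +/-8K assert below. */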
12745 case kIemNativeFixupType_RelImm14At5:
12746 {
12747 Assert(paFixups[i].off < off);
12748 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12749 Assert(offDisp >= -8192 && offDisp < 8192);
12750 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
12751 continue;
12752 }
12753
12754#endif
12755 case kIemNativeFixupType_Invalid:
12756 case kIemNativeFixupType_End:
12757 break;
12758 }
12759 AssertFailed();
12760 }
12761
12762 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
12763 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
12764
12765 /*
12766 * Convert the translation block.
12767 */
12768 RTMemFree(pTb->Thrd.paCalls);
12769 pTb->Native.paInstructions = paFinalInstrBuf;
12770 pTb->Native.cInstructions = off;
12771 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
12772#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12773 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
12774 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
12775#endif
12776
12777 Assert(pTbAllocator->cThreadedTbs > 0);
12778 pTbAllocator->cThreadedTbs -= 1;
12779 pTbAllocator->cNativeTbs += 1;
12780 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
12781
12782#ifdef LOG_ENABLED
12783 /*
12784 * Disassemble to the log if enabled.
12785 */
12786 if (LogIs3Enabled())
12787 {
12788 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
12789 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
12790# ifdef DEBUG_bird
12791 RTLogFlush(NULL);
12792# endif
12793 }
12794#endif
12795 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
12796
12797 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
12798 return pTb;
12799}
12800