VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 102736

Last change on this file since 102736 was 102736, checked in by vboxsync, 16 months ago

VMM/IEM: Debugged TLB lookup code on arm and enabled it there. bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 102736 2023-12-31 01:06:09Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128/*********************************************************************************************************************************
129* Internal Functions *
130*********************************************************************************************************************************/
131#ifdef VBOX_STRICT
132static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
133 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
134static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
135#endif
136#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
137static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
138static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
139#endif
140DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
142 IEMNATIVEGSTREG enmGstReg, uint32_t off);
143
144
145/*********************************************************************************************************************************
146* Executable Memory Allocator *
147*********************************************************************************************************************************/
148/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
149 * Use an alternative chunk sub-allocator that does not store internal data
150 * in the chunk.
151 *
152 * Using the RTHeapSimple is not practical on newer darwin systems where
153 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
154 * memory. We would have to change the protection of the whole chunk for
155 * every call to RTHeapSimple, which would be rather expensive.
156 *
157 * This alternative implementation lets us restrict page protection modifications
158 * to the pages backing the executable memory we just allocated.
159 */
160#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
161/** The chunk sub-allocation unit size in bytes. */
162#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
163/** The chunk sub-allocation unit size as a shift factor. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
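/* Illustrative only (editorial addition, not from the upstream source): with a
 * 128 byte sub-allocation unit, a request is rounded up to whole units before
 * the bitmap search, e.g. cbReq = 300 -> cReqUnits = (300 + 127) >> 7 = 3, i.e.
 * 384 bytes, while cbReq = 128 maps to exactly one unit.  This mirrors the
 * calculation in iemExecMemAllocatorAllocInChunk below. */
AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));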
165
166#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
167# ifdef IEMNATIVE_USE_GDB_JIT
168# define IEMNATIVE_USE_GDB_JIT_ET_DYN
169
170/** GDB JIT: Code entry. */
171typedef struct GDBJITCODEENTRY
172{
173 struct GDBJITCODEENTRY *pNext;
174 struct GDBJITCODEENTRY *pPrev;
175 uint8_t *pbSymFile;
176 uint64_t cbSymFile;
177} GDBJITCODEENTRY;
178
179/** GDB JIT: Actions. */
180typedef enum GDBJITACTIONS : uint32_t
181{
182 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
183} GDBJITACTIONS;
184
185/** GDB JIT: Descriptor. */
186typedef struct GDBJITDESCRIPTOR
187{
188 uint32_t uVersion;
189 GDBJITACTIONS enmAction;
190 GDBJITCODEENTRY *pRelevant;
191 GDBJITCODEENTRY *pHead;
192 /** Our addition: */
193 GDBJITCODEENTRY *pTail;
194} GDBJITDESCRIPTOR;
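/* Note (editorial addition): GDBJITCODEENTRY, GDBJITACTIONS and GDBJITDESCRIPTOR
 * mirror GDB's JIT compilation interface (struct jit_code_entry, enum jit_actions_t
 * and struct jit_descriptor in the GDB documentation); pTail is, as noted, a local
 * convenience addition. */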
195
196/** GDB JIT: Our simple symbol file data. */
197typedef struct GDBJITSYMFILE
198{
199 Elf64_Ehdr EHdr;
200# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
201 Elf64_Shdr aShdrs[5];
202# else
203 Elf64_Shdr aShdrs[7];
204 Elf64_Phdr aPhdrs[2];
205# endif
206 /** The dwarf ehframe data for the chunk. */
207 uint8_t abEhFrame[512];
208 char szzStrTab[128];
209 Elf64_Sym aSymbols[3];
210# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
211 Elf64_Sym aDynSyms[2];
212 Elf64_Dyn aDyn[6];
213# endif
214} GDBJITSYMFILE;
215
216extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
217extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
218
219/** Init once for g_IemNativeGdbJitLock. */
220static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
221/** Init once for the critical section. */
222static RTCRITSECT g_IemNativeGdbJitLock;
223
224/** GDB reads the info here. */
225GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
226
227/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
228DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
229{
230 ASMNopPause();
231}
232
233/** @callback_method_impl{FNRTONCE} */
234static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
235{
236 RT_NOREF(pvUser);
237 return RTCritSectInit(&g_IemNativeGdbJitLock);
238}
239
240
241# endif /* IEMNATIVE_USE_GDB_JIT */
242
243/**
244 * Per-chunk unwind info for non-windows hosts.
245 */
246typedef struct IEMEXECMEMCHUNKEHFRAME
247{
248# ifdef IEMNATIVE_USE_LIBUNWIND
249 /** The offset of the FDA into abEhFrame. */
250 uintptr_t offFda;
251# else
252 /** 'struct object' storage area. */
253 uint8_t abObject[1024];
254# endif
255# ifdef IEMNATIVE_USE_GDB_JIT
256# if 0
257 /** The GDB JIT 'symbol file' data. */
258 GDBJITSYMFILE GdbJitSymFile;
259# endif
260 /** The GDB JIT list entry. */
261 GDBJITCODEENTRY GdbJitEntry;
262# endif
263 /** The dwarf ehframe data for the chunk. */
264 uint8_t abEhFrame[512];
265} IEMEXECMEMCHUNKEHFRAME;
266/** Pointer to per-chunk unwind info for non-windows hosts. */
267typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
268#endif
269
270
271/**
272 * A chunk of executable memory.
273 */
274typedef struct IEMEXECMEMCHUNK
275{
276#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
277 /** Number of free items in this chunk. */
278 uint32_t cFreeUnits;
279 /** Hint where to start searching for free space in the allocation bitmap. */
280 uint32_t idxFreeHint;
281#else
282 /** The heap handle. */
283 RTHEAPSIMPLE hHeap;
284#endif
285 /** Pointer to the chunk. */
286 void *pvChunk;
287#ifdef IN_RING3
288 /**
289 * Pointer to the unwind information.
290 *
291 * This is used during C++ throw and longjmp (windows and probably most other
292 * platforms). Some debuggers (windbg) make use of it as well.
293 *
294 * Windows: This is allocated from hHeap on windows because (at least for
295 * AMD64) the UNWIND_INFO structure address in the
296 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
297 *
298 * Others: Allocated from the regular heap to avoid unnecessary executable data
299 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
300 void *pvUnwindInfo;
301#elif defined(IN_RING0)
302 /** Allocation handle. */
303 RTR0MEMOBJ hMemObj;
304#endif
305} IEMEXECMEMCHUNK;
306/** Pointer to a memory chunk. */
307typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
308
309
310/**
311 * Executable memory allocator for the native recompiler.
312 */
313typedef struct IEMEXECMEMALLOCATOR
314{
315 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
316 uint32_t uMagic;
317
318 /** The chunk size. */
319 uint32_t cbChunk;
320 /** The maximum number of chunks. */
321 uint32_t cMaxChunks;
322 /** The current number of chunks. */
323 uint32_t cChunks;
324 /** Hint where to start looking for available memory. */
325 uint32_t idxChunkHint;
326 /** Statistics: Current number of allocations. */
327 uint32_t cAllocations;
328
329 /** The total amount of memory available. */
330 uint64_t cbTotal;
331 /** Total amount of free memory. */
332 uint64_t cbFree;
333 /** Total amount of memory allocated. */
334 uint64_t cbAllocated;
335
336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
337 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
338 *
339 * Since the chunk size is a power of two and the minimum chunk size is a lot
340 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
341 * require a whole number of uint64_t elements in the allocation bitmap. So,
342 * for the sake of simplicity, they are allocated as one contiguous block
343 * covering all chunks. */
344 uint64_t *pbmAlloc;
345 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
346 uint32_t cUnitsPerChunk;
347 /** Number of bitmap elements per chunk (for quickly locating the bitmap
348 * portion corresponding to a chunk). */
349 uint32_t cBitmapElementsPerChunk;
350#else
351 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
352 * @{ */
353 /** The size of the heap internal block header. This is used to adjust the
354 * requested memory size to make sure there is exactly enough room for a header at
355 * the end of the blocks we allocate before the next 64 byte alignment line. */
356 uint32_t cbHeapBlockHdr;
357 /** The size of the initial heap allocation required to make sure the first
358 * allocation is correctly aligned. */
359 uint32_t cbHeapAlignTweak;
360 /** The alignment tweak allocation address. */
361 void *pvAlignTweak;
362 /** @} */
363#endif
364
365#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
366 /** Pointer to the array of unwind info running parallel to aChunks (same
367 * allocation as this structure, located after the bitmaps).
368 * (For Windows, the structures must reside in 32-bit RVA distance to the
369 * actual chunk, so they are allocated off the chunk.) */
370 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
371#endif
372
373 /** The allocation chunks. */
374 RT_FLEXIBLE_ARRAY_EXTENSION
375 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
376} IEMEXECMEMALLOCATOR;
377/** Pointer to an executable memory allocator. */
378typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
379
380/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
381#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
382
383
384static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
385
386
387/**
388 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
389 * the heap statistics.
390 */
391static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
392 uint32_t cbReq, uint32_t idxChunk)
393{
394 pExecMemAllocator->cAllocations += 1;
395 pExecMemAllocator->cbAllocated += cbReq;
396#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
397 pExecMemAllocator->cbFree -= cbReq;
398#else
399 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
400#endif
401 pExecMemAllocator->idxChunkHint = idxChunk;
402
403#ifdef RT_OS_DARWIN
404 /*
405 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
406 * on darwin. So, we mark the pages returned as read+write after alloc and
407 * expect the caller to call iemExecMemAllocatorReadyForUse when done
408 * writing to the allocation.
409 *
410 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
411 * for details.
412 */
413 /** @todo detect if this is necessary... it wasn't required on 10.15 or
414 * whatever older version it was. */
415 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
416 AssertRC(rc);
417#endif
418
419 return pvRet;
420}
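/* Typical caller flow sketched for the darwin case described above (editorial
 * addition, assumes the usual recompile path; not from the upstream source):
 *
 *     PIEMNATIVEINSTR paCode = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *     // ... emit native instructions into the RW mapping ...
 *     iemExecMemAllocatorReadyForUse(pVCpu, paCode, cbCode);  // flips to R+X and flushes the icache
 */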
421
422
423#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
424static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
425 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
426{
427 /*
428 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
429 */
430 Assert(!(cToScan & 63));
431 Assert(!(idxFirst & 63));
432 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
433 pbmAlloc += idxFirst / 64;
434
435 /*
436 * Scan the bitmap for cReqUnits consecutive clear bits
437 */
438 /** @todo This can probably be done more efficiently for non-x86 systems. */
439 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
440 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
441 {
442 uint32_t idxAddBit = 1;
443 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
444 idxAddBit++;
445 if (idxAddBit >= cReqUnits)
446 {
447 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
448
449 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
450 pChunk->cFreeUnits -= cReqUnits;
451 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
452
453 void * const pvRet = (uint8_t *)pChunk->pvChunk
454 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
455
456 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
457 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
458 }
459
460 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
461 }
462 return NULL;
463}
464#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
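/* Worked example of the bitmap scan above (editorial addition): with
 * cReqUnits = 3 and a bitmap whose low byte is 0xE7 (bits 0..2 and 5..7 set),
 * ASMBitFirstClear returns 3; bit 4 is also clear but bit 5 is set, so only
 * two consecutive clear bits are found and the scan resumes via
 * ASMBitNextClear after bit 4.  A run of three clear bits would instead be
 * marked allocated with ASMBitSetRange and converted to a chunk offset by
 * shifting the bit index by IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT. */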
465
466
467static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
468{
469#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
470 /*
471 * Figure out how much to allocate.
472 */
473 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
474 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
475 {
476 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
477 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
478 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
479 {
480 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
481 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
482 if (pvRet)
483 return pvRet;
484 }
485 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
486 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
487 cReqUnits, idxChunk);
488 }
489#else
490 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
491 if (pvRet)
492 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
493#endif
494 return NULL;
495
496}
497
498
499/**
500 * Allocates @a cbReq bytes of executable memory.
501 *
502 * @returns Pointer to the memory, NULL if out of memory or other problem
503 * encountered.
504 * @param pVCpu The cross context virtual CPU structure of the calling
505 * thread.
506 * @param cbReq How many bytes are required.
507 */
508static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
509{
510 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
511 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
512 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
513
514
515 for (unsigned iIteration = 0;; iIteration++)
516 {
517 /*
518 * Adjust the request size so it'll fit the allocator alignment/whatnot.
519 *
520 * For the RTHeapSimple allocator this means following the logic described
521 * in iemExecMemAllocatorGrow and attempting to allocate it from one of the
522 * existing chunks if we think we've got sufficient free memory around.
523 *
524 * For the alternative allocator we just align the request up to a whole unit size.
525 */
526#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
527 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
528#else
529 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
530#endif
531 if (cbReq <= pExecMemAllocator->cbFree)
532 {
533 uint32_t const cChunks = pExecMemAllocator->cChunks;
534 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
535 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
536 {
537 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
538 if (pvRet)
539 return pvRet;
540 }
541 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 }
548
549 /*
550 * Can we grow it with another chunk?
551 */
552 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
553 {
554 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
555 AssertLogRelRCReturn(rc, NULL);
556
557 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
558 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
559 if (pvRet)
560 return pvRet;
561 AssertFailed();
562 }
563
564 /*
565 * Try prune native TBs once.
566 */
567 if (iIteration == 0)
568 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
569 else
570 {
571 /** @todo stats... */
572 return NULL;
573 }
574 }
575
576}
577
578
579/** This is a hook that we may need later for changing memory protection back
580 * to readonly+exec */
581static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
582{
583#ifdef RT_OS_DARWIN
584 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
585 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
586 AssertRC(rc); RT_NOREF(pVCpu);
587
588 /*
589 * Flush the instruction cache:
590 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
591 */
592 /* sys_dcache_flush(pv, cb); - not necessary */
593 sys_icache_invalidate(pv, cb);
594#else
595 RT_NOREF(pVCpu, pv, cb);
596#endif
597}
598
599
600/**
601 * Frees executable memory.
602 */
603void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
604{
605 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
606 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
607 Assert(pv);
608#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
609 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
610#else
611 Assert(!((uintptr_t)pv & 63));
612#endif
613
614 /* Align the size as we did when allocating the block. */
615#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
616 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
617#else
618 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
619#endif
620
621 /* Free it / assert sanity. */
622#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
623 uint32_t const cChunks = pExecMemAllocator->cChunks;
624 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
625 bool fFound = false;
626 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
627 {
628 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
629 fFound = offChunk < cbChunk;
630 if (fFound)
631 {
632#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
633 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
634 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
635
636 /* Check that it's valid and free it. */
637 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
638 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
639 for (uint32_t i = 1; i < cReqUnits; i++)
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
641 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
642
643 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
644 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
645
646 /* Update the stats. */
647 pExecMemAllocator->cbAllocated -= cb;
648 pExecMemAllocator->cbFree += cb;
649 pExecMemAllocator->cAllocations -= 1;
650 return;
651#else
652 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
653 break;
654#endif
655 }
656 }
657# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
658 AssertFailed();
659# else
660 Assert(fFound);
661# endif
662#endif
663
664#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
665 /* Update stats while cb is freshly calculated. */
666 pExecMemAllocator->cbAllocated -= cb;
667 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
668 pExecMemAllocator->cAllocations -= 1;
669
670 /* Free it. */
671 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
672#endif
673}
674
675
676
677#ifdef IN_RING3
678# ifdef RT_OS_WINDOWS
679
680/**
681 * Initializes the unwind info structures for windows hosts.
682 */
683static int
684iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
685 void *pvChunk, uint32_t idxChunk)
686{
687 RT_NOREF(pVCpu);
688
689 /*
690 * The AMD64 unwind opcodes.
691 *
692 * This is a program that starts with RSP after a RET instruction that
693 * ends up in recompiled code, and the operations we describe here will
694 * restore all non-volatile registers and bring RSP back to where our
695 * RET address is. This means it's reverse order from what happens in
696 * the prologue.
697 *
698 * Note! Using a frame register approach here both because we have one
699 * and mainly because the UWOP_ALLOC_LARGE argument values
700 * would be a pain to write initializers for. On the positive
701 * side, we're impervious to changes in the stack variable
702 * area and can deal with dynamic stack allocations if necessary.
703 */
704 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
705 {
706 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
707 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
708 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
709 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
710 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
711 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
712 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
713 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
714 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
715 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
716 };
717 union
718 {
719 IMAGE_UNWIND_INFO Info;
720 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
721 } s_UnwindInfo =
722 {
723 {
724 /* .Version = */ 1,
725 /* .Flags = */ 0,
726 /* .SizeOfProlog = */ 16, /* whatever */
727 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
728 /* .FrameRegister = */ X86_GREG_xBP,
729 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
730 }
731 };
732 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
733 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
734
735 /*
736 * Calc how much space we need and allocate it off the exec heap.
737 */
738 unsigned const cFunctionEntries = 1;
739 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
740 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
741# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
743 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
744 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
745# else
746 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
747 - pExecMemAllocator->cbHeapBlockHdr;
748 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
749 32 /*cbAlignment*/);
750# endif
751 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
752 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
753
754 /*
755 * Initialize the structures.
756 */
757 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
758
759 paFunctions[0].BeginAddress = 0;
760 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
761 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
762
763 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
764 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
765
766 /*
767 * Register it.
768 */
769 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
770 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
771
772 return VINF_SUCCESS;
773}
774
775
776# else /* !RT_OS_WINDOWS */
777
778/**
779 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
780 */
781DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
782{
783 if (iValue >= 64)
784 {
785 Assert(iValue < 0x2000);
786 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
787 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
788 }
789 else if (iValue >= 0)
790 *Ptr.pb++ = (uint8_t)iValue;
791 else if (iValue > -64)
792 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
793 else
794 {
795 Assert(iValue > -0x2000);
796 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
797 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
798 }
799 return Ptr;
800}
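/* Example encodings produced by the helper above (editorial addition): the
 * data alignment factor -8 used below encodes as the single byte 0x78, a code
 * alignment factor of 1 as 0x01, and a two-byte value such as 300 as
 * 0xAC 0x02 (low seven bits plus continuation flag, then the remaining bits). */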
801
802
803/**
804 * Emits an ULEB128 encoded value (up to 64-bit wide).
805 */
806DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
807{
808 while (uValue >= 0x80)
809 {
810 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
811 uValue >>= 7;
812 }
813 *Ptr.pb++ = (uint8_t)uValue;
814 return Ptr;
815}
816
817
818/**
819 * Emits a CFA rule as register @a uReg + offset @a off.
820 */
821DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
822{
823 *Ptr.pb++ = DW_CFA_def_cfa;
824 Ptr = iemDwarfPutUleb128(Ptr, uReg);
825 Ptr = iemDwarfPutUleb128(Ptr, off);
826 return Ptr;
827}
828
829
830/**
831 * Emits a register (@a uReg) save location:
832 * CFA + @a off * data_alignment_factor
833 */
834DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
835{
836 if (uReg < 0x40)
837 *Ptr.pb++ = DW_CFA_offset | uReg;
838 else
839 {
840 *Ptr.pb++ = DW_CFA_offset_extended;
841 Ptr = iemDwarfPutUleb128(Ptr, uReg);
842 }
843 Ptr = iemDwarfPutUleb128(Ptr, off);
844 return Ptr;
845}
846
847
848# if 0 /* unused */
849/**
850 * Emits a register (@a uReg) save location, using signed offset:
851 * CFA + @a offSigned * data_alignment_factor
852 */
853DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
854{
855 *Ptr.pb++ = DW_CFA_offset_extended_sf;
856 Ptr = iemDwarfPutUleb128(Ptr, uReg);
857 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
858 return Ptr;
859}
860# endif
861
862
863/**
864 * Initializes the unwind info section for non-windows hosts.
865 */
866static int
867iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
868 void *pvChunk, uint32_t idxChunk)
869{
870 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
871 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
872
873 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
874
875 /*
876 * Generate the CIE first.
877 */
878# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
879 uint8_t const iDwarfVer = 3;
880# else
881 uint8_t const iDwarfVer = 4;
882# endif
883 RTPTRUNION const PtrCie = Ptr;
884 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
885 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
886 *Ptr.pb++ = iDwarfVer; /* DWARF version */
887 *Ptr.pb++ = 0; /* Augmentation. */
888 if (iDwarfVer >= 4)
889 {
890 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
891 *Ptr.pb++ = 0; /* Segment selector size. */
892 }
893# ifdef RT_ARCH_AMD64
894 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
895# else
896 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
897# endif
898 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
901# elif defined(RT_ARCH_ARM64)
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
903# else
904# error "port me"
905# endif
906 /* Initial instructions: */
907# ifdef RT_ARCH_AMD64
908 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
916# elif defined(RT_ARCH_ARM64)
917# if 1
918 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
919# else
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
921# endif
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
934 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
935 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
936# else
937# error "port me"
938# endif
939 while ((Ptr.u - PtrCie.u) & 3)
940 *Ptr.pb++ = DW_CFA_nop;
941 /* Finalize the CIE size. */
942 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
943
944 /*
945 * Generate an FDE for the whole chunk area.
946 */
947# ifdef IEMNATIVE_USE_LIBUNWIND
948 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
949# endif
950 RTPTRUNION const PtrFde = Ptr;
951 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
952 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
953 Ptr.pu32++;
954 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
955 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
956# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
957 *Ptr.pb++ = DW_CFA_nop;
958# endif
959 while ((Ptr.u - PtrFde.u) & 3)
960 *Ptr.pb++ = DW_CFA_nop;
961 /* Finalize the FDE size. */
962 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
963
964 /* Terminator entry. */
965 *Ptr.pu32++ = 0;
966 *Ptr.pu32++ = 0; /* just to be sure... */
967 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
968
969 /*
970 * Register it.
971 */
972# ifdef IEMNATIVE_USE_LIBUNWIND
973 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
974# else
975 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
976 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
977# endif
978
979# ifdef IEMNATIVE_USE_GDB_JIT
980 /*
981 * Now for telling GDB about this (experimental).
982 *
983 * This seems to work best with ET_DYN.
984 */
985 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
986# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
987 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
989# else
990 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
991 - pExecMemAllocator->cbHeapBlockHdr;
992 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
993# endif
994 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
995 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
996
997 RT_ZERO(*pSymFile);
998
999 /*
1000 * The ELF header:
1001 */
1002 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1003 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1004 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1005 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1006 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1007 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1008 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1009 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1010# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1011 pSymFile->EHdr.e_type = ET_DYN;
1012# else
1013 pSymFile->EHdr.e_type = ET_REL;
1014# endif
1015# ifdef RT_ARCH_AMD64
1016 pSymFile->EHdr.e_machine = EM_AMD64;
1017# elif defined(RT_ARCH_ARM64)
1018 pSymFile->EHdr.e_machine = EM_AARCH64;
1019# else
1020# error "port me"
1021# endif
1022 pSymFile->EHdr.e_version = 1; /*?*/
1023 pSymFile->EHdr.e_entry = 0;
1024# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1025 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1026# else
1027 pSymFile->EHdr.e_phoff = 0;
1028# endif
1029 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1030 pSymFile->EHdr.e_flags = 0;
1031 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1032# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1033 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1034 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1035# else
1036 pSymFile->EHdr.e_phentsize = 0;
1037 pSymFile->EHdr.e_phnum = 0;
1038# endif
1039 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1040 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1041 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1042
1043 uint32_t offStrTab = 0;
1044#define APPEND_STR(a_szStr) do { \
1045 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1046 offStrTab += sizeof(a_szStr); \
1047 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1048 } while (0)
1049#define APPEND_STR_FMT(a_szStr, ...) do { \
1050 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1051 offStrTab++; \
1052 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1053 } while (0)
1054
1055 /*
1056 * Section headers.
1057 */
1058 /* Section header #0: NULL */
1059 unsigned i = 0;
1060 APPEND_STR("");
1061 RT_ZERO(pSymFile->aShdrs[i]);
1062 i++;
1063
1064 /* Section header: .eh_frame */
1065 pSymFile->aShdrs[i].sh_name = offStrTab;
1066 APPEND_STR(".eh_frame");
1067 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1068 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1069# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1070 pSymFile->aShdrs[i].sh_offset
1071 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1072# else
1073 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1074 pSymFile->aShdrs[i].sh_offset = 0;
1075# endif
1076
1077 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1078 pSymFile->aShdrs[i].sh_link = 0;
1079 pSymFile->aShdrs[i].sh_info = 0;
1080 pSymFile->aShdrs[i].sh_addralign = 1;
1081 pSymFile->aShdrs[i].sh_entsize = 0;
1082 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1083 i++;
1084
1085 /* Section header: .shstrtab */
1086 unsigned const iShStrTab = i;
1087 pSymFile->EHdr.e_shstrndx = iShStrTab;
1088 pSymFile->aShdrs[i].sh_name = offStrTab;
1089 APPEND_STR(".shstrtab");
1090 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1091 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1092# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1093 pSymFile->aShdrs[i].sh_offset
1094 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1095# else
1096 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1097 pSymFile->aShdrs[i].sh_offset = 0;
1098# endif
1099 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1100 pSymFile->aShdrs[i].sh_link = 0;
1101 pSymFile->aShdrs[i].sh_info = 0;
1102 pSymFile->aShdrs[i].sh_addralign = 1;
1103 pSymFile->aShdrs[i].sh_entsize = 0;
1104 i++;
1105
1106 /* Section header: .symtab */
1107 pSymFile->aShdrs[i].sh_name = offStrTab;
1108 APPEND_STR(".symtab");
1109 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1110 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1111 pSymFile->aShdrs[i].sh_offset
1112 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1113 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1114 pSymFile->aShdrs[i].sh_link = iShStrTab;
1115 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1117 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1118 i++;
1119
1120# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1121 /* Section header: .dynsym */
1122 pSymFile->aShdrs[i].sh_name = offStrTab;
1123 APPEND_STR(".dynsym");
1124 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1125 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1126 pSymFile->aShdrs[i].sh_offset
1127 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1128 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1129 pSymFile->aShdrs[i].sh_link = iShStrTab;
1130 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1132 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1133 i++;
1134# endif
1135
1136# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1137 /* Section header: .dynamic */
1138 pSymFile->aShdrs[i].sh_name = offStrTab;
1139 APPEND_STR(".dynamic");
1140 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1141 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1142 pSymFile->aShdrs[i].sh_offset
1143 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1144 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1145 pSymFile->aShdrs[i].sh_link = iShStrTab;
1146 pSymFile->aShdrs[i].sh_info = 0;
1147 pSymFile->aShdrs[i].sh_addralign = 1;
1148 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1149 i++;
1150# endif
1151
1152 /* Section header: .text */
1153 unsigned const iShText = i;
1154 pSymFile->aShdrs[i].sh_name = offStrTab;
1155 APPEND_STR(".text");
1156 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1157 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1158# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1159 pSymFile->aShdrs[i].sh_offset
1160 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1161# else
1162 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1163 pSymFile->aShdrs[i].sh_offset = 0;
1164# endif
1165 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1166 pSymFile->aShdrs[i].sh_link = 0;
1167 pSymFile->aShdrs[i].sh_info = 0;
1168 pSymFile->aShdrs[i].sh_addralign = 1;
1169 pSymFile->aShdrs[i].sh_entsize = 0;
1170 i++;
1171
1172 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1173
1174# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1175 /*
1176 * The program headers:
1177 */
1178 /* Everything in a single LOAD segment: */
1179 i = 0;
1180 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1181 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1182 pSymFile->aPhdrs[i].p_offset
1183 = pSymFile->aPhdrs[i].p_vaddr
1184 = pSymFile->aPhdrs[i].p_paddr = 0;
1185 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1186 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1187 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1188 i++;
1189 /* The .dynamic segment. */
1190 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1191 pSymFile->aPhdrs[i].p_flags = PF_R;
1192 pSymFile->aPhdrs[i].p_offset
1193 = pSymFile->aPhdrs[i].p_vaddr
1194 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1195 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1196 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1197 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1198 i++;
1199
1200 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1201
1202 /*
1203 * The dynamic section:
1204 */
1205 i = 0;
1206 pSymFile->aDyn[i].d_tag = DT_SONAME;
1207 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1208 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1211 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1212 i++;
1213 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1214 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_NULL;
1223 i++;
1224 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1225# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1226
1227 /*
1228 * Symbol tables:
1229 */
1230 /** @todo gdb doesn't seem to really like this ... */
1231 i = 0;
1232 pSymFile->aSymbols[i].st_name = 0;
1233 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1234 pSymFile->aSymbols[i].st_value = 0;
1235 pSymFile->aSymbols[i].st_size = 0;
1236 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1237 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1238# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1239 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1240# endif
1241 i++;
1242
1243 pSymFile->aSymbols[i].st_name = 0;
1244 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1245 pSymFile->aSymbols[i].st_value = 0;
1246 pSymFile->aSymbols[i].st_size = 0;
1247 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1248 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1249 i++;
1250
1251 pSymFile->aSymbols[i].st_name = offStrTab;
1252 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1253# if 0
1254 pSymFile->aSymbols[i].st_shndx = iShText;
1255 pSymFile->aSymbols[i].st_value = 0;
1256# else
1257 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1258 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1259# endif
1260 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1261 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1262 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1263# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1264 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1265 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1266# endif
1267 i++;
1268
1269 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1270 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1271
1272 /*
1273 * The GDB JIT entry and informing GDB.
1274 */
1275 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1276# if 1
1277 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1278# else
1279 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1280# endif
1281
1282 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1283 RTCritSectEnter(&g_IemNativeGdbJitLock);
1284 pEhFrame->GdbJitEntry.pNext = NULL;
1285 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1286 if (__jit_debug_descriptor.pTail)
1287 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1288 else
1289 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1290 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1291 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1292
1293 /* Notify GDB: */
1294 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1295 __jit_debug_register_code();
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1297 RTCritSectLeave(&g_IemNativeGdbJitLock);
1298
1299# else /* !IEMNATIVE_USE_GDB_JIT */
1300 RT_NOREF(pVCpu);
1301# endif /* !IEMNATIVE_USE_GDB_JIT */
1302
1303 return VINF_SUCCESS;
1304}
1305
1306# endif /* !RT_OS_WINDOWS */
1307#endif /* IN_RING3 */
1308
1309
1310/**
1311 * Adds another chunk to the executable memory allocator.
1312 *
1313 * This is used by the init code for the initial allocation and later by the
1314 * regular allocator function when it's out of memory.
1315 */
1316static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1317{
1318 /* Check that we've room for growth. */
1319 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1320 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1321
1322 /* Allocate a chunk. */
1323#ifdef RT_OS_DARWIN
1324 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1325#else
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1327#endif
1328 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1329
1330#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1331 int rc = VINF_SUCCESS;
1332#else
1333 /* Initialize the heap for the chunk. */
1334 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1335 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1336 AssertRC(rc);
1337 if (RT_SUCCESS(rc))
1338 {
1339 /*
1340 * We want the memory to be aligned on 64 byte, so the first time thru
1341 * here we do some exploratory allocations to see how we can achieve this.
1342 * On subsequent runs we only make an initial adjustment allocation, if
1343 * necessary.
1344 *
1345 * Since we own the heap implementation, we know that the internal block
1346 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1347 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1348 * to the size, align up by 64 bytes, and subtract 32 bytes.
1349 *
1350 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1351 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1352 * allocation to force subsequent allocations to return 64 byte aligned
1353 * user areas.
1354 */
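 /* Worked example of the adjustment above (editorial addition): a 100 byte
  * request is adjusted to RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes, so the
  * 160 byte user area plus the next block's 32 byte header ends exactly on a
  * 64 byte boundary and the following user area is again 64 byte aligned. */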
1355 if (!pExecMemAllocator->cbHeapBlockHdr)
1356 {
1357 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1358 pExecMemAllocator->cbHeapAlignTweak = 64;
1359 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1360 32 /*cbAlignment*/);
1361 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1362
1363 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1364 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1365 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1366 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1367 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1368
1369 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 RTHeapSimpleFree(hHeap, pvTest2);
1376 RTHeapSimpleFree(hHeap, pvTest1);
1377 }
1378 else
1379 {
1380 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1381 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1382 }
1383 if (RT_SUCCESS(rc))
1384#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1385 {
1386 /*
1387 * Add the chunk.
1388 *
1389 * This must be done before the unwind init so windows can allocate
1390 * memory from the chunk when using the alternative sub-allocator.
1391 */
1392 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1393#ifdef IN_RING3
1394 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1395#endif
1396#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1397 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1398#else
1399 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1400 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1401 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1402 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1403#endif
1404
1405 pExecMemAllocator->cChunks = idxChunk + 1;
1406 pExecMemAllocator->idxChunkHint = idxChunk;
1407
1408#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1409 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1410 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1411#else
1412 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1413 pExecMemAllocator->cbTotal += cbFree;
1414 pExecMemAllocator->cbFree += cbFree;
1415#endif
1416
1417#ifdef IN_RING3
1418 /*
1419 * Initialize the unwind information (this cannot really fail atm).
1420 * (This sets pvUnwindInfo.)
1421 */
1422 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1423 if (RT_SUCCESS(rc))
1424#endif
1425 {
1426 return VINF_SUCCESS;
1427 }
1428
1429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1430 /* Just in case the impossible happens, undo the above: */
1431 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1432 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1433 pExecMemAllocator->cChunks = idxChunk;
1434 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1435 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1436 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1437 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1438#endif
1439 }
1440#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1441 }
1442#endif
1443 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1444 RT_NOREF(pVCpu);
1445 return rc;
1446}
1447
1448
1449/**
1450 * Initializes the executable memory allocator for native recompilation on the
1451 * calling EMT.
1452 *
1453 * @returns VBox status code.
1454 * @param pVCpu The cross context virtual CPU structure of the calling
1455 * thread.
1456 * @param cbMax The max size of the allocator.
1457 * @param cbInitial The initial allocator size.
1458 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1459 * dependent).
1460 */
1461int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1462{
1463 /*
1464 * Validate input.
1465 */
1466 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1467 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1468 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1469 || cbChunk == 0
1470 || ( RT_IS_POWER_OF_TWO(cbChunk)
1471 && cbChunk >= _1M
1472 && cbChunk <= _256M
1473 && cbChunk <= cbMax),
1474 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1475 VERR_OUT_OF_RANGE);
1476
1477 /*
1478 * Adjust/figure out the chunk size.
1479 */
1480 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1481 {
1482 if (cbMax >= _256M)
1483 cbChunk = _64M;
1484 else
1485 {
1486 if (cbMax < _16M)
1487 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1488 else
1489 cbChunk = (uint32_t)cbMax / 4;
1490 if (!RT_IS_POWER_OF_TWO(cbChunk))
1491 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1492 }
1493 }
1494
1495 if (cbChunk > cbMax)
1496 cbMax = cbChunk;
1497 else
1498 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1499 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1500 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
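 /* Illustrative sizing examples (editorial addition, not from the upstream
  * source): cbMax = 512M with cbChunk = 0 yields cbChunk = 64M and
  * cMaxChunks = 8; cbMax = 40M yields 40M/4 = 10M, rounded up to the 16M
  * power of two, with cbMax rounded up to 48M and cMaxChunks = 3. */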
1501
1502 /*
1503 * Allocate and initialize the allocator instance.
1504 */
1505 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1506#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1507 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1508 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1509 cbNeeded += cbBitmap * cMaxChunks;
1510 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1511 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1512#endif
1513#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1514 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1515 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1516#endif
1517 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1518 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1519 VERR_NO_MEMORY);
1520 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1521 pExecMemAllocator->cbChunk = cbChunk;
1522 pExecMemAllocator->cMaxChunks = cMaxChunks;
1523 pExecMemAllocator->cChunks = 0;
1524 pExecMemAllocator->idxChunkHint = 0;
1525 pExecMemAllocator->cAllocations = 0;
1526 pExecMemAllocator->cbTotal = 0;
1527 pExecMemAllocator->cbFree = 0;
1528 pExecMemAllocator->cbAllocated = 0;
1529#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1530 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1531 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1532 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1533 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1534#endif
1535#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1536 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1537#endif
1538 for (uint32_t i = 0; i < cMaxChunks; i++)
1539 {
1540#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1541 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1542 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1543#else
1544 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1545#endif
1546 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1547#ifdef IN_RING0
1548 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1549#else
1550 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1551#endif
1552 }
1553 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1554
1555 /*
1556 * Do the initial allocations.
1557 */
1558    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1559 {
1560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1561 AssertLogRelRCReturn(rc, rc);
1562 }
1563
1564 pExecMemAllocator->idxChunkHint = 0;
1565
1566 return VINF_SUCCESS;
1567}
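
/* Usage sketch (illustrative only; the concrete sizes are made up rather than taken from
   the actual caller): an EMT sets the allocator up once before its first recompilation,
   for instance
       int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0);  // cbMax, cbInitial, default cbChunk
   after which iemExecMemAllocatorGrow() adds further chunks on demand until cMaxChunks
   chunks have been allocated. */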
1568
1569
1570/*********************************************************************************************************************************
1571* Native Recompilation *
1572*********************************************************************************************************************************/
1573
1574
1575/**
1576 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1577 */
1578IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1579{
1580 pVCpu->iem.s.cInstructions += idxInstr;
1581 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1582}
1583
1584
1585/**
1586 * Used by TB code when it wants to raise a \#GP(0).
1587 */
1588IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1589{
1590 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1591#ifndef _MSC_VER
1592 return VINF_IEM_RAISED_XCPT; /* not reached */
1593#endif
1594}
1595
1596
1597/**
1598 * Used by TB code when detecting opcode changes.
1599 * @see iemThreadeFuncWorkerObsoleteTb
1600 */
1601IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1602{
1603    /* We set fSafeToFree to false because we're being called in the context
1604       of a TB callback function, which for native TBs means we cannot release
1605       the executable memory until we've returned our way back to iemTbExec, as
1606       that return path goes via the native code generated for the TB. */
1607 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1608 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1609 return VINF_IEM_REEXEC_BREAK;
1610}
1611
1612
1613/**
1614 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1615 */
1616IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1617{
1618 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1619 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1620 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1621 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1622 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1623 return VINF_IEM_REEXEC_BREAK;
1624}
1625
1626
1627/**
1628 * Used by TB code when we missed a PC check after a branch.
1629 */
1630IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1631{
1632 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1633 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1634 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1635 pVCpu->iem.s.pbInstrBuf));
1636 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1637 return VINF_IEM_REEXEC_BREAK;
1638}
1639
1640
1641
1642/*********************************************************************************************************************************
1643* Helpers: Segmented memory fetches and stores. *
1644*********************************************************************************************************************************/
1645
1646/**
1647 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1648 */
1649IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1650{
1651 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1652}
1653
1654
1655/**
1656 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1657 * to 16 bits.
1658 */
1659IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1660{
1661 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1662}
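
/* Note (added for clarity): the cast chains in these sign-extending fetch helpers first
   sign-extend the loaded value to the target width and then go unsigned again so that the
   final (uint64_t) cast zero-extends the result; e.g. a fetched byte 0x80 yields
   0x000000000000ff80 here, 0x00000000ffffff80 in the 32-bit variant and
   0xffffffffffffff80 in the 64-bit variant below. */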
1663
1664
1665/**
1666 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1667 * to 32 bits.
1668 */
1669IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1670{
1671 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1672}
1673
1674/**
1675 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1676 * to 64 bits.
1677 */
1678IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1679{
1680 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1681}
1682
1683
1684/**
1685 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1686 */
1687IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1688{
1689    return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1690}
1691
1692
1693/**
1694 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1695 * to 32 bits.
1696 */
1697IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1698{
1699    return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1700}
1701
1702
1703/**
1704 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1705 * to 64 bits.
1706 */
1707IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1708{
1709    return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1710}
1711
1712
1713/**
1714 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1715 */
1716IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1717{
1718    return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1719}
1720
1721
1722/**
1723 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1724 * to 64 bits.
1725 */
1726IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1727{
1728    return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1729}
1730
1731
1732/**
1733 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1734 */
1735IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1736{
1737    return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU64SafeJmp */
1738}
1739
1740
1741/**
1742 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1745{
1746 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1747}
1748
1749
1750/**
1751 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1752 */
1753IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1754{
1755 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1756}
1757
1758
1759/**
1760 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1763{
1764 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1765}
1766
1767
1768/**
1769 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1772{
1773 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1774}
1775
1776
1777
1778/**
1779 * Used by TB code to push unsigned 16-bit value onto a generic stack.
1780 */
1781IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
1782{
1783 iemMemStackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemStackPushU16SafeJmp */
1784}
1785
1786
1787/**
1788 * Used by TB code to push unsigned 32-bit value onto a generic stack.
1789 */
1790IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
1791{
1792 iemMemStackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SafeJmp */
1793}
1794
1795
1796/**
1797 * Used by TB code to push 32-bit selector value onto a generic stack.
1798 *
1799 * Intel CPUs don't write a whole dword, hence the special function.
1800 */
1801IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
1802{
1803 iemMemStackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SRegSafeJmp */
1804}
1805
1806
1807/**
1808 * Used by TB code to push unsigned 64-bit value onto a generic stack.
1809 */
1810IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
1811{
1812 iemMemStackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemStackPushU64SafeJmp */
1813}
1814
1815
1816/**
1817 * Used by TB code to pop a 16-bit general purpose register off a generic stack.
1818 */
1819IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
1820{
1821 iemMemStackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU16SafeJmp */
1822}
1823
1824
1825/**
1826 * Used by TB code to pop a 32-bit general purpose register off a generic stack.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
1829{
1830 iemMemStackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU32SafeJmp */
1831}
1832
1833
1834/**
1835 * Used by TB code to pop a 64-bit general purpose register off a generic stack.
1836 */
1837IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
1838{
1839 iemMemStackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU64SafeJmp */
1840}
1841
1842
1843
1844/*********************************************************************************************************************************
1845* Helpers: Flat memory fetches and stores. *
1846*********************************************************************************************************************************/
1847
1848/**
1849 * Used by TB code to load unsigned 8-bit data w/ flat address.
1850 * @note Zero extending the value to 64-bit to simplify assembly.
1851 */
1852IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1853{
1854 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1855}
1856
1857
1858/**
1859 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1860 * to 16 bits.
1861 * @note Zero extending the value to 64-bit to simplify assembly.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1864{
1865 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1866}
1867
1868
1869/**
1870 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1871 * to 32 bits.
1872 * @note Zero extending the value to 64-bit to simplify assembly.
1873 */
1874IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1875{
1876 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1877}
1878
1879
1880/**
1881 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1882 * to 64 bits.
1883 */
1884IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1885{
1886 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1887}
1888
1889
1890/**
1891 * Used by TB code to load unsigned 16-bit data w/ flat address.
1892 * @note Zero extending the value to 64-bit to simplify assembly.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1895{
1896 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1897}
1898
1899
1900/**
1901 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1902 * to 32 bits.
1903 * @note Zero extending the value to 64-bit to simplify assembly.
1904 */
1905IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1906{
1907 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1908}
1909
1910
1911/**
1912 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1913 * to 64 bits.
1914 * @note Zero extending the value to 64-bit to simplify assembly.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1917{
1918 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1919}
1920
1921
1922/**
1923 * Used by TB code to load unsigned 32-bit data w/ flat address.
1924 * @note Zero extending the value to 64-bit to simplify assembly.
1925 */
1926IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1927{
1928 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1929}
1930
1931
1932/**
1933 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1934 * to 64 bits.
1935 * @note Zero extending the value to 64-bit to simplify assembly.
1936 */
1937IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1938{
1939 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1940}
1941
1942
1943/**
1944 * Used by TB code to load unsigned 64-bit data w/ flat address.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1947{
1948    return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU64SafeJmp */
1949}
1950
1951
1952/**
1953 * Used by TB code to store unsigned 8-bit data w/ flat address.
1954 */
1955IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1956{
1957 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1958}
1959
1960
1961/**
1962 * Used by TB code to store unsigned 16-bit data w/ flat address.
1963 */
1964IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1965{
1966 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1967}
1968
1969
1970/**
1971 * Used by TB code to store unsigned 32-bit data w/ flat address.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1974{
1975 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1976}
1977
1978
1979/**
1980 * Used by TB code to store unsigned 64-bit data w/ flat address.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1983{
1984 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1985}
1986
1987
1988
1989/**
1990 * Used by TB code to push unsigned 16-bit value onto a flat 32-bit stack.
1991 */
1992IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
1993{
1994 iemMemFlat32StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat32StackPushU16SafeJmp */
1995}
1996
1997
1998/**
1999 * Used by TB code to push unsigned 32-bit value onto a flat 32-bit stack.
2000 */
2001IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
2002{
2003 iemMemFlat32StackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SafeJmp */
2004}
2005
2006
2007/**
2008 * Used by TB code to push segment selector value onto a flat 32-bit stack.
2009 *
2010 * Intel CPUs don't write a whole dword, hence the special function.
2011 */
2012IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
2013{
2014 iemMemFlat32StackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SRegSafeJmp */
2015}
2016
2017
2018/**
2019 * Used by TB code to pop a 16-bit general purpose register off a flat 32-bit stack.
2020 */
2021IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
2022{
2023 iemMemFlat32StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU16SafeJmp */
2024}
2025
2026
2027/**
2028 * Used by TB code to pop a 32-bit general purpose register off a flat 32-bit stack.
2029 */
2030IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
2031{
2032 iemMemFlat32StackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU32SafeJmp */
2033}
2034
2035
2036
2037/**
2038 * Used by TB code to push unsigned 16-bit value onto a flat 64-bit stack.
2039 */
2040IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
2041{
2042 iemMemFlat64StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat64StackPushU16SafeJmp */
2043}
2044
2045
2046/**
2047 * Used by TB code to push unsigned 64-bit value onto a flat 64-bit stack.
2048 */
2049IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
2050{
2051 iemMemFlat64StackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemFlat64StackPushU64SafeJmp */
2052}
2053
2054
2055/**
2056 * Used by TB code to pop a 16-bit general purpose register off a flat 64-bit stack.
2057 */
2058IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
2059{
2060 iemMemFlat64StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU16SafeJmp */
2061}
2062
2063
2064/**
2065 * Used by TB code to pop a 64-bit general purpose register off a flat 64-bit stack.
2066 */
2067IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
2068{
2069 iemMemFlat64StackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU64SafeJmp */
2070}
2071
2072
2073
2074/*********************************************************************************************************************************
2075* Helpers: Segmented memory mapping. *
2076*********************************************************************************************************************************/
2077
2078/**
2079 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2080 */
2081IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2082 RTGCPTR GCPtrMem, uint8_t iSegReg))
2083{
2084 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RwSafeJmp */
2085}
2086
2087
2088/**
2089 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2092 RTGCPTR GCPtrMem, uint8_t iSegReg))
2093{
2094 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8WoSafeJmp */
2095}
2096
2097
2098/**
2099 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2100 */
2101IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2102 RTGCPTR GCPtrMem, uint8_t iSegReg))
2103{
2104 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RoSafeJmp */
2105}
2106
2107
2108/**
2109 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2110 */
2111IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2112 RTGCPTR GCPtrMem, uint8_t iSegReg))
2113{
2114 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RwSafeJmp */
2115}
2116
2117
2118/**
2119 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2120 */
2121IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2122 RTGCPTR GCPtrMem, uint8_t iSegReg))
2123{
2124 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16WoSafeJmp */
2125}
2126
2127
2128/**
2129 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2130 */
2131IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2132 RTGCPTR GCPtrMem, uint8_t iSegReg))
2133{
2134 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RoSafeJmp */
2135}
2136
2137
2138/**
2139 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2140 */
2141IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2142 RTGCPTR GCPtrMem, uint8_t iSegReg))
2143{
2144 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RwSafeJmp */
2145}
2146
2147
2148/**
2149 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2150 */
2151IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2152 RTGCPTR GCPtrMem, uint8_t iSegReg))
2153{
2154 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32WoSafeJmp */
2155}
2156
2157
2158/**
2159 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2160 */
2161IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2162 RTGCPTR GCPtrMem, uint8_t iSegReg))
2163{
2164 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RoSafeJmp */
2165}
2166
2167
2168/**
2169 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2172 RTGCPTR GCPtrMem, uint8_t iSegReg))
2173{
2174 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RwSafeJmp */
2175}
2176
2177
2178/**
2179 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2180 */
2181IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2182 RTGCPTR GCPtrMem, uint8_t iSegReg))
2183{
2184 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64WoSafeJmp */
2185}
2186
2187
2188/**
2189 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2190 */
2191IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2192 RTGCPTR GCPtrMem, uint8_t iSegReg))
2193{
2194 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RoSafeJmp */
2195}
2196
2197
2198/**
2199 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2202 RTGCPTR GCPtrMem, uint8_t iSegReg))
2203{
2204 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataR80WoSafeJmp */
2205}
2206
2207
2208/**
2209 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2210 */
2211IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2212 RTGCPTR GCPtrMem, uint8_t iSegReg))
2213{
2214 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataD80WoSafeJmp */
2215}
2216
2217
2218/**
2219 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2220 */
2221IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2222 RTGCPTR GCPtrMem, uint8_t iSegReg))
2223{
2224 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RwSafeJmp */
2225}
2226
2227
2228/**
2229 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2230 */
2231IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2232 RTGCPTR GCPtrMem, uint8_t iSegReg))
2233{
2234 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128WoSafeJmp */
2235}
2236
2237
2238/**
2239 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2240 */
2241IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2242 RTGCPTR GCPtrMem, uint8_t iSegReg))
2243{
2244 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RoSafeJmp */
2245}
2246
2247
2248/*********************************************************************************************************************************
2249* Helpers: Flat memory mapping. *
2250*********************************************************************************************************************************/
2251
2252/**
2253 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2254 */
2255IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2256{
2257 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RwSafeJmp */
2258}
2259
2260
2261/**
2262 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2263 */
2264IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2265{
2266 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8WoSafeJmp */
2267}
2268
2269
2270/**
2271 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2272 */
2273IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2274{
2275 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RoSafeJmp */
2276}
2277
2278
2279/**
2280 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2281 */
2282IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2283{
2284 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RwSafeJmp */
2285}
2286
2287
2288/**
2289 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2290 */
2291IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2292{
2293 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16WoSafeJmp */
2294}
2295
2296
2297/**
2298 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2299 */
2300IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2301{
2302 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RoSafeJmp */
2303}
2304
2305
2306/**
2307 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2308 */
2309IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2310{
2311 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RwSafeJmp */
2312}
2313
2314
2315/**
2316 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2317 */
2318IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2319{
2320 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32WoSafeJmp */
2321}
2322
2323
2324/**
2325 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2326 */
2327IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2328{
2329 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RoSafeJmp */
2330}
2331
2332
2333/**
2334 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2335 */
2336IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2337{
2338 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RwSafeJmp */
2339}
2340
2341
2342/**
2343 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2344 */
2345IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2346{
2347 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64WoSafeJmp */
2348}
2349
2350
2351/**
2352 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2353 */
2354IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2355{
2356 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RoSafeJmp */
2357}
2358
2359
2360/**
2361 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2362 */
2363IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2364{
2365 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataR80WoSafeJmp */
2366}
2367
2368
2369/**
2370 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2371 */
2372IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2373{
2374 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataD80WoSafeJmp */
2375}
2376
2377
2378/**
2379 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2380 */
2381IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2382{
2383 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RwSafeJmp */
2384}
2385
2386
2387/**
2388 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2389 */
2390IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2391{
2392 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128WoSafeJmp */
2393}
2394
2395
2396/**
2397 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2400{
2401 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RoSafeJmp */
2402}
2403
2404
2405/*********************************************************************************************************************************
2406* Helpers: Commit, rollback & unmap *
2407*********************************************************************************************************************************/
2408
2409/**
2410 * Used by TB code to commit and unmap a read-write memory mapping.
2411 */
2412IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2413{
2414 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2415}
2416
2417
2418/**
2419 * Used by TB code to commit and unmap a write-only memory mapping.
2420 */
2421IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2422{
2423 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2424}
2425
2426
2427/**
2428 * Used by TB code to commit and unmap a read-only memory mapping.
2429 */
2430IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2431{
2432 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2433}
2434
2435
2436/**
2437 * Reinitializes the native recompiler state.
2438 *
2439 * Called before starting a new recompile job.
2440 */
2441static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2442{
2443 pReNative->cLabels = 0;
2444 pReNative->bmLabelTypes = 0;
2445 pReNative->cFixups = 0;
2446#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2447 pReNative->pDbgInfo->cEntries = 0;
2448#endif
2449 pReNative->pTbOrg = pTb;
2450 pReNative->cCondDepth = 0;
2451 pReNative->uCondSeqNo = 0;
2452 pReNative->uCheckIrqSeqNo = 0;
2453 pReNative->uTlbSeqNo = 0;
2454
2455 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2456#if IEMNATIVE_HST_GREG_COUNT < 32
2457 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2458#endif
2459 ;
2460 pReNative->Core.bmHstRegsWithGstShadow = 0;
2461 pReNative->Core.bmGstRegShadows = 0;
2462 pReNative->Core.bmVars = 0;
2463 pReNative->Core.bmStack = 0;
2464 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2465 pReNative->Core.u64ArgVars = UINT64_MAX;
2466
2467 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 9);
2468 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2469 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2470 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2471 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2472 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2473 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2474 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2475 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2476 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2477
2478 /* Full host register reinit: */
2479 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2480 {
2481 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2482 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2483 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2484 }
2485
2486 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2487 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2488#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2489 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2490#endif
2491#ifdef IEMNATIVE_REG_FIXED_TMP0
2492 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2493#endif
2494 );
2495 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2496 {
2497 fRegs &= ~RT_BIT_32(idxReg);
2498        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2499 }
2500
2501 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2502#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2503 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2504#endif
2505#ifdef IEMNATIVE_REG_FIXED_TMP0
2506 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2507#endif
2508 return pReNative;
2509}
2510
2511
2512/**
2513 * Allocates and initializes the native recompiler state.
2514 *
2515 * This is called the first time an EMT wants to recompile something.
2516 *
2517 * @returns Pointer to the new recompiler state.
2518 * @param pVCpu The cross context virtual CPU structure of the calling
2519 * thread.
2520 * @param pTb The TB that's about to be recompiled.
2521 * @thread EMT(pVCpu)
2522 */
2523static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2524{
2525 VMCPU_ASSERT_EMT(pVCpu);
2526
2527 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2528 AssertReturn(pReNative, NULL);
2529
2530 /*
2531     * Try to allocate all the buffers and stuff we need.
2532 */
2533 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2534 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2535 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2536#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2537 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2538#endif
2539 if (RT_LIKELY( pReNative->pInstrBuf
2540 && pReNative->paLabels
2541 && pReNative->paFixups)
2542#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2543 && pReNative->pDbgInfo
2544#endif
2545 )
2546 {
2547 /*
2548 * Set the buffer & array sizes on success.
2549 */
2550 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2551 pReNative->cLabelsAlloc = _8K;
2552 pReNative->cFixupsAlloc = _16K;
2553#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2554 pReNative->cDbgInfoAlloc = _16K;
2555#endif
2556
2557 /*
2558 * Done, just need to save it and reinit it.
2559 */
2560 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
2561 return iemNativeReInit(pReNative, pTb);
2562 }
2563
2564 /*
2565 * Failed. Cleanup and return.
2566 */
2567 AssertFailed();
2568 RTMemFree(pReNative->pInstrBuf);
2569 RTMemFree(pReNative->paLabels);
2570 RTMemFree(pReNative->paFixups);
2571#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2572 RTMemFree(pReNative->pDbgInfo);
2573#endif
2574 RTMemFree(pReNative);
2575 return NULL;
2576}
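
/* Note (added for clarity): the sizes chosen above, a 64KB instruction buffer, 8K labels,
   16K fixups and 16K debug info entries, are only starting points; they are doubled on
   demand by iemNativeInstrBufEnsureSlow, iemNativeLabelCreate, iemNativeAddFixup and
   iemNativeDbgInfoGrow further down. */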
2577
2578
2579/**
2580 * Creates a label.
2581 *
2582 * If the label does not yet have a defined position,
2583 * call iemNativeLabelDefine() later to set it.
2584 *
2585 * @returns Label ID. Throws VBox status code on failure, so no need to check
2586 * the return value.
2587 * @param pReNative The native recompile state.
2588 * @param enmType The label type.
2589 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2590 * label is not yet defined (default).
2591 * @param   uData       Data associated with the label. Only applicable to
2592 *                      certain types of labels. Default is zero.
2593 */
2594DECL_HIDDEN_THROW(uint32_t)
2595iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2596 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2597{
2598 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2599
2600 /*
2601 * Locate existing label definition.
2602 *
2603 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2604 * and uData is zero.
2605 */
2606 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2607 uint32_t const cLabels = pReNative->cLabels;
2608 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2609#ifndef VBOX_STRICT
2610 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2611 && offWhere == UINT32_MAX
2612 && uData == 0
2613#endif
2614 )
2615 {
2616#ifndef VBOX_STRICT
2617 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2618 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2619 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2620 if (idxLabel < pReNative->cLabels)
2621 return idxLabel;
2622#else
2623 for (uint32_t i = 0; i < cLabels; i++)
2624 if ( paLabels[i].enmType == enmType
2625 && paLabels[i].uData == uData)
2626 {
2627 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2628 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2629 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2630 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2631 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2632 return i;
2633 }
2634 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2635 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2636#endif
2637 }
2638
2639 /*
2640 * Make sure we've got room for another label.
2641 */
2642 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2643 { /* likely */ }
2644 else
2645 {
2646 uint32_t cNew = pReNative->cLabelsAlloc;
2647 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2648 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2649 cNew *= 2;
2650        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2651 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2652 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2653 pReNative->paLabels = paLabels;
2654 pReNative->cLabelsAlloc = cNew;
2655 }
2656
2657 /*
2658 * Define a new label.
2659 */
2660 paLabels[cLabels].off = offWhere;
2661 paLabels[cLabels].enmType = enmType;
2662 paLabels[cLabels].uData = uData;
2663 pReNative->cLabels = cLabels + 1;
2664
2665 Assert((unsigned)enmType < 64);
2666 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2667
2668 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2669 {
2670 Assert(uData == 0);
2671 pReNative->aidxUniqueLabels[enmType] = cLabels;
2672 }
2673
2674 if (offWhere != UINT32_MAX)
2675 {
2676#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2677 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2678 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2679#endif
2680 }
2681 return cLabels;
2682}
2683
2684
2685/**
2686 * Defines the location of an existing label.
2687 *
2688 * @param pReNative The native recompile state.
2689 * @param idxLabel The label to define.
2690 * @param offWhere The position.
2691 */
2692DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2693{
2694 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2695 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2696 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2697 pLabel->off = offWhere;
2698#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2699 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2700 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2701#endif
2702}
2703
2704
2705/**
2706 * Looks up a label.
2707 *
2708 * @returns Label ID if found, UINT32_MAX if not.
2709 */
2710static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2711 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2712{
2713 Assert((unsigned)enmType < 64);
2714 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2715 {
2716 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2717 return pReNative->aidxUniqueLabels[enmType];
2718
2719 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2720 uint32_t const cLabels = pReNative->cLabels;
2721 for (uint32_t i = 0; i < cLabels; i++)
2722 if ( paLabels[i].enmType == enmType
2723 && paLabels[i].uData == uData
2724 && ( paLabels[i].off == offWhere
2725 || offWhere == UINT32_MAX
2726 || paLabels[i].off == UINT32_MAX))
2727 return i;
2728 }
2729 return UINT32_MAX;
2730}
2731
2732
2733/**
2734 * Adds a fixup.
2735 *
2736 * @throws VBox status code (int) on failure.
2737 * @param pReNative The native recompile state.
2738 * @param offWhere The instruction offset of the fixup location.
2739 * @param idxLabel The target label ID for the fixup.
2740 * @param enmType The fixup type.
2741 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2742 */
2743DECL_HIDDEN_THROW(void)
2744iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2745 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2746{
2747 Assert(idxLabel <= UINT16_MAX);
2748 Assert((unsigned)enmType <= UINT8_MAX);
2749
2750 /*
2751 * Make sure we've room.
2752 */
2753 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2754 uint32_t const cFixups = pReNative->cFixups;
2755 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2756 { /* likely */ }
2757 else
2758 {
2759 uint32_t cNew = pReNative->cFixupsAlloc;
2760 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2761 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2762 cNew *= 2;
2763 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2764 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2765 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2766 pReNative->paFixups = paFixups;
2767 pReNative->cFixupsAlloc = cNew;
2768 }
2769
2770 /*
2771 * Add the fixup.
2772 */
2773 paFixups[cFixups].off = offWhere;
2774 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2775 paFixups[cFixups].enmType = enmType;
2776 paFixups[cFixups].offAddend = offAddend;
2777 pReNative->cFixups = cFixups + 1;
2778}
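
/* Illustrative flow (a sketch, not lifted from the recompiler itself; the fixup type name
   is target dependent and only assumed here): a forward branch is typically emitted as
       uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);  // off stays UINT32_MAX
       // ... emit the branch instruction at native offset offBranch ...
       iemNativeAddFixup(pReNative, offBranch, idxLabel, kIemNativeFixupType_Rel32);
   and once the target position offTarget is known:
       iemNativeLabelDefine(pReNative, idxLabel, offTarget);
   A later pass then patches every recorded fixup against the label's resolved offset. */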
2779
2780
2781/**
2782 * Slow code path for iemNativeInstrBufEnsure.
2783 */
2784DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2785{
2786 /* Double the buffer size till we meet the request. */
2787 uint32_t cNew = pReNative->cInstrBufAlloc;
2788 AssertReturn(cNew > 0, NULL);
2789 do
2790 cNew *= 2;
2791 while (cNew < off + cInstrReq);
2792
2793 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2794#ifdef RT_ARCH_ARM64
2795 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2796#else
2797 uint32_t const cbMaxInstrBuf = _2M;
2798#endif
2799 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2800
2801 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2802 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2803
2804 pReNative->cInstrBufAlloc = cNew;
2805 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2806}
2807
2808#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2809
2810/**
2811 * Grows the static debug info array used during recompilation.
2812 *
2813 * @returns Pointer to the new debug info block; throws VBox status code on
2814 * failure, so no need to check the return value.
2815 */
2816DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2817{
2818 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2819 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2820 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2821 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2822 pReNative->pDbgInfo = pDbgInfo;
2823 pReNative->cDbgInfoAlloc = cNew;
2824 return pDbgInfo;
2825}
2826
2827
2828/**
2829 * Adds a new debug info uninitialized entry, returning the pointer to it.
2830 * Adds a new, uninitialized debug info entry, returning the pointer to it.
2831DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2832{
2833 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2834 { /* likely */ }
2835 else
2836 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2837 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2838}
2839
2840
2841/**
2842 * Debug Info: Adds a native offset record, if necessary.
2843 */
2844static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2845{
2846 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2847
2848 /*
2849 * Search backwards to see if we've got a similar record already.
2850 */
2851 uint32_t idx = pDbgInfo->cEntries;
2852 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
2853 while (idx-- > idxStop)
2854 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
2855 {
2856 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
2857 return;
2858 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
2859 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2860 break;
2861 }
2862
2863 /*
2864 * Add it.
2865 */
2866 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2867 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2868 pEntry->NativeOffset.offNative = off;
2869}
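
/* Note (added for clarity): the debug info is thus a flat stream of entries in which a
   NativeOffset record establishes the native code offset for the records that follow it;
   consecutive records emitted at the same offset share one NativeOffset entry thanks to
   the backwards search above. */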
2870
2871
2872/**
2873 * Debug Info: Record info about a label.
2874 */
2875static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2876{
2877 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2878 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2879 pEntry->Label.uUnused = 0;
2880 pEntry->Label.enmLabel = (uint8_t)enmType;
2881 pEntry->Label.uData = uData;
2882}
2883
2884
2885/**
2886 * Debug Info: Record info about a threaded call.
2887 */
2888static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2889{
2890 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2891 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2892 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2893 pEntry->ThreadedCall.uUnused = 0;
2894 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2895}
2896
2897
2898/**
2899 * Debug Info: Record info about a new guest instruction.
2900 */
2901static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2902{
2903 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2904 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2905 pEntry->GuestInstruction.uUnused = 0;
2906 pEntry->GuestInstruction.fExec = fExec;
2907}
2908
2909
2910/**
2911 * Debug Info: Record info about guest register shadowing.
2912 */
2913static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2914 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2915{
2916 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2917 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2918 pEntry->GuestRegShadowing.uUnused = 0;
2919 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2920 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2921 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2922}
2923
2924#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2925
2926
2927/*********************************************************************************************************************************
2928* Register Allocator *
2929*********************************************************************************************************************************/
2930
2931/**
2932 * Register parameter indexes (indexed by argument number).
2933 */
2934DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2935{
2936 IEMNATIVE_CALL_ARG0_GREG,
2937 IEMNATIVE_CALL_ARG1_GREG,
2938 IEMNATIVE_CALL_ARG2_GREG,
2939 IEMNATIVE_CALL_ARG3_GREG,
2940#if defined(IEMNATIVE_CALL_ARG4_GREG)
2941 IEMNATIVE_CALL_ARG4_GREG,
2942# if defined(IEMNATIVE_CALL_ARG5_GREG)
2943 IEMNATIVE_CALL_ARG5_GREG,
2944# if defined(IEMNATIVE_CALL_ARG6_GREG)
2945 IEMNATIVE_CALL_ARG6_GREG,
2946# if defined(IEMNATIVE_CALL_ARG7_GREG)
2947 IEMNATIVE_CALL_ARG7_GREG,
2948# endif
2949# endif
2950# endif
2951#endif
2952};
2953
2954/**
2955 * Call register masks indexed by argument count.
2956 */
2957DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2958{
2959 0,
2960 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2961 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2962 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2963 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2964 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2965#if defined(IEMNATIVE_CALL_ARG4_GREG)
2966 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2967 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2968# if defined(IEMNATIVE_CALL_ARG5_GREG)
2969 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2970 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2971# if defined(IEMNATIVE_CALL_ARG6_GREG)
2972 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2973 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2974 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2975# if defined(IEMNATIVE_CALL_ARG7_GREG)
2976 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2977 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2978 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2979# endif
2980# endif
2981# endif
2982#endif
2983};
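
/*
 * Example (illustrative sketch only, not used by the recompiler): how the two
 * tables above are typically indexed.  The function name is made up for the
 * example.
 */
#if 0
static void iemNativeExampleCallRegTables(void)
{
    /* The host register carrying the 3rd call argument (argument number 2): */
    uint8_t const  idxArg2Reg = g_aidxIemNativeCallRegs[2];

    /* The mask of all host registers used for passing arguments in a 3 argument call: */
    uint32_t const fArgRegs   = g_afIemNativeCallRegs[3];
    Assert(fArgRegs & RT_BIT_32(idxArg2Reg));
    RT_NOREF(fArgRegs, idxArg2Reg);
}
#endif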
2984
2985#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2986/**
2987 * BP offset of the stack argument slots.
2988 *
2989 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2990 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2991 */
2992DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2993{
2994 IEMNATIVE_FP_OFF_STACK_ARG0,
2995# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2996 IEMNATIVE_FP_OFF_STACK_ARG1,
2997# endif
2998# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2999 IEMNATIVE_FP_OFF_STACK_ARG2,
3000# endif
3001# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3002 IEMNATIVE_FP_OFF_STACK_ARG3,
3003# endif
3004};
3005AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3006#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3007
3008/**
3009 * Info about shadowed guest register values.
3010 * @see IEMNATIVEGSTREG
3011 */
3012static struct
3013{
3014 /** Offset in VMCPU. */
3015 uint32_t off;
3016 /** The field size. */
3017 uint8_t cb;
3018 /** Name (for logging). */
3019 const char *pszName;
3020} const g_aGstShadowInfo[] =
3021{
3022#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3023 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3024 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3025 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3026 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3027 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3028 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3029 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3030 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3031 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3032 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3033 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3034 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3035 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3036 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3037 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3038 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3039 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3040 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3041 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3042 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3043 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3044 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3045 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3046 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3047 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3048 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3049 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3050 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3051 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3052 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3053 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3054 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3055 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3056 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3057 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3058 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3059 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3060 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3061 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3062 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3063 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3064 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3065#undef CPUMCTX_OFF_AND_SIZE
3066};
3067AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
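
/*
 * Example (illustrative sketch only, not part of the recompiler): how
 * g_aGstShadowInfo describes where each shadowed guest register lives inside
 * the VMCPU structure.  The helper name is made up for the example.
 */
#if 0
static uint64_t iemNativeExampleReadGstRegFromCtx(PVMCPU pVCpu, IEMNATIVEGSTREG enmGstReg)
{
    /* The table gives the byte offset and field size within VMCPU. */
    uint8_t const * const pbField = (uint8_t const *)pVCpu + g_aGstShadowInfo[enmGstReg].off;
    switch (g_aGstShadowInfo[enmGstReg].cb)
    {
        case sizeof(uint64_t): return *(uint64_t const *)pbField;
        case sizeof(uint32_t): return *(uint32_t const *)pbField;
        case sizeof(uint16_t): return *(uint16_t const *)pbField;
        default: AssertFailedReturn(0);
    }
}
#endif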
3068
3069
3070/** Host CPU general purpose register names. */
3071DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3072{
3073#ifdef RT_ARCH_AMD64
3074 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3075#elif RT_ARCH_ARM64
3076 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3077 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3078#else
3079# error "port me"
3080#endif
3081};
3082
3083
3084DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3085 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3086{
3087 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3088
3089 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3090 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3091 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3092 return (uint8_t)idxReg;
3093}
3094
3095
3096/**
3097 * Tries to locate a suitable register in the given register mask.
3098 *
3099 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3100 * failed.
3101 *
3102 * @returns Host register number on success, returns UINT8_MAX on failure.
3103 */
3104static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3105{
3106 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3107 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3108 if (fRegs)
3109 {
3110 /** @todo pick better here: */
3111 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3112
3113 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3114 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3115 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3116 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3117
3118 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3119 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3120 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3121 return idxReg;
3122 }
3123 return UINT8_MAX;
3124}
3125
3126
3127/**
3128 * Locate a register, possibly freeing one up.
3129 *
3130 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3131 * failed.
3132 *
3133 * @returns Host register number on success. Returns UINT8_MAX if no registers
3134 *          were found; the caller is supposed to deal with this and raise an
3135 *          allocation type specific status code (if desired).
3136 *
3137 * @throws  VBox status code if we run into trouble spilling a variable or
3138 *          recording debug info.  Does NOT throw anything if we're out of
3139 * registers, though.
3140 */
3141static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3142 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3143{
3144 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3145 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3146
3147 /*
3148 * Try a freed register that's shadowing a guest register
3149 */
3150 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3151 if (fRegs)
3152 {
3153 unsigned const idxReg = (fPreferVolatile
3154 ? ASMBitFirstSetU32(fRegs)
3155 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3156 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
3157 - 1;
3158
3159 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3160 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3161 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3162 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3163
3164 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3165 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3166 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3167 return idxReg;
3168 }
3169
3170 /*
3171 * Try to free up a variable that's in a register.
3172 *
3173 * We do two rounds here: first evacuating variables that don't need to be
3174 * saved on the stack, then in the second round moving things to the stack.
3175 */
3176 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3177 {
3178 uint32_t fVars = pReNative->Core.bmVars;
3179 while (fVars)
3180 {
3181 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3182 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3183 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3184 && (RT_BIT_32(idxReg) & fRegMask)
3185 && ( iLoop == 0
3186 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3187 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3188 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3189 {
3190 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3191 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3192 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3193 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3194 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3195 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3196
3197 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3198 {
3199 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3200 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3201 }
3202
3203 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3204 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3205
3206 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3207 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3208 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3209 return idxReg;
3210 }
3211 fVars &= ~RT_BIT_32(idxVar);
3212 }
3213 }
3214
3215 return UINT8_MAX;
3216}
3217
3218
3219/**
3220 * Reassigns a variable to a different register specified by the caller.
3221 *
3222 * @returns The new code buffer position.
3223 * @param pReNative The native recompile state.
3224 * @param off The current code buffer position.
3225 * @param idxVar The variable index.
3226 * @param idxRegOld The old host register number.
3227 * @param idxRegNew The new host register number.
3228 * @param pszCaller The caller for logging.
3229 */
3230static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3231 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3232{
3233 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3234 RT_NOREF(pszCaller);
3235
3236 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3237
3238 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3239 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3240 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3241 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3242
3243 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3244 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3245 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3246 if (fGstRegShadows)
3247 {
3248 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3249 | RT_BIT_32(idxRegNew);
3250 while (fGstRegShadows)
3251 {
3252 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3253 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3254
3255 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3256 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3257 }
3258 }
3259
3260 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3261 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3262 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3263 return off;
3264}
3265
3266
3267/**
3268 * Moves a variable to a different register or spills it onto the stack.
3269 *
3270 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3271 * kinds can easily be recreated if needed later.
3272 *
3273 * @returns The new code buffer position.
3274 * @param pReNative The native recompile state.
3275 * @param off The current code buffer position.
3276 * @param idxVar The variable index.
3277 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3278 * call-volatile registers.
3279 */
3280static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3281 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3282{
3283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3284 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3285 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3286
3287 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3288 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3289 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3290 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3291 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3292 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3293 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3294 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3295 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3296
3297
3298 /** @todo Add statistics on this.*/
3299 /** @todo Implement basic variable liveness analysis (python) so variables
3300  * can be freed immediately once no longer used.  As it stands, we risk
3301  * trashing registers and stack space on dead variables. */
3302
3303 /*
3304 * First try move it to a different register, as that's cheaper.
3305 */
3306 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3307 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3308 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3309 if (fRegs)
3310 {
3311 /* Avoid using shadow registers, if possible. */
3312 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3313 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3314 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3315 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3316 }
3317
3318 /*
3319 * Otherwise we must spill the register onto the stack.
3320 */
3321 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3322 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3323 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3324 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3325
3326 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3327 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3328 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3329 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3330 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3331 return off;
3332}
3333
3334
3335/**
3336 * Allocates a temporary host general purpose register.
3337 *
3338 * This may emit code to save register content onto the stack in order to free
3339 * up a register.
3340 *
3341 * @returns The host register number; throws VBox status code on failure,
3342 * so no need to check the return value.
3343 * @param pReNative The native recompile state.
3344 * @param poff Pointer to the variable with the code buffer position.
3345 * This will be updated if we need to move a variable from
3346 * register to stack in order to satisfy the request.
3347 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3348 * registers (@c true, default) or the other way around
3349 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3350 */
3351DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3352{
3353 /*
3354 * Try to find a completely unused register, preferably a call-volatile one.
3355 */
3356 uint8_t idxReg;
3357 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3358 & ~pReNative->Core.bmHstRegsWithGstShadow
3359 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3360 if (fRegs)
3361 {
3362 if (fPreferVolatile)
3363 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3364 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3365 else
3366 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3367 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3368 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3369 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3370 }
3371 else
3372 {
3373 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3374 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3375 }
3376 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3377}
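
/*
 * Usage sketch (illustrative only, not part of the recompiler): a caller
 * typically pairs iemNativeRegAllocTmp() with iemNativeRegFreeTmp() around the
 * code it emits.  The function name and the immediate value are made up.
 */
#if 0
static uint32_t iemNativeExampleUseTmpReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Allocate a temporary host register, preferring a call-volatile one. */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);

    /* Emit whatever needs the register; here we just load an immediate into it. */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234));

    /* Free it again; this does not flush any guest shadows the register may hold. */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif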
3378
3379
3380/**
3381 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3382 * registers.
3383 *
3384 * @returns The host register number; throws VBox status code on failure,
3385 * so no need to check the return value.
3386 * @param pReNative The native recompile state.
3387 * @param poff Pointer to the variable with the code buffer position.
3388 * This will be updated if we need to move a variable from
3389 * register to stack in order to satisfy the request.
3390 * @param fRegMask Mask of acceptable registers.
3391 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3392 * registers (@c true, default) or the other way around
3393 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3394 */
3395DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3396 bool fPreferVolatile /*= true*/)
3397{
3398 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3399 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3400
3401 /*
3402 * Try to find a completely unused register, preferably a call-volatile one.
3403 */
3404 uint8_t idxReg;
3405 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3406 & ~pReNative->Core.bmHstRegsWithGstShadow
3407 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3408 & fRegMask;
3409 if (fRegs)
3410 {
3411 if (fPreferVolatile)
3412 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3413 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3414 else
3415 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3416 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3417 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3418 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3419 }
3420 else
3421 {
3422 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3423 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3424 }
3425 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3426}
3427
3428
3429/**
3430 * Allocates a temporary register for loading an immediate value into.
3431 *
3432 * This will emit code to load the immediate, unless there happens to be an
3433 * unused register with the value already loaded.
3434 *
3435 * The caller must not modify the returned register; it must be considered
3436 * read-only. Free using iemNativeRegFreeTmpImm.
3437 *
3438 * @returns The host register number; throws VBox status code on failure, so no
3439 * need to check the return value.
3440 * @param pReNative The native recompile state.
3441 * @param poff Pointer to the variable with the code buffer position.
3442 * @param uImm The immediate value that the register must hold upon
3443 * return.
3444 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3445 * registers (@c true, default) or the other way around
3446 * (@c false).
3447 *
3448 * @note Reusing immediate values has not been implemented yet.
3449 */
3450DECL_HIDDEN_THROW(uint8_t)
3451iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3452{
3453 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3454 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3455 return idxReg;
3456}
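
/*
 * Usage sketch (illustrative only): allocating a read-only immediate register
 * and releasing it again with iemNativeRegFreeTmpImm().  The function name and
 * the value are made up for the example.
 */
#if 0
static uint32_t iemNativeExampleUseTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* The register comes back already loaded with the requested value. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xcafe), true /*fPreferVolatile*/);

    /* ... emit code that reads idxRegImm but never writes it ... */

    /* Free with the Imm variant, since the value is assumed to be unmodified. */
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);
    return off;
}
#endif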
3457
3458
3459/**
3460 * Marks host register @a idxHstReg as containing a shadow copy of guest
3461 * register @a enmGstReg.
3462 *
3463 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
3464 * host register before calling.
3465 */
3466DECL_FORCE_INLINE(void)
3467iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3468{
3469 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
3470 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3471 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3472
3473 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
3474 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
3475 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
3476 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3477#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3478 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3479 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
3480#else
3481 RT_NOREF(off);
3482#endif
3483}
3484
3485
3486/**
3487 * Clear any guest register shadow claims from @a idxHstReg.
3488 *
3489 * The register does not need to be shadowing any guest registers.
3490 */
3491DECL_FORCE_INLINE(void)
3492iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
3493{
3494 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3495 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3496 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3497 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3498 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3499
3500#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3501 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3502 if (fGstRegs)
3503 {
3504 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
3505 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3506 while (fGstRegs)
3507 {
3508 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3509 fGstRegs &= ~RT_BIT_64(iGstReg);
3510 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
3511 }
3512 }
3513#else
3514 RT_NOREF(off);
3515#endif
3516
3517 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3518 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3519 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3520}
3521
3522
3523/**
3524 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
3525 * and global overview flags.
3526 */
3527DECL_FORCE_INLINE(void)
3528iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3529{
3530 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3531 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3532 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3533 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3534 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
3535 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3536 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3537
3538#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3539 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3540 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
3541#else
3542 RT_NOREF(off);
3543#endif
3544
3545 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3546 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
3547 if (!fGstRegShadowsNew)
3548 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3549 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
3550}
3551
3552
3553/**
3554 * Clear any guest register shadow claim for @a enmGstReg.
3555 */
3556DECL_FORCE_INLINE(void)
3557iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3558{
3559 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3560 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3561 {
3562 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3563 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3564 }
3565}
3566
3567
3568/**
3569 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
3570 * as the new shadow of it.
3571 */
3572DECL_FORCE_INLINE(void)
3573iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
3574 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3575{
3576 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3577 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3578 {
3579 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3580 if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
3581 return;
3582 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3583 }
3584 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
3585}
3586
3587
3588/**
3589 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
3590 * to @a idxRegTo.
3591 */
3592DECL_FORCE_INLINE(void)
3593iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
3594 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3595{
3596 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
3597 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
3598 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
3599 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
3600 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3601 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
3602 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
3603 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
3604 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
3605
3606 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3607 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
3608 if (!fGstRegShadowsFrom)
3609 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
3610 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
3611 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
3612 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
3613#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3614 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3615 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
3616#else
3617 RT_NOREF(off);
3618#endif
3619}
3620
3621
3622/**
3623 * Allocates a temporary host general purpose register for keeping a guest
3624 * register value.
3625 *
3626 * Since we may already have a register holding the guest register value,
3627 * code will be emitted to do the loading if that's not the case. Code may also
3628 * be emitted if we have to free up a register to satisfy the request.
3629 *
3630 * @returns The host register number; throws VBox status code on failure, so no
3631 * need to check the return value.
3632 * @param pReNative The native recompile state.
3633 * @param poff Pointer to the variable with the code buffer
3634 * position.  This will be updated if we need to move a
3635 * variable from register to stack in order to satisfy
3636 * the request.
3637 * @param enmGstReg The guest register that is to be used.
3638 * @param enmIntendedUse How the caller will be using the host register.
3639 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3640 * register is okay (default). The ASSUMPTION here is
3641 * that the caller has already flushed all volatile
3642 * registers, so this is only applied if we allocate a
3643 * new register.
3644 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3645 */
3646DECL_HIDDEN_THROW(uint8_t)
3647iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3648 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3649 bool fNoVolatileRegs /*= false*/)
3650{
3651 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3652#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3653 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3654#endif
3655 uint32_t const fRegMask = !fNoVolatileRegs
3656 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3657 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3658
3659 /*
3660 * First check if the guest register value is already in a host register.
3661 */
3662 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3663 {
3664 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3665 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3666 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3667 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3668
3669 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3670 {
3671 /*
3672 * If the register will trash the guest shadow copy, try to find a
3673 * completely unused register we can use instead. If that fails,
3674 * we need to disassociate the host reg from the guest reg.
3675 */
3676 /** @todo would be nice to know if preserving the register is in any way helpful. */
3677 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3678 && ( ~pReNative->Core.bmHstRegs
3679 & ~pReNative->Core.bmHstRegsWithGstShadow
3680 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3681 {
3682 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3683
3684 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3685
3686 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3687 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3688 g_apszIemNativeHstRegNames[idxRegNew]));
3689 idxReg = idxRegNew;
3690 }
3691 else
3692 {
3693 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3694 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3695 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3696 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3697 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3698 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3699 else
3700 {
3701 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3702 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3703 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3704 }
3705 }
3706 }
3707 else
3708 {
3709 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3710 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3711 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3712 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3713
3714 /*
3715 * Allocate a new register, copy the value and, if updating, the
3716 * guest shadow copy assignment to the new register.
3717 */
3718 /** @todo share register for readonly access. */
3719 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3720 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3721
3722 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3723 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3724
3725 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3726 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3727 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3728 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3729 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3730 else
3731 {
3732 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3733 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3734 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3735 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3736 }
3737 idxReg = idxRegNew;
3738 }
3739 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3740
3741#ifdef VBOX_STRICT
3742 /* Strict builds: Check that the value is correct. */
3743 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3744#endif
3745
3746 return idxReg;
3747 }
3748
3749 /*
3750 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3751 */
3752 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3753
3754 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3755 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3756
3757 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3758 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3759 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3760 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3761
3762 return idxRegNew;
3763}
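
/*
 * Usage sketch (illustrative only): fetching the guest RIP into a host register
 * for read-only use.  The function name is made up for the example.
 */
#if 0
static uint32_t iemNativeExampleReadGuestPc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Reuses an existing shadow copy if present, otherwise emits a load from CPUMCTX. */
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ReadOnly, false /*fNoVolatileRegs*/);

    /* ... emit code consuming idxPcReg without modifying it ... */

    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif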
3764
3765
3766/**
3767 * Allocates a temporary host general purpose register that already holds the
3768 * given guest register value.
3769 *
3770 * The use case for this function is places where the shadowing state cannot be
3771 * modified due to branching and such.  This will fail if we don't have a
3772 * current shadow copy handy or if it's incompatible. The only code that will
3773 * be emitted here is value checking code in strict builds.
3774 *
3775 * The intended use can only be readonly!
3776 *
3777 * @returns The host register number, UINT8_MAX if not present.
3778 * @param pReNative The native recompile state.
3779 * @param poff Pointer to the instruction buffer offset.
3780 * Will be updated in strict builds if a register is
3781 * found.
3782 * @param enmGstReg The guest register whose shadow copy is wanted.
3783 * @note In strict builds, this may throw instruction buffer growth failures.
3784 * Non-strict builds will not throw anything.
3785 * @sa iemNativeRegAllocTmpForGuestReg
3786 */
3787DECL_HIDDEN_THROW(uint8_t)
3788iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3789{
3790 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3791
3792 /*
3793 * First check if the guest register value is already in a host register.
3794 */
3795 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3796 {
3797 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3798 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3799 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3800 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3801
3802 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3803 {
3804 /*
3805 * We only do readonly use here, so easy compared to the other
3806 * variant of this code.
3807 */
3808 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3809 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3810 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3811 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3812 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3813
3814#ifdef VBOX_STRICT
3815 /* Strict builds: Check that the value is correct. */
3816 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3817#else
3818 RT_NOREF(poff);
3819#endif
3820 return idxReg;
3821 }
3822 }
3823
3824 return UINT8_MAX;
3825}
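
/*
 * Usage sketch (illustrative only): opportunistically using an existing shadow
 * copy of guest EFLAGS when the shadowing state must not be modified.  The
 * function name is made up for the example.
 */
#if 0
static uint32_t iemNativeExampleMaybeUseEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
    if (idxEflReg != UINT8_MAX)
    {
        /* ... emit code that only reads idxEflReg ... */
        iemNativeRegFreeTmp(pReNative, idxEflReg);
    }
    /* else: no current shadow copy; the caller has to emit a CPUMCTX access instead. */
    return off;
}
#endif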
3826
3827
3828DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
3829
3830
3831/**
3832 * Allocates argument registers for a function call.
3833 *
3834 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3835 * need to check the return value.
3836 * @param pReNative The native recompile state.
3837 * @param off The current code buffer offset.
3838 * @param cArgs The number of arguments the function call takes.
3839 */
3840DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3841{
3842 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3843 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3844 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3845 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3846
3847 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3848 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3849 else if (cArgs == 0)
3850        return off;
3851
3852 /*
3853 * Are we lucky and all the registers are free and not shadowing anything?
3854 */
3855 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3856 for (uint32_t i = 0; i < cArgs; i++)
3857 {
3858 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3859 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3860 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3861 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3862 }
3863 /*
3864 * Okay, not lucky so we have to free up the registers.
3865 */
3866 else
3867 for (uint32_t i = 0; i < cArgs; i++)
3868 {
3869 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3870 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3871 {
3872 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3873 {
3874 case kIemNativeWhat_Var:
3875 {
3876 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3877 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
3878 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3879 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3880 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3881
3882 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3883 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3884 else
3885 {
3886 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3887 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3888 }
3889 break;
3890 }
3891
3892 case kIemNativeWhat_Tmp:
3893 case kIemNativeWhat_Arg:
3894 case kIemNativeWhat_rc:
3895 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3896 default:
3897 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3898 }
3899
3900 }
3901 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3902 {
3903 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3904 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3905 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3906 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3907 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3908 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3909 }
3910 else
3911 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3912 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3913 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3914 }
3915 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3916    return off;
3917}
3918
3919
3920DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3921
3922
3923#if 0
3924/**
3925 * Frees a register assignment of any type.
3926 *
3927 * @param pReNative The native recompile state.
3928 * @param idxHstReg The register to free.
3929 *
3930 * @note Does not update variables.
3931 */
3932DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3933{
3934 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3935 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3936 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3937 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3938 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3939 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3940 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3941 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3942 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3943 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3944 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3945 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3946 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3947 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3948
3949 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3950 /* no flushing, right:
3951 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3952 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3953 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3954 */
3955}
3956#endif
3957
3958
3959/**
3960 * Frees a temporary register.
3961 *
3962 * Any shadow copies of guest registers assigned to the host register will not
3963 * be flushed by this operation.
3964 */
3965DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3966{
3967 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3968 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3969 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3970 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3971 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3972}
3973
3974
3975/**
3976 * Frees a temporary immediate register.
3977 *
3978 * It is assumed that the caller has not modified the register, so it still holds
3979 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3980 */
3981DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3982{
3983 iemNativeRegFreeTmp(pReNative, idxHstReg);
3984}
3985
3986
3987/**
3988 * Frees a register assigned to a variable.
3989 *
3990 * The register will be disassociated from the variable.
3991 */
3992DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3993{
3994 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3995 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3996 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3997 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3998 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
3999
4000 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4001 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4002 if (!fFlushShadows)
4003 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
4004 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4005 else
4006 {
4007 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4008 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4009 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4010 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4011 uint64_t fGstRegShadows = fGstRegShadowsOld;
4012 while (fGstRegShadows)
4013 {
4014 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4015 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4016
4017 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4018 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4019 }
4020 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
4021 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4022 }
4023}
4024
4025
4026/**
4027 * Called right before emitting a call instruction to move anything important
4028 * out of call-volatile registers, free and flush the call-volatile registers,
4029 * optionally freeing argument variables.
4030 *
4031 * @returns New code buffer offset; throws VBox status code on failure.
4032 * @param pReNative The native recompile state.
4033 * @param off The code buffer offset.
4034 * @param cArgs The number of arguments the function call takes.
4035 * It is presumed that the host register part of these has
4036 * been allocated as such already and won't need moving,
4037 * just freeing.
4038 * @param fKeepVars Mask of variables that should keep their register
4039 * assignments. Caller must take care to handle these.
4040 */
4041DECL_HIDDEN_THROW(uint32_t)
4042iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4043{
4044 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4045
4046 /* fKeepVars will reduce this mask. */
4047 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4048
4049 /*
4050 * Move anything important out of volatile registers.
4051 */
4052 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4053 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4054 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4055#ifdef IEMNATIVE_REG_FIXED_TMP0
4056 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4057#endif
4058 & ~g_afIemNativeCallRegs[cArgs];
4059
4060 fRegsToMove &= pReNative->Core.bmHstRegs;
4061 if (!fRegsToMove)
4062 { /* likely */ }
4063 else
4064 {
4065 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4066 while (fRegsToMove != 0)
4067 {
4068 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4069 fRegsToMove &= ~RT_BIT_32(idxReg);
4070
4071 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4072 {
4073 case kIemNativeWhat_Var:
4074 {
4075 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4076 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4077 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4078 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4079 if (!(RT_BIT_32(idxVar) & fKeepVars))
4080 {
4081 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4082 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4083 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4084 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4085 else
4086 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4087 }
4088 else
4089 fRegsToFree &= ~RT_BIT_32(idxReg);
4090 continue;
4091 }
4092
4093 case kIemNativeWhat_Arg:
4094 AssertMsgFailed(("What?!?: %u\n", idxReg));
4095 continue;
4096
4097 case kIemNativeWhat_rc:
4098 case kIemNativeWhat_Tmp:
4099 AssertMsgFailed(("Missing free: %u\n", idxReg));
4100 continue;
4101
4102 case kIemNativeWhat_FixedTmp:
4103 case kIemNativeWhat_pVCpuFixed:
4104 case kIemNativeWhat_pCtxFixed:
4105 case kIemNativeWhat_FixedReserved:
4106 case kIemNativeWhat_Invalid:
4107 case kIemNativeWhat_End:
4108 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4109 }
4110 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4111 }
4112 }
4113
4114 /*
4115 * Do the actual freeing.
4116 */
4117 if (pReNative->Core.bmHstRegs & fRegsToFree)
4118 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4119 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4120 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4121
4122 /* If there are guest register shadows in any call-volatile register, we
4123 have to clear the corresponding guest register masks for each register. */
4124 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4125 if (fHstRegsWithGstShadow)
4126 {
4127 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4128 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4129 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4130 do
4131 {
4132 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4133 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4134
4135 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4136 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4137 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4138 } while (fHstRegsWithGstShadow != 0);
4139 }
4140
4141 return off;
4142}
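
/*
 * Usage sketch (illustrative only): the typical step before emitting a helper
 * call.  The function name is made up and the argument loading plus the call
 * emission itself are elided.
 */
#if 0
static uint32_t iemNativeExampleBeforeHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const cArgs = 2;

    /* Evacuate, free and flush everything living in the call-volatile registers. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs, 0 /*fKeepVars*/);

    /* ... load IEMNATIVE_CALL_ARG0_GREG/ARG1_GREG and emit the actual call here ... */

    /* After the call, stale guest shadows are dropped with
       iemNativeRegFlushGuestShadows(); see the sketch following that function. */
    return off;
}
#endif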
4143
4144
4145/**
4146 * Flushes a set of guest register shadow copies.
4147 *
4148 * This is usually done after calling a threaded function or a C-implementation
4149 * of an instruction.
4150 *
4151 * @param pReNative The native recompile state.
4152 * @param fGstRegs Set of guest registers to flush.
4153 */
4154DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4155{
4156 /*
4157 * Reduce the mask by what's currently shadowed
4158 */
4159 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4160 fGstRegs &= bmGstRegShadowsOld;
4161 if (fGstRegs)
4162 {
4163 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4164 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4165 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4166 if (bmGstRegShadowsNew)
4167 {
4168 /*
4169 * Partial.
4170 */
4171 do
4172 {
4173 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4174 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4175 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4176 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4177 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4178
4179 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4180 fGstRegs &= ~fInThisHstReg;
4181 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4182 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4183 if (!fGstRegShadowsNew)
4184 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4185 } while (fGstRegs != 0);
4186 }
4187 else
4188 {
4189 /*
4190 * Clear all.
4191 */
4192 do
4193 {
4194 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4195 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4196 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4197 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4198 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4199
4200 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4201 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4202 } while (fGstRegs != 0);
4203 pReNative->Core.bmHstRegsWithGstShadow = 0;
4204 }
4205 }
4206}
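
/*
 * Usage sketch (illustrative only): after emitting a call to a C-implementation
 * that may change RIP and EFLAGS, drop the now stale shadow copies.  The
 * function name is made up for the example.
 */
#if 0
static void iemNativeExampleFlushAfterCImplCall(PIEMRECOMPILERSTATE pReNative)
{
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
}
#endif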
4207
4208
4209/**
4210 * Flushes guest register shadow copies held by a set of host registers.
4211 *
4212 * This is used with the TLB lookup code for ensuring that we don't carry on
4213 * with any guest shadows in volatile registers, as these will get corrupted by
4214 * a TLB miss.
4215 *
4216 * @param pReNative The native recompile state.
4217 * @param fHstRegs Set of host registers to flush guest shadows for.
4218 */
4219DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4220{
4221 /*
4222 * Reduce the mask by what's currently shadowed.
4223 */
4224 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4225 fHstRegs &= bmHstRegsWithGstShadowOld;
4226 if (fHstRegs)
4227 {
4228 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4229 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4230 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4231 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4232 if (bmHstRegsWithGstShadowNew)
4233 {
4234 /*
4235 * Partial (likely).
4236 */
4237 uint64_t fGstShadows = 0;
4238 do
4239 {
4240 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4241 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4242 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4243 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4244
4245 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4246 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4247 fHstRegs &= ~RT_BIT_32(idxHstReg);
4248 } while (fHstRegs != 0);
4249 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4250 }
4251 else
4252 {
4253 /*
4254 * Clear all.
4255 */
4256 do
4257 {
4258 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4259 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4260 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4261 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4262
4263 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4264 fHstRegs &= ~RT_BIT_32(idxHstReg);
4265 } while (fHstRegs != 0);
4266 pReNative->Core.bmGstRegShadows = 0;
4267 }
4268 }
4269}
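
/*
 * Illustration (not built; a minimal sketch): before emitting a call that may
 * clobber the call-volatile registers (e.g. on the TLB miss path), the guest
 * shadows living in those registers can be dropped with the function above.
 * The example function name is made up.
 */
#if 0
static uint32_t iemNativeExampleDropVolatileShadows(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Forget that any guest register is shadowed by a call-volatile host register,
       since the helper we are about to call will trash those registers. */
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    /* ... emit the helper call here ... */
    return off;
}
#endif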
4270
4271
4272/**
4273 * Restores guest shadow copies in volatile registers.
4274 *
4275 * This is used after calling a helper function (think TLB miss) to restore the
4276 * register state of volatile registers.
4277 *
4278 * @param pReNative The native recompile state.
4279 * @param off The current code buffer position.
 * @param fHstRegsActiveShadows Set of host registers that may legitimately
 *   still be active while holding guest shadows; only used for sanity checking.
4280 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4281 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4282 */
4283DECL_HIDDEN_THROW(uint32_t)
4284iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4285{
4286 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4287 if (fHstRegs)
4288 {
4289 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4290 do
4291 {
4292 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4293
4294 /* It's not fatal if a register is active holding a variable that is
4295 shadowing a guest register, ASSUMING all pending guest register
4296 writes were flushed prior to the helper call. However, we'll be
4297 emitting duplicate restores, so it wastes code space. */
4298 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4299 RT_NOREF(fHstRegsActiveShadows);
4300
4301 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4302 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4303 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4304 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4305
4306 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4307 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4308
4309 fHstRegs &= ~RT_BIT_32(idxHstReg);
4310 } while (fHstRegs != 0);
4311 }
4312 return off;
4313}
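
/*
 * Illustration (not built; a simplified sketch): a typical helper-call sequence
 * that restores the volatile-register shadows afterwards.  pfnHelper and
 * idxInstr are placeholders; real call sites also deal with variables and
 * additional arguments.
 */
#if 0
static uint32_t iemNativeExampleCallHelper(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                           uintptr_t pfnHelper, uint8_t idxInstr)
{
    /* All pending guest register writes must hit CPUMCTX before the helper runs. */
    off = iemNativeRegFlushPendingWrites(pReNative, off);

    /* pVCpu goes in the first argument register, then make the call and
       propagate a non-zero status / rcPassUp. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
    off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);

    /* Reload any guest shadows that were held in volatile registers. */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
    return off;
}
#endif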
4314
4315
4316/**
4317 * Flushes delayed write of a specific guest register.
4318 *
4319 * This must be called prior to calling CImpl functions and any helpers that use
4320 * the guest state (like raising exceptions) and such.
4321 *
4322 * This optimization has not yet been implemented. The first target would be
4323 * RIP updates, since these are the most common ones.
4324 */
4325DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4326 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
4327{
4328 RT_NOREF(pReNative, enmClass, idxReg);
4329 return off;
4330}
4331
4332
4333/**
4334 * Flushes any delayed guest register writes.
4335 *
4336 * This must be called prior to calling CImpl functions and any helpers that use
4337 * the guest state (like raising exceptions) and such.
4338 *
4339 * This optimization has not yet been implemented. The first target would be
4340 * RIP updates, since these are the most common ones.
4341 */
4342DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4343{
4344 RT_NOREF(pReNative, off);
4345 return off;
4346}
4347
4348
4349#ifdef VBOX_STRICT
4350/**
4351 * Does internal register allocator sanity checks.
4352 */
4353static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
4354{
4355 /*
4356 * Iterate host registers building a guest shadowing set.
4357 */
4358 uint64_t bmGstRegShadows = 0;
4359 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
4360 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
4361 while (bmHstRegsWithGstShadow)
4362 {
4363 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
4364 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4365 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4366
4367 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4368 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
4369 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
4370 bmGstRegShadows |= fThisGstRegShadows;
4371 while (fThisGstRegShadows)
4372 {
4373 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
4374 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
4375 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
4376 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
4377 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
4378 }
4379 }
4380 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
4381 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
4382 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
4383
4384 /*
4385 * Now the other way around, checking the guest to host index array.
4386 */
4387 bmHstRegsWithGstShadow = 0;
4388 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
4389 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4390 while (bmGstRegShadows)
4391 {
4392 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
4393 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4394 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
4395
4396 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4397 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
4398 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
4399 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
4400 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4401 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4402 }
4403 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
4404 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
4405 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
4406}
4407#endif
4408
4409
4410/*********************************************************************************************************************************
4411* Code Emitters (larger snippets) *
4412*********************************************************************************************************************************/
4413
4414/**
4415 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
4416 * extending to 64-bit width.
4417 *
4418 * @returns New code buffer offset on success, UINT32_MAX on failure.
4419 * @param pReNative The native recompile state.
4420 * @param off The current code buffer position.
4421 * @param idxHstReg The host register to load the guest register value into.
4422 * @param enmGstReg The guest register to load.
4423 *
4424 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
4425 * that is something the caller needs to do if applicable.
4426 */
4427DECL_HIDDEN_THROW(uint32_t)
4428iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
4429{
4430 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
4431 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4432
4433 switch (g_aGstShadowInfo[enmGstReg].cb)
4434 {
4435 case sizeof(uint64_t):
4436 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4437 case sizeof(uint32_t):
4438 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4439 case sizeof(uint16_t):
4440 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4441#if 0 /* not present in the table. */
4442 case sizeof(uint8_t):
4443 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4444#endif
4445 default:
4446 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4447 }
4448}
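
/*
 * Illustration (not built; a minimal sketch): loading the guest RIP shadow into
 * a temporary host register.  As per the note above, this does not update the
 * shadow bookkeeping; it is just a plain load from CPUMCTX.
 */
#if 0
static uint32_t iemNativeExampleLoadPcIntoTmp(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxTmpReg, kIemNativeGstReg_Pc);
    /* ... emit code using idxTmpReg ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif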
4449
4450
4451#ifdef VBOX_STRICT
4452/**
4453 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
4454 *
4455 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4456 * Trashes EFLAGS on AMD64.
4457 */
4458static uint32_t
4459iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
4460{
4461# ifdef RT_ARCH_AMD64
4462 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4463
4464 /* rol reg64, 32 */
4465 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4466 pbCodeBuf[off++] = 0xc1;
4467 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4468 pbCodeBuf[off++] = 32;
4469
4470 /* test reg32, ffffffffh */
4471 if (idxReg >= 8)
4472 pbCodeBuf[off++] = X86_OP_REX_B;
4473 pbCodeBuf[off++] = 0xf7;
4474 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4475 pbCodeBuf[off++] = 0xff;
4476 pbCodeBuf[off++] = 0xff;
4477 pbCodeBuf[off++] = 0xff;
4478 pbCodeBuf[off++] = 0xff;
4479
4480 /* je/jz +1 */
4481 pbCodeBuf[off++] = 0x74;
4482 pbCodeBuf[off++] = 0x01;
4483
4484 /* int3 */
4485 pbCodeBuf[off++] = 0xcc;
4486
4487 /* rol reg64, 32 */
4488 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4489 pbCodeBuf[off++] = 0xc1;
4490 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4491 pbCodeBuf[off++] = 32;
4492
4493# elif defined(RT_ARCH_ARM64)
4494 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4495 /* lsr tmp0, reg64, #32 */
4496 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
4497 /* cbz tmp0, +1 */
4498 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4499 /* brk #0x1100 */
4500 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
4501
4502# else
4503# error "Port me!"
4504# endif
4505 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4506 return off;
4507}
4508#endif /* VBOX_STRICT */
4509
4510
4511#ifdef VBOX_STRICT
4512/**
4513 * Emits code that checks that the content of register @a idxReg is the same
4514 * as what's in the guest register @a enmGstReg, triggering a breakpoint
4515 * instruction if that's not the case.
4516 *
4517 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4518 * Trashes EFLAGS on AMD64.
4519 */
4520static uint32_t
4521iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
4522{
4523# ifdef RT_ARCH_AMD64
4524 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4525
4526 /* cmp reg, [mem] */
4527 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
4528 {
4529 if (idxReg >= 8)
4530 pbCodeBuf[off++] = X86_OP_REX_R;
4531 pbCodeBuf[off++] = 0x38;
4532 }
4533 else
4534 {
4535 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
4536 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
4537 else
4538 {
4539 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
4540 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4541 else
4542 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
4543 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
4544 if (idxReg >= 8)
4545 pbCodeBuf[off++] = X86_OP_REX_R;
4546 }
4547 pbCodeBuf[off++] = 0x39;
4548 }
4549 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
4550
4551 /* je/jz +1 */
4552 pbCodeBuf[off++] = 0x74;
4553 pbCodeBuf[off++] = 0x01;
4554
4555 /* int3 */
4556 pbCodeBuf[off++] = 0xcc;
4557
4558 /* For values smaller than the register size, we must check that the rest
4559 of the register is all zeros. */
4560 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
4561 {
4562 /* test reg64, imm32 */
4563 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4564 pbCodeBuf[off++] = 0xf7;
4565 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4566 pbCodeBuf[off++] = 0;
4567 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
4568 pbCodeBuf[off++] = 0xff;
4569 pbCodeBuf[off++] = 0xff;
4570
4571 /* je/jz +1 */
4572 pbCodeBuf[off++] = 0x74;
4573 pbCodeBuf[off++] = 0x01;
4574
4575 /* int3 */
4576 pbCodeBuf[off++] = 0xcc;
4577 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4578 }
4579 else
4580 {
4581 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4582 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
4583 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
4584 }
4585
4586# elif defined(RT_ARCH_ARM64)
4587 /* mov TMP0, [gstreg] */
4588 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
4589
4590 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4591 /* sub tmp0, tmp0, idxReg */
4592 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
4593 /* cbz tmp0, +1 */
4594 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4595 /* brk #0x1000+enmGstReg */
4596 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
4597 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4598
4599# else
4600# error "Port me!"
4601# endif
4602 return off;
4603}
4604#endif /* VBOX_STRICT */
4605
4606
4607#ifdef VBOX_STRICT
4608/**
4609 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
4610 * important bits.
4611 *
4612 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4613 * Trashes EFLAGS on AMD64.
4614 */
4615static uint32_t
4616iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
4617{
4618 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4619 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
4620 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
4621 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
4622
4623# ifdef RT_ARCH_AMD64
4624 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4625
4626 /* je/jz +1 */
4627 pbCodeBuf[off++] = 0x74;
4628 pbCodeBuf[off++] = 0x01;
4629
4630 /* int3 */
4631 pbCodeBuf[off++] = 0xcc;
4632
4633# elif defined(RT_ARCH_ARM64)
4634 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4635
4636 /* b.eq +1 */
4637 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
4638 /* brk #0x2000 */
4639 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
4640
4641# else
4642# error "Port me!"
4643# endif
4644 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4645
4646 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4647 return off;
4648}
4649#endif /* VBOX_STRICT */
4650
4651
4652/**
4653 * Emits code for checking the return code of a call and rcPassUp, returning
4654 * from the code if either is non-zero.
4655 */
4656DECL_HIDDEN_THROW(uint32_t)
4657iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
4658{
4659#ifdef RT_ARCH_AMD64
4660 /*
4661 * AMD64: eax = call status code.
4662 */
4663
4664 /* edx = rcPassUp */
4665 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4666# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4667 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
4668# endif
4669
4670 /* edx = eax | rcPassUp */
4671 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4672 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
4673 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
4674 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4675
4676 /* Jump to non-zero status return path. */
4677 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
4678
4679 /* done. */
4680
4681#elif RT_ARCH_ARM64
4682 /*
4683 * ARM64: w0 = call status code.
4684 */
4685# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4686 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
4687# endif
4688 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4689
4690 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4691
4692 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
4693
4694 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4695 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
4696 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
4697
4698#else
4699# error "port me"
4700#endif
4701 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4702 return off;
4703}
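
/*
 * In C terms, the code emitted above roughly corresponds to (sketch only):
 *      if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
 *          goto NonZeroRetOrPassUp;
 * with the current instruction number kept in a register for the status code
 * fiddling helper when IEMNATIVE_WITH_INSTRUCTION_COUNTING is defined.
 */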
4704
4705
4706/**
4707 * Emits code to check if the content of @a idxAddrReg is a canonical address,
4708 * raising a \#GP(0) if it isn't.
4709 *
4710 * @returns New code buffer offset, UINT32_MAX on failure.
4711 * @param pReNative The native recompile state.
4712 * @param off The code buffer offset.
4713 * @param idxAddrReg The host register with the address to check.
4714 * @param idxInstr The current instruction.
4715 */
4716DECL_HIDDEN_THROW(uint32_t)
4717iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
4718{
4719 /*
4720 * Make sure we don't have any outstanding guest register writes as we may
4721 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
4722 */
4723 off = iemNativeRegFlushPendingWrites(pReNative, off);
4724
4725#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4726 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4727#else
4728 RT_NOREF(idxInstr);
4729#endif
4730
4731#ifdef RT_ARCH_AMD64
4732 /*
4733 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
4734 * return raisexcpt();
4735 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
4736 */
4737 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4738
4739 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
4740 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
4741 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
4742 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
4743 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4744
4745 iemNativeRegFreeTmp(pReNative, iTmpReg);
4746
4747#elif defined(RT_ARCH_ARM64)
4748 /*
4749 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
4750 * return raisexcpt();
4751 * ----
4752 * mov x1, 0x800000000000
4753 * add x1, x0, x1
4754 * cmp xzr, x1, lsr 48
4755 * b.ne .Lraisexcpt
4756 */
4757 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4758
4759 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
4760 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
4761 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
4762 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4763
4764 iemNativeRegFreeTmp(pReNative, iTmpReg);
4765
4766#else
4767# error "Port me"
4768#endif
4769 return off;
4770}
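
/*
 * Illustration (not built): the arithmetic behind the AMD64 variant above.  A
 * 64-bit address is canonical when bits 63:47 all equal bit 47, i.e. when the
 * top 32 bits are in [0x00000000..0x00007fff] or [0xffff8000..0xffffffff].
 * Adding 0x8000 maps both ranges into [0x0000..0xffff], so shifting right by
 * 16 yields zero exactly for canonical addresses:
 *      0x00007fffffffffff -> 0x00007fff + 0x8000 = 0x0000ffff -> 0 (ok)
 *      0xffff800000000000 -> 0xffff8000 + 0x8000 = 0x00000000 -> 0 (ok, wraps)
 *      0x0000800000000000 -> 0x00008000 + 0x8000 = 0x00010000 -> 1 (#GP(0))
 */
#if 0
static bool iemNativeExampleIsCanonical(uint64_t uAddr)
{
    return ((((uint32_t)(uAddr >> 32)) + UINT32_C(0x8000)) >> 16) == 0;
}
#endif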
4771
4772
4773/**
4774 * Emits code to check if the content of @a idxAddrReg is within the limit of
4775 * idxSegReg, raising a \#GP(0) if it isn't.
4776 *
4777 * @returns New code buffer offset; throws VBox status code on error.
4778 * @param pReNative The native recompile state.
4779 * @param off The code buffer offset.
4780 * @param idxAddrReg The host register (32-bit) with the address to
4781 * check.
4782 * @param idxSegReg The segment register (X86_SREG_XXX) to check
4783 * against.
4784 * @param idxInstr The current instruction.
4785 */
4786DECL_HIDDEN_THROW(uint32_t)
4787iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4788 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
4789{
4790 /*
4791 * Make sure we don't have any outstanding guest register writes as we may
4792 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
4793 */
4794 off = iemNativeRegFlushPendingWrites(pReNative, off);
4795
4796#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4797 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4798#else
4799 RT_NOREF(idxInstr);
4800#endif
4801
4802 /** @todo implement expand down/whatnot checking */
4803 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
4804
4805 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4806 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
4807 kIemNativeGstRegUse_ForUpdate);
4808
4809 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
4810 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4811
4812 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
4813 return off;
4814}
4815
4816
4817/**
4818 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
4819 *
4820 * @returns The flush mask.
4821 * @param fCImpl The IEM_CIMPL_F_XXX flags.
4822 * @param fGstShwFlush The starting flush mask.
4823 */
4824DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
4825{
4826 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
4827 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
4828 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
4829 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
4830 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
4831 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
4832 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
4833 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
4834 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
4835 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
4836 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
4837 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
4838 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4839 return fGstShwFlush;
4840}
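
/*
 * Illustration (not built; flag values picked for the example): computing the
 * shadow flush mask inside an emitter for a far branch that also changes
 * RSP/SS and EFLAGS, and applying it.  A real caller is iemNativeEmitCImplCall()
 * below.
 */
#if 0
uint64_t const fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(  IEM_CIMPL_F_BRANCH_FAR
                                                                        | IEM_CIMPL_F_BRANCH_STACK_FAR
                                                                        | IEM_CIMPL_F_RFLAGS,
                                                                        RT_BIT_64(kIemNativeGstReg_Pc));
/* The mask now covers PC, CS.sel/base/limit, RSP, SS.sel/base/limit and EFLAGS. */
iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
#endif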
4841
4842
4843/**
4844 * Emits a call to a CImpl function or something similar.
4845 */
4846DECL_HIDDEN_THROW(uint32_t)
4847iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
4848 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
4849{
4850 /*
4851 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
4852 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
4853 */
4854 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
4855 fGstShwFlush
4856 | RT_BIT_64(kIemNativeGstReg_Pc)
4857 | RT_BIT_64(kIemNativeGstReg_EFlags));
4858 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4859
4860 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4861
4862 /*
4863 * Load the parameters.
4864 */
4865#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
4866 /* Special-case the hidden VBOXSTRICTRC pointer. */
4867 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4868 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4869 if (cAddParams > 0)
4870 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
4871 if (cAddParams > 1)
4872 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
4873 if (cAddParams > 2)
4874 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
4875 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4876
4877#else
4878 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
4879 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4880 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4881 if (cAddParams > 0)
4882 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
4883 if (cAddParams > 1)
4884 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
4885 if (cAddParams > 2)
4886# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
4887 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
4888# else
4889 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
4890# endif
4891#endif
4892
4893 /*
4894 * Make the call.
4895 */
4896 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
4897
4898#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4899 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4900#endif
4901
4902 /*
4903 * Check the status code.
4904 */
4905 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4906}
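
/*
 * In C terms, the emitted call is (sketch only, pfnCImpl treated symbolically):
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, uParam0, uParam1, uParam2);
 * with trailing parameters dropped according to cAddParams, and with the
 * hidden VBOXSTRICTRC return buffer passed first on Windows strict builds.
 */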
4907
4908
4909/**
4910 * Emits a call to a threaded worker function.
4911 */
4912DECL_HIDDEN_THROW(uint32_t)
4913iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
4914{
4915 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
4916 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4917
4918#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4919 /* The threaded function may throw / longjmp, so set the current instruction
4920 number if we're counting. */
4921 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4922#endif
4923
4924 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
4925
4926#ifdef RT_ARCH_AMD64
4927 /* Load the parameters and emit the call. */
4928# ifdef RT_OS_WINDOWS
4929# ifndef VBOXSTRICTRC_STRICT_ENABLED
4930 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4931 if (cParams > 0)
4932 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
4933 if (cParams > 1)
4934 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
4935 if (cParams > 2)
4936 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
4937# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
4938 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
4939 if (cParams > 0)
4940 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
4941 if (cParams > 1)
4942 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
4943 if (cParams > 2)
4944 {
4945 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
4946 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
4947 }
4948 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4949# endif /* VBOXSTRICTRC_STRICT_ENABLED */
4950# else
4951 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4952 if (cParams > 0)
4953 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
4954 if (cParams > 1)
4955 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
4956 if (cParams > 2)
4957 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
4958# endif
4959
4960 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4961
4962# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4963 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4964# endif
4965
4966#elif RT_ARCH_ARM64
4967 /*
4968 * ARM64:
4969 */
4970 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4971 if (cParams > 0)
4972 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
4973 if (cParams > 1)
4974 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
4975 if (cParams > 2)
4976 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
4977
4978 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4979
4980#else
4981# error "port me"
4982#endif
4983
4984 /*
4985 * Check the status code.
4986 */
4987 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
4988
4989 return off;
4990}
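
/*
 * In C terms, the call emitted above amounts to (sketch only):
 *      rc = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu, auParams[0], auParams[1], auParams[2]);
 * where only the first cParams parameters are actually loaded, and the return
 * value is then run through iemNativeEmitCheckCallRetAndPassUp().
 */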
4991
4992
4993/**
4994 * Emits the code at the CheckBranchMiss label.
4995 */
4996static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4997{
4998 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
4999 if (idxLabel != UINT32_MAX)
5000 {
5001 iemNativeLabelDefine(pReNative, idxLabel, off);
5002
5003 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5004 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5005 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5006
5007 /* jump back to the return sequence. */
5008 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5009 }
5010 return off;
5011}
5012
5013
5014/**
5015 * Emits the code at the NeedCsLimChecking label.
5016 */
5017static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5018{
5019 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5020 if (idxLabel != UINT32_MAX)
5021 {
5022 iemNativeLabelDefine(pReNative, idxLabel, off);
5023
5024 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5025 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5026 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5027
5028 /* jump back to the return sequence. */
5029 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5030 }
5031 return off;
5032}
5033
5034
5035/**
5036 * Emits the code at the ObsoleteTb label.
5037 */
5038static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5039{
5040 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5041 if (idxLabel != UINT32_MAX)
5042 {
5043 iemNativeLabelDefine(pReNative, idxLabel, off);
5044
5045 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5046 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5047 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5048
5049 /* jump back to the return sequence. */
5050 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5051 }
5052 return off;
5053}
5054
5055
5056/**
5057 * Emits the code at the RaiseGP0 label.
5058 */
5059static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5060{
5061 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5062 if (idxLabel != UINT32_MAX)
5063 {
5064 iemNativeLabelDefine(pReNative, idxLabel, off);
5065
5066 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5067 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5068 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5069
5070 /* jump back to the return sequence. */
5071 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5072 }
5073 return off;
5074}
5075
5076
5077/**
5078 * Emits the code at the ReturnWithFlags label (returns
5079 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5080 */
5081static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5082{
5083 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5084 if (idxLabel != UINT32_MAX)
5085 {
5086 iemNativeLabelDefine(pReNative, idxLabel, off);
5087
5088 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5089
5090 /* jump back to the return sequence. */
5091 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5092 }
5093 return off;
5094}
5095
5096
5097/**
5098 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
5099 */
5100static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5101{
5102 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
5103 if (idxLabel != UINT32_MAX)
5104 {
5105 iemNativeLabelDefine(pReNative, idxLabel, off);
5106
5107 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5108
5109 /* jump back to the return sequence. */
5110 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5111 }
5112 return off;
5113}
5114
5115
5116/**
5117 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5118 */
5119static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5120{
5121 /*
5122 * Generate the rc + rcPassUp fiddling code if needed.
5123 */
5124 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5125 if (idxLabel != UINT32_MAX)
5126 {
5127 iemNativeLabelDefine(pReNative, idxLabel, off);
5128
5129 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
5130#ifdef RT_ARCH_AMD64
5131# ifdef RT_OS_WINDOWS
5132# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5133 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5134# endif
5135 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5136 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5137# else
5138 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5139 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5140# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5141 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5142# endif
5143# endif
5144# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5145 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5146# endif
5147
5148#else
5149 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5150 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5151 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5152#endif
5153
5154 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5155 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5156 }
5157 return off;
5158}
5159
5160
5161/**
5162 * Emits a standard epilog.
5163 */
5164static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5165{
5166 *pidxReturnLabel = UINT32_MAX;
5167
5168 /*
5169 * Successful return, so clear the return register (eax, w0).
5170 */
5171 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
5172
5173 /*
5174 * Define label for common return point.
5175 */
5176 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5177 *pidxReturnLabel = idxReturn;
5178
5179 /*
5180 * Restore registers and return.
5181 */
5182#ifdef RT_ARCH_AMD64
5183 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5184
5185 /* Reposition rsp at the r15 restore point. */
5186 pbCodeBuf[off++] = X86_OP_REX_W;
5187 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5188 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5189 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5190
5191 /* Pop non-volatile registers and return */
5192 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5193 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5194 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5195 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5196 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5197 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5198 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5199 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5200# ifdef RT_OS_WINDOWS
5201 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5202 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5203# endif
5204 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5205 pbCodeBuf[off++] = 0xc9; /* leave */
5206 pbCodeBuf[off++] = 0xc3; /* ret */
5207 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5208
5209#elif RT_ARCH_ARM64
5210 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5211
5212 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
5213 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5214 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5215 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5216 IEMNATIVE_FRAME_VAR_SIZE / 8);
5217 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5218 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5219 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5220 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5221 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5222 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5223 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5224 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5225 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5226 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5227 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5228 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5229
5230 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5231 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5232 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5233 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5234
5235 /* retab / ret */
5236# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5237 if (1)
5238 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5239 else
5240# endif
5241 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
5242
5243#else
5244# error "port me"
5245#endif
5246 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5247
5248 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
5249}
5250
5251
5252/**
5253 * Emits a standard prolog.
5254 */
5255static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5256{
5257#ifdef RT_ARCH_AMD64
5258 /*
5259 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
5260 * reserving 64 bytes for stack variables plus 4 non-register argument
5261 * slots. Fixed register assignment: xBX = pReNative;
5262 * slots. Fixed register assignment: xBX = pVCpu.
5263 * Since we always do the same register spilling, we can use the same
5264 * unwind description for all the code.
5265 */
5266 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5267 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
5268 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
5269 pbCodeBuf[off++] = 0x8b;
5270 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
5271 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
5272 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
5273# ifdef RT_OS_WINDOWS
5274 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
5275 pbCodeBuf[off++] = 0x8b;
5276 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
5277 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
5278 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
5279# else
5280 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
5281 pbCodeBuf[off++] = 0x8b;
5282 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
5283# endif
5284 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
5285 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
5286 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
5287 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
5288 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
5289 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
5290 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
5291 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
5292
5293 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
5294 X86_GREG_xSP,
5295 IEMNATIVE_FRAME_ALIGN_SIZE
5296 + IEMNATIVE_FRAME_VAR_SIZE
5297 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
5298 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
5299 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
5300 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
5301 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
5302
5303#elif RT_ARCH_ARM64
5304 /*
5305 * We set up a stack frame exactly like on x86, only we have to push the
5306 * return address ourselves here. We save all non-volatile registers.
5307 */
5308 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5309
5310# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement of libunwind for JIT FDEs. Investigate further, as we've been unable
5311 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
5312 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
5313 * in any way conditional, so we just emit this instruction now and hope for the best... */
5314 /* pacibsp */
5315 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
5316# endif
5317
5318 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
5319 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
5320 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5321 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5322 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
5323 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
5324 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5325 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5326 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5327 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5328 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5329 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5330 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5331 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5332 /* Save the BP and LR (ret address) registers at the top of the frame. */
5333 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5334 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5335 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5336 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
5337 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
5338 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
5339
5340 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
5341 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
5342
5343 /* mov r28, r0 */
5344 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
5345 /* mov r27, r1 */
5346 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
5347
5348#else
5349# error "port me"
5350#endif
5351 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5352 return off;
5353}
5354
5355
5356
5357
5358/*********************************************************************************************************************************
5359* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
5360*********************************************************************************************************************************/
5361
5362#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
5363 { \
5364 Assert(pReNative->Core.bmVars == 0); \
5365 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
5366 Assert(pReNative->Core.bmStack == 0); \
5367 pReNative->fMc = (a_fMcFlags); \
5368 pReNative->fCImpl = (a_fCImplFlags); \
5369 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
5370
5371/** We have to get to the end in recompilation mode, as otherwise we won't
5372 * generate code for all the IEM_MC_IF_XXX branches. */
5373#define IEM_MC_END() \
5374 iemNativeVarFreeAll(pReNative); \
5375 } return off
5376
5377
5378
5379/*********************************************************************************************************************************
5380* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
5381*********************************************************************************************************************************/
5382
5383#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
5384 pReNative->fMc = 0; \
5385 pReNative->fCImpl = (a_fFlags); \
5386 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
5387
5388
5389#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
5390 pReNative->fMc = 0; \
5391 pReNative->fCImpl = (a_fFlags); \
5392 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
5393
5394DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5395 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5396 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
5397{
5398 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
5399}
5400
5401
5402#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
5403 pReNative->fMc = 0; \
5404 pReNative->fCImpl = (a_fFlags); \
5405 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5406 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
5407
5408DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5409 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5410 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
5411{
5412 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
5413}
5414
5415
5416#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
5417 pReNative->fMc = 0; \
5418 pReNative->fCImpl = (a_fFlags); \
5419 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5420 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
5421
5422DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5423 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5424 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
5425 uint64_t uArg2)
5426{
5427 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
5428}
5429
5430
5431
5432/*********************************************************************************************************************************
5433* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
5434*********************************************************************************************************************************/
5435
5436/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
5437 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
5438DECL_INLINE_THROW(uint32_t)
5439iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5440{
5441 /*
5442 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
5443 * return with a special status code and make the execution loop deal with
5444 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
5445 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
5446 * could continue w/o interruption, it will probably drop into the
5447 * debugger, so it's not worth the effort of trying to service it here and
5448 * we just lump it in with the handling of the others.
5449 *
5450 * To simplify the code and the register state management even more (wrt
5451 * the immediate in the AND operation), we always update the flags and skip
5452 * the conditional jump associated with the extra check.
5453 */
5454 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
5455 <= UINT32_MAX);
5456 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5457 kIemNativeGstRegUse_ForUpdate);
5458 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
5459 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
5460 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
5461 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
5462 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5463
5464 /* Free but don't flush the EFLAGS register. */
5465 iemNativeRegFreeTmp(pReNative, idxEflReg);
5466
5467 return off;
5468}
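
/*
 * Roughly equivalent C for the code emitted above (sketch only, guest EFLAGS
 * access simplified):
 *      uint32_t fEfl = <guest EFLAGS>;
 *      if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;
 *      <guest EFLAGS> = fEfl & ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 */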
5469
5470
5471#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
5472 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5473
5474#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
5475 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
5476 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5477
5478/** Same as iemRegAddToRip64AndFinishingNoFlags. */
5479DECL_INLINE_THROW(uint32_t)
5480iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5481{
5482 /* Allocate a temporary PC register. */
5483 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5484
5485 /* Perform the addition and store the result. */
5486 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
5487 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5488
5489 /* Free but don't flush the PC register. */
5490 iemNativeRegFreeTmp(pReNative, idxPcReg);
5491
5492 return off;
5493}
5494
5495
5496#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
5497 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5498
5499#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
5500 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
5501 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5502
5503/** Same as iemRegAddToEip32AndFinishingNoFlags. */
5504DECL_INLINE_THROW(uint32_t)
5505iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5506{
5507 /* Allocate a temporary PC register. */
5508 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5509
5510 /* Perform the addition and store the result. */
5511 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5512 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5513
5514 /* Free but don't flush the PC register. */
5515 iemNativeRegFreeTmp(pReNative, idxPcReg);
5516
5517 return off;
5518}
5519
5520
5521#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
5522 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5523
5524#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
5525 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
5526 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5527
5528/** Same as iemRegAddToIp16AndFinishingNoFlags. */
5529DECL_INLINE_THROW(uint32_t)
5530iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5531{
5532 /* Allocate a temporary PC register. */
5533 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5534
5535 /* Perform the addition and store the result. */
5536 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5537 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5538 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5539
5540 /* Free but don't flush the PC register. */
5541 iemNativeRegFreeTmp(pReNative, idxPcReg);
5542
5543 return off;
5544}
5545
5546
5547
5548/*********************************************************************************************************************************
5549* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
5550*********************************************************************************************************************************/
5551
5552#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
5553 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5554 (a_enmEffOpSize), pCallEntry->idxInstr)
5555
5556#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5557 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
5558 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5559
5560#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
5561 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5562 IEMMODE_16BIT, pCallEntry->idxInstr)
5563
5564#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
5565 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
5566 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5567
5568#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
5569 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5570 IEMMODE_64BIT, pCallEntry->idxInstr)
5571
5572#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
5573 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
5574 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5575
5576/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
5577 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
5578 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
5579DECL_INLINE_THROW(uint32_t)
5580iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5581 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5582{
5583 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
5584
5585 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5586 off = iemNativeRegFlushPendingWrites(pReNative, off);
5587
5588 /* Allocate a temporary PC register. */
5589 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5590
5591 /* Perform the addition. */
5592 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
5593
5594 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
5595 {
5596 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5597 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5598 }
5599 else
5600 {
5601 /* Just truncate the result to 16-bit IP. */
5602 Assert(enmEffOpSize == IEMMODE_16BIT);
5603 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5604 }
5605 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5606
5607 /* Free but don't flush the PC register. */
5608 iemNativeRegFreeTmp(pReNative, idxPcReg);
5609
5610 return off;
5611}
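/*
 * Illustrative sketch (not from the original source): the guest-visible effect of
 * the 64-bit relative jump emitted above, written as plain C.  X86_IS_CANONICAL is
 * the iprt/x86.h predicate; the #GP(0) path is only indicated by a comment.
 */
#if 0 /* example only, never compiled */
    uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
    if (enmEffOpSize == IEMMODE_64BIT)
    {
        if (!X86_IS_CANONICAL(uNewRip))
        { /* raise #GP(0) and exit the TB */ }
    }
    else
        uNewRip = (uint16_t)uNewRip;    /* 16-bit operand size: truncate to IP */
    pVCpu->cpum.GstCtx.rip = uNewRip;
#endif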
5612
5613
5614#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
5615 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5616 (a_enmEffOpSize), pCallEntry->idxInstr)
5617
5618#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5619 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
5620 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5621
5622#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
5623 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5624 IEMMODE_16BIT, pCallEntry->idxInstr)
5625
5626#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
5627 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
5628 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5629
5630#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
5631 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5632 IEMMODE_32BIT, pCallEntry->idxInstr)
5633
5634#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
5635 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
5636 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5637
5638/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
5639 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
5640 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
5641DECL_INLINE_THROW(uint32_t)
5642iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5643 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5644{
5645 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
5646
5647 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5648 off = iemNativeRegFlushPendingWrites(pReNative, off);
5649
5650 /* Allocate a temporary PC register. */
5651 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5652
5653 /* Perform the addition. */
5654 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5655
5656 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
5657 if (enmEffOpSize == IEMMODE_16BIT)
5658 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5659
5660 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
5661 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5662
5663 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5664
5665 /* Free but don't flush the PC register. */
5666 iemNativeRegFreeTmp(pReNative, idxPcReg);
5667
5668 return off;
5669}
5670
5671
5672#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
5673 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
5674
5675#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
5676 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
5677 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5678
5679#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
5680 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
5681
5682#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
5683 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
5684 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5685
5686#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
5687 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
5688
5689#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
5690 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
5691 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5692
5693/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
5694DECL_INLINE_THROW(uint32_t)
5695iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5696 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
5697{
5698 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5699 off = iemNativeRegFlushPendingWrites(pReNative, off);
5700
5701 /* Allocate a temporary PC register. */
5702 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5703
5704 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
5705 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5706 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5707 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5708 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5709
5710 /* Free but don't flush the PC register. */
5711 iemNativeRegFreeTmp(pReNative, idxPcReg);
5712
5713 return off;
5714}
5715
5716
5717
5718/*********************************************************************************************************************************
5719* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
5720*********************************************************************************************************************************/
5721
5722/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
5723#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
5724 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5725
5726/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
5727#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
5728 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5729
5730/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
5731#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
5732 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5733
5734/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
5735 * clears flags. */
5736#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
5737 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
5738 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5739
5740/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
5741 * clears flags. */
5742#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
5743 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
5744 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5745
5746/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
5747 * clears flags. */
5748#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
5749 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
5750 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5751
5752#undef IEM_MC_SET_RIP_U16_AND_FINISH
5753
5754
5755/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
5756#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
5757 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
5758
5759/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
5760#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
5761 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
5762
5763/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
5764 * clears flags. */
5765#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
5766 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
5767 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5768
5769/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
5770 * and clears flags. */
5771#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
5772 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
5773 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5774
5775#undef IEM_MC_SET_RIP_U32_AND_FINISH
5776
5777
5778/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
5779#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
5780 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
5781
5782/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
5783 * and clears flags. */
5784#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
5785 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
5786 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5787
5788#undef IEM_MC_SET_RIP_U64_AND_FINISH
5789
5790
5791/** Same as iemRegRipJumpU16AndFinishNoFlags,
5792 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
5793DECL_INLINE_THROW(uint32_t)
5794iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
5795 uint8_t idxInstr, uint8_t cbVar)
5796{
5797 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
5798 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
5799
5800 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5801 off = iemNativeRegFlushPendingWrites(pReNative, off);
5802
5803 /* Get a register with the new PC loaded from idxVarPc.
5804 Note! This ASSUMES that the high bits of the GPR are zeroed. */
5805 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
5806
5807 /* Check limit (may #GP(0) + exit TB). */
5808 if (!f64Bit)
5809 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5810 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5811 else if (cbVar > sizeof(uint32_t))
5812 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5813
5814 /* Store the result. */
5815 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5816
5817 /** @todo implicitly free the variable? */
5818
5819 return off;
5820}
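/*
 * Illustrative sketch (not from the original source) of the checks the code emitted
 * above performs before committing the new PC.  Here uNewPc stands for the value in
 * the PC variable/register, and cs.u32Limit is used as a stand-in for the segment
 * limit checked by iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0.
 */
#if 0 /* example only, never compiled */
    if (!f64Bit)
    {
        if ((uint32_t)uNewPc > pVCpu->cpum.GstCtx.cs.u32Limit)
        { /* raise #GP(0) and exit the TB */ }
    }
    else if (cbVar > sizeof(uint32_t) && !X86_IS_CANONICAL(uNewPc))
    { /* raise #GP(0) and exit the TB */ }
    pVCpu->cpum.GstCtx.rip = uNewPc;
#endif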
5821
5822
5823
5824/*********************************************************************************************************************************
5825* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
5826*********************************************************************************************************************************/
5827
5828/**
5829 * Pushes an IEM_MC_IF_XXX onto the condition stack.
5830 *
5831 * @returns Pointer to the condition stack entry.
5832 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if too deeply nested (longjmp).
5833 */
5834DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
5835{
5836 uint32_t const idxStack = pReNative->cCondDepth;
5837 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
5838
5839 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
5840 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
5841
5842 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
5843 pEntry->fInElse = false;
5844 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
5845 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
5846
5847 return pEntry;
5848}
5849
5850
5851/**
5852 * Start of the if-block, snapshotting the register and variable state.
5853 */
5854DECL_INLINE_THROW(void)
5855iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
5856{
5857 Assert(offIfBlock != UINT32_MAX);
5858 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5859 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5860 Assert(!pEntry->fInElse);
5861
5862 /* Define the start of the IF block if requested or for disassembly purposes. */
5863 if (idxLabelIf != UINT32_MAX)
5864 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
5865#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5866 else
5867 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
5868#else
5869 RT_NOREF(offIfBlock);
5870#endif
5871
5872 /* Copy the initial state so we can restore it in the 'else' block. */
5873 pEntry->InitialState = pReNative->Core;
5874}
5875
5876
5877#define IEM_MC_ELSE() } while (0); \
5878 off = iemNativeEmitElse(pReNative, off); \
5879 do {
5880
5881/** Emits code related to IEM_MC_ELSE. */
5882DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5883{
5884 /* Check sanity and get the conditional stack entry. */
5885 Assert(off != UINT32_MAX);
5886 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5887 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5888 Assert(!pEntry->fInElse);
5889
5890 /* Jump to the endif */
5891 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
5892
5893 /* Define the else label and enter the else part of the condition. */
5894 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5895 pEntry->fInElse = true;
5896
5897 /* Snapshot the core state so we can do a merge at the endif and restore
5898 the snapshot we took at the start of the if-block. */
5899 pEntry->IfFinalState = pReNative->Core;
5900 pReNative->Core = pEntry->InitialState;
5901
5902 return off;
5903}
5904
5905
5906#define IEM_MC_ENDIF() } while (0); \
5907 off = iemNativeEmitEndIf(pReNative, off)
5908
5909/** Emits code related to IEM_MC_ENDIF. */
5910DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5911{
5912 /* Check sanity and get the conditional stack entry. */
5913 Assert(off != UINT32_MAX);
5914 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5915 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5916
5917 /*
5918 * Now we have to find common ground with the core state at the end of the
5919 * if-block (or the initial state if there is no else-block). Use the smallest
5920 * common denominator and just drop anything that isn't the same in both states.
5921 */
5922 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
5923 * which is why we're doing this at the end of the else-block.
5924 * But we'd need more info about the future for that to be worth the effort. */
5925 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
5926 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
5927 {
5928 /* Shadowed guest registers first. */
5929 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
5930 if (fGstRegs)
5931 {
5932 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
5933 do
5934 {
5935 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5936 fGstRegs &= ~RT_BIT_64(idxGstReg);
5937
5938 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5939 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
5940 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
5941 {
5942 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
5943 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
5944 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
5945 }
5946 } while (fGstRegs);
5947 }
5948 else
5949 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
5950
5951 /* Check variables next. For now we must require them to be identical
5952 or stuff we can recreate. */
5953 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
5954 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
5955 if (fVars)
5956 {
5957 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
5958 do
5959 {
5960 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
5961 fVars &= ~RT_BIT_32(idxVar);
5962
5963 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
5964 {
5965 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
5966 continue;
5967 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5968 {
5969 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5970 if (idxHstReg != UINT8_MAX)
5971 {
5972 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5973 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5974 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
5975 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5976 }
5977 continue;
5978 }
5979 }
5980 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
5981 continue;
5982
5983 /* Irreconcilable, so drop it. */
5984 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5985 if (idxHstReg != UINT8_MAX)
5986 {
5987 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5988 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5989 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
5990 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5991 }
5992 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
5993 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5994 } while (fVars);
5995 }
5996
5997 /* Finally, check that the host register allocations match. */
5998 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
5999 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
6000 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
6001 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
6002 }
6003
6004 /*
6005 * Define the endif label and maybe the else one if we're still in the 'if' part.
6006 */
6007 if (!pEntry->fInElse)
6008 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6009 else
6010 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
6011 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
6012
6013 /* Pop the conditional stack. */
6014 pReNative->cCondDepth -= 1;
6015
6016 return off;
6017}
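/*
 * Illustrative sketch (not from the original source): how the IEM_MC_IF_XXX /
 * IEM_MC_ELSE / IEM_MC_ENDIF macros above expand inside a generated recompiler
 * function body, and what each step contributes to the emitted native code.
 * The statement bodies are placeholders.
 */
#if 0 /* example only, never compiled */
    off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF); /* IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF): test + jcc to the 'else' label */
    do {
        /* ... if-block statements ... */
    } while (0);
    off = iemNativeEmitElse(pReNative, off);                       /* IEM_MC_ELSE(): jmp to 'endif', define the 'else' label, restore state */
    do {
        /* ... else-block statements ... */
    } while (0);
    off = iemNativeEmitEndIf(pReNative, off);                      /* IEM_MC_ENDIF(): reconcile register/variable state, define 'endif' label */
#endif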
6018
6019
6020#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
6021 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
6022 do {
6023
6024/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
6025DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6026{
6027 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6028
6029 /* Get the eflags. */
6030 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6031 kIemNativeGstRegUse_ReadOnly);
6032
6033 /* Test and jump. */
6034 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6035
6036 /* Free but don't flush the EFlags register. */
6037 iemNativeRegFreeTmp(pReNative, idxEflReg);
6038
6039 /* Make a copy of the core state now as we start the if-block. */
6040 iemNativeCondStartIfBlock(pReNative, off);
6041
6042 return off;
6043}
6044
6045
6046#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
6047 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
6048 do {
6049
6050/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
6051DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6052{
6053 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6054
6055 /* Get the eflags. */
6056 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6057 kIemNativeGstRegUse_ReadOnly);
6058
6059 /* Test and jump. */
6060 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6061
6062 /* Free but don't flush the EFlags register. */
6063 iemNativeRegFreeTmp(pReNative, idxEflReg);
6064
6065 /* Make a copy of the core state now as we start the if-block. */
6066 iemNativeCondStartIfBlock(pReNative, off);
6067
6068 return off;
6069}
6070
6071
6072#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
6073 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
6074 do {
6075
6076/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
6077DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6078{
6079 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6080
6081 /* Get the eflags. */
6082 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6083 kIemNativeGstRegUse_ReadOnly);
6084
6085 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6086 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6087
6088 /* Test and jump. */
6089 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6090
6091 /* Free but don't flush the EFlags register. */
6092 iemNativeRegFreeTmp(pReNative, idxEflReg);
6093
6094 /* Make a copy of the core state now as we start the if-block. */
6095 iemNativeCondStartIfBlock(pReNative, off);
6096
6097 return off;
6098}
6099
6100
6101#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
6102 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
6103 do {
6104
6105/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
6106DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6107{
6108 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6109
6110 /* Get the eflags. */
6111 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6112 kIemNativeGstRegUse_ReadOnly);
6113
6114 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6115 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6116
6117 /* Test and jump. */
6118 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6119
6120 /* Free but don't flush the EFlags register. */
6121 iemNativeRegFreeTmp(pReNative, idxEflReg);
6122
6123 /* Make a copy of the core state now as we start the if-block. */
6124 iemNativeCondStartIfBlock(pReNative, off);
6125
6126 return off;
6127}
6128
6129
6130#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
6131 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
6132 do {
6133
6134#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
6135 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
6136 do {
6137
6138/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
6139DECL_INLINE_THROW(uint32_t)
6140iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6141 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6142{
6143 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6144
6145 /* Get the eflags. */
6146 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6147 kIemNativeGstRegUse_ReadOnly);
6148
6149 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6150 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6151
6152 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6153 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6154 Assert(iBitNo1 != iBitNo2);
6155
6156#ifdef RT_ARCH_AMD64
6157 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
6158
6159 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6160 if (iBitNo1 > iBitNo2)
6161 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6162 else
6163 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6164 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6165
6166#elif defined(RT_ARCH_ARM64)
6167 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6168 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6169
6170 /* and tmpreg, eflreg, #1<<iBitNo1 */
6171 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6172
6173 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6174 if (iBitNo1 > iBitNo2)
6175 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6176 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6177 else
6178 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6179 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6180
6181 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6182
6183#else
6184# error "Port me"
6185#endif
6186
6187 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6188 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6189 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6190
6191 /* Free but don't flush the EFlags and tmp registers. */
6192 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6193 iemNativeRegFreeTmp(pReNative, idxEflReg);
6194
6195 /* Make a copy of the core state now as we start the if-block. */
6196 iemNativeCondStartIfBlock(pReNative, off);
6197
6198 return off;
6199}
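/*
 * Worked example (illustrative only, not from the original source): with
 * fBit1InEfl = X86_EFL_SF (bit 7) and fBit2InEfl = X86_EFL_OF (bit 11), the
 * AND + shift + XOR sequence above reduces to the plain C below; bit 11 of the
 * temporary ends up set exactly when SF != OF, which is what the final bit test
 * branches on.
 */
#if 0 /* example only, never compiled */
    uint32_t fTmp = fEfl & X86_EFL_SF;                  /* and  tmpreg, eflreg, #RT_BIT_32(7) */
    fTmp <<= X86_EFL_OF_BIT - X86_EFL_SF_BIT;           /* shift SF up into the OF position (iBitNo1 < iBitNo2) */
    fTmp ^= fEfl;                                       /* xor  tmpreg, eflreg: bit 11 is now set iff SF != OF */
    bool const fNotEqual = RT_BOOL(fTmp & X86_EFL_OF);  /* the IEM_MC_IF_EFL_BITS_EQ variant jumps to 'else' when set */
#endif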
6200
6201
6202#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
6203 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
6204 do {
6205
6206#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
6207 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
6208 do {
6209
6210/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
6211 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
6212DECL_INLINE_THROW(uint32_t)
6213iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
6214 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6215{
6216 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6217
6218 /* We need an if-block label for the non-inverted variant. */
6219 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
6220 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
6221
6222 /* Get the eflags. */
6223 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6224 kIemNativeGstRegUse_ReadOnly);
6225
6226 /* Translate the flag masks to bit numbers. */
6227 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6228 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6229
6230 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6231 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6232 Assert(iBitNo1 != iBitNo);
6233
6234 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6235 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6236 Assert(iBitNo2 != iBitNo);
6237 Assert(iBitNo2 != iBitNo1);
6238
6239#ifdef RT_ARCH_AMD64
6240 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
6241#elif defined(RT_ARCH_ARM64)
6242 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6243#endif
6244
6245 /* Check for the lone bit first. */
6246 if (!fInverted)
6247 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6248 else
6249 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
6250
6251 /* Then extract and compare the other two bits. */
6252#ifdef RT_ARCH_AMD64
6253 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6254 if (iBitNo1 > iBitNo2)
6255 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6256 else
6257 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6258 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6259
6260#elif defined(RT_ARCH_ARM64)
6261 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6262
6263 /* and tmpreg, eflreg, #1<<iBitNo1 */
6264 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6265
6266 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6267 if (iBitNo1 > iBitNo2)
6268 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6269 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6270 else
6271 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6272 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6273
6274 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6275
6276#else
6277# error "Port me"
6278#endif
6279
6280 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6281 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6282 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6283
6284 /* Free but don't flush the EFlags and tmp registers. */
6285 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6286 iemNativeRegFreeTmp(pReNative, idxEflReg);
6287
6288 /* Make a copy of the core state now as we start the if-block. */
6289 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
6290
6291 return off;
6292}
6293
6294
6295#define IEM_MC_IF_CX_IS_NZ() \
6296 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
6297 do {
6298
6299/** Emits code for IEM_MC_IF_CX_IS_NZ. */
6300DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6301{
6302 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6303
6304 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6305 kIemNativeGstRegUse_ReadOnly);
6306 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6307 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6308
6309 iemNativeCondStartIfBlock(pReNative, off);
6310 return off;
6311}
6312
6313
6314#define IEM_MC_IF_ECX_IS_NZ() \
6315 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
6316 do {
6317
6318#define IEM_MC_IF_RCX_IS_NZ() \
6319 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
6320 do {
6321
6322/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
6323DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
6324{
6325 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6326
6327 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6328 kIemNativeGstRegUse_ReadOnly);
6329 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6330 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6331
6332 iemNativeCondStartIfBlock(pReNative, off);
6333 return off;
6334}
6335
6336
6337#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6338 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
6339 do {
6340
6341#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6342 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
6343 do {
6344
6345/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6346DECL_INLINE_THROW(uint32_t)
6347iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
6348{
6349 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6350
6351 /* We have to load both RCX and EFLAGS before we can start branching,
6352 otherwise we'll end up in the else-block with an inconsistent
6353 register allocator state.
6354 Doing EFLAGS first as it's more likely to be loaded, right? */
6355 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6356 kIemNativeGstRegUse_ReadOnly);
6357 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6358 kIemNativeGstRegUse_ReadOnly);
6359
6360 /** @todo we could reduce this to a single branch instruction by spending a
6361 * temporary register and some setnz stuff. Not sure if loops are
6362 * worth it. */
6363 /* Check CX. */
6364 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6365
6366 /* Check the EFlags bit. */
6367 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6368 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6369 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6370 !fCheckIfSet /*fJmpIfSet*/);
6371
6372 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6373 iemNativeRegFreeTmp(pReNative, idxEflReg);
6374
6375 iemNativeCondStartIfBlock(pReNative, off);
6376 return off;
6377}
6378
6379
6380#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6381 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
6382 do {
6383
6384#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6385 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
6386 do {
6387
6388#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6389 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
6390 do {
6391
6392#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6393 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
6394 do {
6395
6396/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
6397 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
6398 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
6399 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6400DECL_INLINE_THROW(uint32_t)
6401iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6402 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
6403{
6404 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6405
6406 /* We have to load both RCX and EFLAGS before we can start branching,
6407 otherwise we'll end up in the else-block with an inconsistent
6408 register allocator state.
6409 Doing EFLAGS first as it's more likely to be loaded, right? */
6410 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6411 kIemNativeGstRegUse_ReadOnly);
6412 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6413 kIemNativeGstRegUse_ReadOnly);
6414
6415 /** @todo we could reduce this to a single branch instruction by spending a
6416 * temporary register and some setnz stuff. Not sure if loops are
6417 * worth it. */
6418 /* Check RCX/ECX. */
6419 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6420
6421 /* Check the EFlags bit. */
6422 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6423 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6424 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6425 !fCheckIfSet /*fJmpIfSet*/);
6426
6427 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6428 iemNativeRegFreeTmp(pReNative, idxEflReg);
6429
6430 iemNativeCondStartIfBlock(pReNative, off);
6431 return off;
6432}
6433
6434
6435
6436/*********************************************************************************************************************************
6437* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6438*********************************************************************************************************************************/
6439/** Number of hidden arguments for CIMPL calls.
6440 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
6441#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6442# define IEM_CIMPL_HIDDEN_ARGS 3
6443#else
6444# define IEM_CIMPL_HIDDEN_ARGS 2
6445#endif
6446
6447#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
6448 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
6449
6450#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
6451 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
6452
6453#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
6454 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
6455
6456#define IEM_MC_LOCAL(a_Type, a_Name) \
6457 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
6458
6459#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
6460 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
6461
6462
6463/**
6464 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
6465 */
6466DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
6467{
6468 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
6469 return IEM_CIMPL_HIDDEN_ARGS;
6470 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
6471 return 1;
6472 return 0;
6473}
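/*
 * Illustrative note (assumption, not from the original source): the hidden
 * arguments of a CIMPL call are the pVCpu pointer and the instruction length,
 * plus a return-buffer pointer on Windows/AMD64 when VBOXSTRICTRC is strict.
 * An explicit IEM_MC_ARG(..., 0) in a CIMPL-calling block therefore ends up in
 * host argument slot IEM_CIMPL_HIDDEN_ARGS rather than slot 0, as sketched below.
 */
#if 0 /* example only, never compiled */
    IEM_MC_ARG(uint16_t, u16Value, 0);  /* hypothetical: allocated as argument IEM_CIMPL_HIDDEN_ARGS + 0 */
    IEM_MC_ARG(uint8_t,  bImm,     1);  /* hypothetical: allocated as argument IEM_CIMPL_HIDDEN_ARGS + 1 */
#endif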
6474
6475
6476/**
6477 * Internal work that allocates a variable with kind set to
6478 * kIemNativeVarKind_Invalid and no current stack allocation.
6479 *
6480 * The kind will either be set by the caller or later when the variable is first
6481 * assigned a value.
6482 */
6483static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6484{
6485 Assert(cbType > 0 && cbType <= 64);
6486 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6487 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6488 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6489 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6490 pReNative->Core.aVars[idxVar].cbVar = cbType;
6491 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6492 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6493 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6494 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6495 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6496 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6497 pReNative->Core.aVars[idxVar].u.uValue = 0;
6498 return idxVar;
6499}
6500
6501
6502/**
6503 * Internal work that allocates an argument variable w/o setting enmKind.
6504 */
6505static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6506{
6507 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6508 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6509 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6510
6511 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6512 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
6513 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6514 return idxVar;
6515}
6516
6517
6518/**
6519 * Gets the stack slot for a stack variable, allocating one if necessary.
6520 *
6521 * Calling this function implies that the stack slot will contain a valid
6522 * variable value. The caller deals with any register currently assigned to the
6523 * variable, typically by spilling it into the stack slot.
6524 *
6525 * @returns The stack slot number.
6526 * @param pReNative The recompiler state.
6527 * @param idxVar The variable.
6528 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6529 */
6530DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6531{
6532 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6533 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
6534
6535 /* Already got a slot? */
6536 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6537 if (idxStackSlot != UINT8_MAX)
6538 {
6539 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6540 return idxStackSlot;
6541 }
6542
6543 /*
6544 * A single slot is easy to allocate.
6545 * Allocate them from the top end, closest to BP, to reduce the displacement.
6546 */
6547 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
6548 {
6549 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6550 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6551 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6552 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6553 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
6554 return (uint8_t)iSlot;
6555 }
6556
6557 /*
6558 * We need more than one stack slot.
6559 *
6560 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6561 */
6562 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6563 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
6564 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
6565 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
6566 uint32_t bmStack = ~pReNative->Core.bmStack;
6567 while (bmStack != UINT32_MAX)
6568 {
6569/** @todo allocate from the top to reduce BP displacement. */
6570 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
6571 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6572 if (!(iSlot & fBitAlignMask))
6573 {
6574 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
6575 {
6576 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6577 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6578 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
6579 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
6580 return (uint8_t)iSlot;
6581 }
6582 }
6583 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
6584 }
6585 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6586}
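/*
 * Worked example (illustrative only, not from the original source), assuming
 * 8-byte stack slots: for cbVar = 32 the masks in the multi-slot path above are
 *      fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 3
 *      fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1             = RT_BIT_32(4) - 1 = 0xf
 * i.e. the variable needs four consecutive slots starting at a slot index that is
 * a multiple of four, which is what the (iSlot & fBitAlignMask) test enforces.
 */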
6587
6588
6589/**
6590 * Changes the variable to a stack variable.
6591 *
6592 * Currently this is only possible to do the first time the variable is used;
6593 * switching later can be implemented but hasn't been done.
6594 *
6595 * @param pReNative The recompiler state.
6596 * @param idxVar The variable.
6597 * @throws VERR_IEM_VAR_IPE_2
6598 */
6599static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6600{
6601 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6602 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6603 {
6604 /* We could in theory transition from immediate to stack as well, but it
6605 would involve the caller doing work storing the value on the stack. So,
6606 till that's required we only allow transition from invalid. */
6607 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6608 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6609 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6610 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
6611
6612 /* Note! We don't allocate a stack slot here, that's only done when a
6613 slot is actually needed to hold a variable value. */
6614 }
6615}
6616
6617
6618/**
6619 * Changes the variable to a constant (immediate) value.
6620 *
6621 * This does not require stack storage as we know the value and can always
6622 * reload it, unless of course it's referenced.
6623 *
6624 * @param pReNative The recompiler state.
6625 * @param idxVar The variable.
6626 * @param uValue The immediate value.
6627 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6628 */
6629static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6630{
6631 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6632 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
6633 {
6634 /* Only simple transitions for now. */
6635 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6636 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6637 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
6638 }
6639 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6640
6641 pReNative->Core.aVars[idxVar].u.uValue = uValue;
6642 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
6643 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
6644 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
6645}
6646
6647
6648/**
6649 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6650 *
6651 * This does not require stack storage as we know the value and can always
6652 * reload it. Loading is postponed till needed.
6653 *
6654 * @param pReNative The recompiler state.
6655 * @param idxVar The variable.
6656 * @param idxOtherVar The variable to take the (stack) address of.
6657 *
6658 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6659 */
6660static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6661{
6662 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6663 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6664
6665 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6666 {
6667 /* Only simple transitions for now. */
6668 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6669 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6670 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6671 }
6672 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6673
6674 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
6675
6676 /* Update the other variable, ensure it's a stack variable. */
6677 /** @todo handle variables with const values... that'll go boom now. */
6678 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6679 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
6680}
6681
6682
6683/**
6684 * Sets the variable to a reference (pointer) to a guest register reference.
6685 *
6686 * This does not require stack storage as we know the value and can always
6687 * reload it. Loading is postponed till needed.
6688 *
6689 * @param pReNative The recompiler state.
6690 * @param idxVar The variable.
6691 * @param enmRegClass The class of guest registers to reference.
6692 * @param idxReg The register within @a enmRegClass to reference.
6693 *
6694 * @throws VERR_IEM_VAR_IPE_2
6695 */
6696static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6697 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6698{
6699 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6700
6701 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
6702 {
6703 /* Only simple transitions for now. */
6704 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6705 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6706 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
6707 }
6708 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6709
6710 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
6711 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
6712}
6713
6714
6715DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6716{
6717 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6718}
6719
6720
6721DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6722{
6723 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6724
6725 /* Since we're using a generic uint64_t value type, we must truncate it if
6726 the variable is smaller, otherwise we may end up with a too large value when
6727 scaling up an imm8 w/ sign-extension.
6728
6729 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
6730 in the bios, bx=1) when running on arm, because clang expects 16-bit
6731 register parameters to have bits 16 and up set to zero. Instead of
6732 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
6733 CF value in the result. */
6734 switch (cbType)
6735 {
6736 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6737 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6738 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6739 }
6740 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6741 return idxVar;
6742}
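/*
 * Illustrative sketch (not from the original source) of the truncation above for
 * the "add bx, 0xffff" case mentioned in the comment: the immediate arrives as a
 * sign-extended uint64_t and only the low 16 bits survive the masking.
 */
#if 0 /* example only, never compiled */
    uint64_t uValue = (uint64_t)(int64_t)(int16_t)UINT16_C(0xffff);  /* 0xffffffffffffffff after sign-extension */
    uValue &= UINT64_C(0xffff);                                      /* the cbType == sizeof(uint16_t) case above */
    Assert(uValue == UINT64_C(0xffff));
#endif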
6743
6744
6745DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6746{
6747 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6748 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6749 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6750 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6751
6752 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6753 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
6754 return idxArgVar;
6755}
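/*
 * Illustrative usage sketch (hypothetical, not from the original source): the kind
 * of MC sequence the local-ref argument allocator above is meant for, where an
 * argument is a pointer to a local that the called helper writes back.
 */
#if 0 /* example only, never compiled */
    IEM_MC_LOCAL(uint16_t, u16Value);
    IEM_MC_ARG_LOCAL_REF(uint16_t *, pu16Value, u16Value, 1);  /* arg #1 = &u16Value, forces u16Value onto the stack */
#endif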
6756
6757
6758DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6759{
6760 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6761 /* Don't set to stack now, leave that to the first use as for instance
6762 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6763 return idxVar;
6764}
6765
6766
6767DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6768{
6769 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6770
6771 /* Since we're using a generic uint64_t value type, we must truncate it if
6772 the variable is smaller, otherwise we may end up with a too large value when
6773 scaling up an imm8 w/ sign-extension. */
6774 switch (cbType)
6775 {
6776 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6777 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6778 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6779 }
6780 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6781 return idxVar;
6782}
6783
6784
6785/**
6786 * Releases the variable's register.
6787 *
6788 * The register must have been previously acquired calling
6789 * iemNativeVarRegisterAcquire(), iemNativeVarRegisterAcquireForGuestReg() or
6790 * iemNativeVarRegisterSetAndAcquire().
6791 */
6792DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6793{
6794 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6795 Assert(pReNative->Core.aVars[idxVar].fRegAcquired);
6796 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6797}
6798
6799
6800/**
6801 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6802 * fixed till we call iemNativeVarRegisterRelease.
6803 *
6804 * @returns The host register number.
6805 * @param pReNative The recompiler state.
6806 * @param idxVar The variable.
6807 * @param poff Pointer to the instruction buffer offset.
6808 * In case a register needs to be freed up or the value
6809 * loaded off the stack.
6810 * @param fInitialized Set if the variable must already have been initialized.
6811 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6812 * the case.
6813 * @param idxRegPref Preferred register number or UINT8_MAX.
6814 */
6815DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6816 bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX)
6817{
6818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6819 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
6820 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6821
6822 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6823 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6824 {
6825 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
6826 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6827 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6828 return idxReg;
6829 }
6830
6831 /*
6832 * If the kind of variable has not yet been set, default to 'stack'.
6833 */
6834 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
6835 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6836 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
6837 iemNativeVarSetKindToStack(pReNative, idxVar);
6838
6839 /*
6840 * We have to allocate a register for the variable, even if it's a stack one,
6841 * as we don't know if there are modifications being made to it before it's
6842 * finalized (todo: analyze and insert hints about that?).
6843 *
6844 * If we can, we try to get the correct register for argument variables. This
6845 * is assuming that most argument variables are fetched as close as possible
6846 * to the actual call, so that there aren't any interfering hidden calls
6847 * (memory accesses, etc) in between.
6848 *
6849 * If we cannot, or it's an ordinary (non-argument) variable, we make sure no
6850 * argument registers that will be used by this MC block will be allocated here,
6851 * and we always prefer non-volatile registers to avoid needing to spill stuff
6852 * for internal calls.
6853 */
6854    /** @todo Have the python script detect too early argument value fetches and
6855     *        warn about hidden calls causing less optimal code to be generated. */
6856
6857 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6858 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6859 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6860 {
6861 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6862 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6863 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6864 }
6865 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6866 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6867 {
6868 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6869 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6870 & ~pReNative->Core.bmHstRegsWithGstShadow
6871 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6872 & fNotArgsMask;
6873 if (fRegs)
6874 {
6875            /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
6876 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6877 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6878 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6879 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6880 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6881 }
6882 else
6883 {
6884 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6885 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6886 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6887 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6888 }
6889 }
6890 else
6891 {
6892 idxReg = idxRegPref;
6893 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6894 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
6895 }
6896 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6897 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6898
6899 /*
6900 * Load it off the stack if we've got a stack slot.
6901 */
6902 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6903 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6904 {
6905 Assert(fInitialized);
6906 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6907 switch (pReNative->Core.aVars[idxVar].cbVar)
6908 {
6909 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6910 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6911 case 3: AssertFailed(); RT_FALL_THRU();
6912 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6913 default: AssertFailed(); RT_FALL_THRU();
6914 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6915 }
6916 }
6917 else
6918 {
6919 Assert(idxStackSlot == UINT8_MAX);
6920 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6921 }
6922 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6923 return idxReg;
6924}
6925
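/*
 * Usage sketch (hypothetical emitter, for illustration only; idxMyVar and the
 * source register idxOtherReg are made-up names): a typical IEM_MC_XXX emitter
 * acquires the variable's host register, emits code against it, and releases it
 * again so the allocator is free to spill or reuse it:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxMyVar, &off);
 *      off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxOtherReg);
 *      iemNativeVarRegisterRelease(pReNative, idxMyVar);
 */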
6926
6927/**
6928 * The value of variable @a idxVar will be written in full to the @a enmGstReg
6929 * guest register.
6930 *
6931 * This function makes sure there is a register for it and sets it to be the
6932 * current shadow copy of @a enmGstReg.
6933 *
6934 * @returns The host register number.
6935 * @param pReNative The recompiler state.
6936 * @param idxVar The variable.
6937 * @param enmGstReg The guest register this variable will be written to
6938 * after this call.
6939 * @param poff Pointer to the instruction buffer offset.
6940 * In case a register needs to be freed up or if the
6941 * variable content needs to be loaded off the stack.
6942 *
6943 * @note We DO NOT expect @a idxVar to be an argument variable,
6944 *       because this function is only used in the commit stage of an
6945 *       instruction.
6946 */
6947DECL_HIDDEN_THROW(uint8_t)
6948iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
6949{
6950 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6951 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6952 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
6953 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
6954 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
6955 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
6956 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
6957 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6958
6959 /*
6960 * This shouldn't ever be used for arguments, unless it's in a weird else
6961 * branch that doesn't do any calling and even then it's questionable.
6962 *
6963 * However, in case someone writes crazy wrong MC code and does register
6964 * updates before making calls, just use the regular register allocator to
6965 * ensure we get a register suitable for the intended argument number.
6966 */
6967 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
6968
6969 /*
6970 * If there is already a register for the variable, we transfer/set the
6971 * guest shadow copy assignment to it.
6972 */
6973 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6974 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6975 {
6976 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
6977 {
6978 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
6979 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
6980 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
6981 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
6982 }
6983 else
6984 {
6985 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
6986 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
6987 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
6988 }
6989 /** @todo figure this one out. We need some way of making sure the register isn't
6990 * modified after this point, just in case we start writing crappy MC code. */
6991 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
6992 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6993 return idxReg;
6994 }
6995 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
6996
6997 /*
6998     * Because this is supposed to be the commit stage, we just tag along with the
6999     * temporary register allocator and upgrade it to a variable register.
7000 */
7001 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7002 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7003 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7004 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7005 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7006 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7007
7008 /*
7009 * Now we need to load the register value.
7010 */
7011 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
7012 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
7013 else
7014 {
7015 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7016 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7017 switch (pReNative->Core.aVars[idxVar].cbVar)
7018 {
7019 case sizeof(uint64_t):
7020 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7021 break;
7022 case sizeof(uint32_t):
7023 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7024 break;
7025 case sizeof(uint16_t):
7026 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7027 break;
7028 case sizeof(uint8_t):
7029 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7030 break;
7031 default:
7032 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7033 }
7034 }
7035
7036 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7037 return idxReg;
7038}
7039
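/*
 * Usage sketch (hypothetical IEM_MC_STORE_GREG_XXX style emitter, for
 * illustration only; idxValueVar and iGReg are made-up names): at commit time
 * the variable's register is acquired as the shadow of the guest register that
 * is about to be written in full, the store is emitted, and the register is
 * released again:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar,
 *                                                                       IEMNATIVEGSTREG_GPR(iGReg), &off);
 *      ... emit the store of idxVarReg into CPUMCTX for the guest register ...
 *      iemNativeVarRegisterRelease(pReNative, idxValueVar);
 */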
7040
7041/**
7042 * Sets the host register for @a idxVar to @a idxReg.
7043 *
7044 * The register must not be allocated. Any guest register shadowing will be
7045 * implicitly dropped by this call.
7046 *
7047 * The variable must not have any register associated with it (causes
7048 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
7049 * implied.
7050 *
7051 * @returns idxReg
7052 * @param pReNative The recompiler state.
7053 * @param idxVar The variable.
7054 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
7055 * @param off For recording in debug info.
7056 *
7057 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
7058 */
7059DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
7060{
7061 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7062 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7063 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
7064 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
7065 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
7066
7067 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
7068 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7069
7070 iemNativeVarSetKindToStack(pReNative, idxVar);
7071 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7072
7073 return idxReg;
7074}
7075
7076
7077/**
7078 * A convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
7079 */
7080DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7081 uint8_t idxReg, uint32_t *poff)
7082{
7083 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
7084 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7085 return idxReg;
7086}
7087
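/*
 * Typical use is binding the call return register to a result variable right
 * after emitting a helper call, e.g. (sketch, for illustration only; names as
 * in iemNativeEmitCallAImplCommon below):
 *
 *      off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
 *      iemNativeVarRegisterSetAndAcquire(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, &off);
 *
 * See iemNativeEmitCallAImplCommon below for the non-acquiring variant of this
 * pattern.
 */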
7088
7089/**
7090 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7091 *
7092 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7093 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7094 * requirement of flushing anything in volatile host registers when making a
7095 * call.
7096 *
7097 * @returns New @a off value.
7098 * @param pReNative The recompiler state.
7099 * @param off The code buffer position.
7100 * @param fHstRegsNotToSave Set of registers not to save & restore.
7101 */
7102DECL_INLINE_THROW(uint32_t)
7103iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7104{
7105 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7106 if (fHstRegs)
7107 {
7108 do
7109 {
7110 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7111 fHstRegs &= ~RT_BIT_32(idxHstReg);
7112
7113 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7114 {
7115 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7116 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7117 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7118 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7119 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7120 switch (pReNative->Core.aVars[idxVar].enmKind)
7121 {
7122 case kIemNativeVarKind_Stack:
7123 {
7124 /* Temporarily spill the variable register. */
7125 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7126 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7127 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7128 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7129 continue;
7130 }
7131
7132 case kIemNativeVarKind_Immediate:
7133 case kIemNativeVarKind_VarRef:
7134 case kIemNativeVarKind_GstRegRef:
7135 /* It is weird to have any of these loaded at this point. */
7136 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7137 continue;
7138
7139 case kIemNativeVarKind_End:
7140 case kIemNativeVarKind_Invalid:
7141 break;
7142 }
7143 AssertFailed();
7144 }
7145 } while (fHstRegs);
7146 }
7147 return off;
7148}
7149
7150
7151/**
7152 * Emit code to restore volatile registers after a call to a helper.
7153 *
7154 * @returns New @a off value.
7155 * @param pReNative The recompiler state.
7156 * @param off The code buffer position.
7157 * @param fHstRegsNotToSave Set of registers not to save & restore.
7158 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7159 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7160 */
7161DECL_INLINE_THROW(uint32_t)
7162iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7163{
7164 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7165 if (fHstRegs)
7166 {
7167 do
7168 {
7169 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7170 fHstRegs &= ~RT_BIT_32(idxHstReg);
7171
7172 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7173 {
7174 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7175 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7176 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7177 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7178 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7179 switch (pReNative->Core.aVars[idxVar].enmKind)
7180 {
7181 case kIemNativeVarKind_Stack:
7182 {
7183 /* Unspill the variable register. */
7184 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7185 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%d/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7186 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7187 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7188 continue;
7189 }
7190
7191 case kIemNativeVarKind_Immediate:
7192 case kIemNativeVarKind_VarRef:
7193 case kIemNativeVarKind_GstRegRef:
7194 /* It is weird to have any of these loaded at this point. */
7195 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7196 continue;
7197
7198 case kIemNativeVarKind_End:
7199 case kIemNativeVarKind_Invalid:
7200 break;
7201 }
7202 AssertFailed();
7203 }
7204 } while (fHstRegs);
7205 }
7206 return off;
7207}
7208
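/*
 * Usage sketch (hypothetical TLB-miss code path, for illustration only; pfnHelper
 * and fRegsToKeep are made-up names): the save/restore pair brackets the helper
 * call so that variables living in volatile host registers survive it:
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fRegsToKeep);
 *      ... load the helper arguments ...
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fRegsToKeep);
 */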
7209
7210/**
7211 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
7212 *
7213 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7214 */
7215DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7216{
7217 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7218 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7219 {
7220 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7221 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7222 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7223 Assert(cSlots > 0);
7224 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7225 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
7226 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7227 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7228 }
7229 else
7230 Assert(idxStackSlot == UINT8_MAX);
7231}
7232
7233
7234/**
7235 * Worker that frees a single variable.
7236 *
7237 * ASSUMES that @a idxVar is valid.
7238 */
7239DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7240{
7241 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7242 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7243
7244 /* Free the host register first if any assigned. */
7245 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7246 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7247 {
7248 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7249 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7250 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7251 }
7252
7253 /* Free argument mapping. */
7254 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7255 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7256 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7257
7258 /* Free the stack slots. */
7259 iemNativeVarFreeStackSlots(pReNative, idxVar);
7260
7261 /* Free the actual variable. */
7262 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7263 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7264}
7265
7266
7267/**
7268 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7269 */
7270DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7271{
7272 while (bmVars != 0)
7273 {
7274 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7275 bmVars &= ~RT_BIT_32(idxVar);
7276
7277#if 1 /** @todo optimize by simplifying this later... */
7278 iemNativeVarFreeOneWorker(pReNative, idxVar);
7279#else
7280 /* Only need to free the host register, the rest is done as bulk updates below. */
7281 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7282 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7283 {
7284 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7285 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7286 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7287 }
7288#endif
7289 }
7290#if 0 /** @todo optimize by simplifying this later... */
7291 pReNative->Core.bmVars = 0;
7292 pReNative->Core.bmStack = 0;
7293 pReNative->Core.u64ArgVars = UINT64_MAX;
7294#endif
7295}
7296
7297
7298/**
7299 * This is called by IEM_MC_END() to clean up all variables.
7300 */
7301DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
7302{
7303 uint32_t const bmVars = pReNative->Core.bmVars;
7304 if (bmVars != 0)
7305 iemNativeVarFreeAllSlow(pReNative, bmVars);
7306 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7307 Assert(pReNative->Core.bmStack == 0);
7308}
7309
7310
7311#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
7312
7313/**
7314 * This is called by IEM_MC_FREE_LOCAL.
7315 */
7316DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7317{
7318 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7319 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7320 iemNativeVarFreeOneWorker(pReNative, idxVar);
7321}
7322
7323
7324#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
7325
7326/**
7327 * This is called by IEM_MC_FREE_ARG.
7328 */
7329DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7330{
7331 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7332 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
7333 iemNativeVarFreeOneWorker(pReNative, idxVar);
7334}
7335
7336
7337#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
7338
7339/**
7340 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
7341 */
7342DECL_INLINE_THROW(uint32_t)
7343iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
7344{
7345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
7346 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
7347 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7348 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
7349 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
7350
7351 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
7352 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
7353 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
7354 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7355
7356 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
7357
7358 /*
7359 * Special case for immediates.
7360 */
7361 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
7362 {
7363 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7364 {
7365 case sizeof(uint16_t):
7366 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7367 break;
7368 case sizeof(uint32_t):
7369 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7370 break;
7371 default: AssertFailed(); break;
7372 }
7373 }
7374 else
7375 {
7376 /*
7377 * The generic solution for now.
7378 */
7379 /** @todo optimize this by having the python script make sure the source
7380 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
7381 * statement. Then we could just transfer the register assignments. */
7382 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
7383 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
7384 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7385 {
7386 case sizeof(uint16_t):
7387 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
7388 break;
7389 case sizeof(uint32_t):
7390 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
7391 break;
7392 default: AssertFailed(); break;
7393 }
7394 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
7395 iemNativeVarRegisterRelease(pReNative, idxVarDst);
7396 }
7397 return off;
7398}
7399
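/*
 * MC-level sketch (hypothetical microcode block, for illustration only; u32Src
 * is assumed to be an existing local or argument):
 *
 *      IEM_MC_LOCAL(uint16_t, u16Value);
 *      IEM_MC_ASSIGN_TO_SMALLER(u16Value, u32Src);
 *
 * The recompiler translates the second statement into the
 * iemNativeVarAssignToSmaller call above.
 */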
7400
7401
7402/*********************************************************************************************************************************
7403* Emitters for IEM_MC_CALL_CIMPL_XXX *
7404*********************************************************************************************************************************/
7405
7406/**
7407 * Emits code to load a reference to the given guest register into @a idxGprDst.
7408 */
7409DECL_INLINE_THROW(uint32_t)
7410iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7411 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7412{
7413 /*
7414 * Get the offset relative to the CPUMCTX structure.
7415 */
7416 uint32_t offCpumCtx;
7417 switch (enmClass)
7418 {
7419 case kIemNativeGstRegRef_Gpr:
7420 Assert(idxRegInClass < 16);
7421 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7422 break;
7423
7424        case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
7425 Assert(idxRegInClass < 4);
7426 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7427 break;
7428
7429 case kIemNativeGstRegRef_EFlags:
7430 Assert(idxRegInClass == 0);
7431 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7432 break;
7433
7434 case kIemNativeGstRegRef_MxCsr:
7435 Assert(idxRegInClass == 0);
7436 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7437 break;
7438
7439 case kIemNativeGstRegRef_FpuReg:
7440 Assert(idxRegInClass < 8);
7441 AssertFailed(); /** @todo what kind of indexing? */
7442 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7443 break;
7444
7445 case kIemNativeGstRegRef_MReg:
7446 Assert(idxRegInClass < 8);
7447 AssertFailed(); /** @todo what kind of indexing? */
7448 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7449 break;
7450
7451 case kIemNativeGstRegRef_XReg:
7452 Assert(idxRegInClass < 16);
7453 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7454 break;
7455
7456 default:
7457 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7458 }
7459
7460 /*
7461 * Load the value into the destination register.
7462 */
7463#ifdef RT_ARCH_AMD64
7464 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7465
7466#elif defined(RT_ARCH_ARM64)
7467 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7468 Assert(offCpumCtx < 4096);
7469 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7470
7471#else
7472# error "Port me!"
7473#endif
7474
7475 return off;
7476}
7477
7478
7479/**
7480 * Common code for CIMPL and AIMPL calls.
7481 *
7482 * These are calls that use argument variables and such. They should not be
7483 * confused with internal calls required to implement an MC operation,
7484 * like a TLB load and similar.
7485 *
7486 * Upon return all that is left to do is to load any hidden arguments and
7487 * perform the call. All argument variables are freed.
7488 *
7489 * @returns New code buffer offset; throws VBox status code on error.
7490 * @param pReNative The native recompile state.
7491 * @param off The code buffer offset.
7492 * @param   cArgs           The total number of arguments (includes hidden
7493 * count).
7494 * @param cHiddenArgs The number of hidden arguments. The hidden
7495 * arguments must not have any variable declared for
7496 * them, whereas all the regular arguments must
7497 * (tstIEMCheckMc ensures this).
7498 */
7499DECL_HIDDEN_THROW(uint32_t)
7500iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7501{
7502#ifdef VBOX_STRICT
7503 /*
7504 * Assert sanity.
7505 */
7506 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7507 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7508 for (unsigned i = 0; i < cHiddenArgs; i++)
7509 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7510 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7511 {
7512 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7513 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7514 }
7515 iemNativeRegAssertSanity(pReNative);
7516#endif
7517
7518 /*
7519 * Before we do anything else, go over variables that are referenced and
7520 * make sure they are not in a register.
7521 */
7522 uint32_t bmVars = pReNative->Core.bmVars;
7523 if (bmVars)
7524 {
7525 do
7526 {
7527 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7528 bmVars &= ~RT_BIT_32(idxVar);
7529
7530 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7531 {
7532 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7533 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7534 {
7535 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7536 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7537 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7538 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7539 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7540
7541 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7542 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7543 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7544 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7545 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7546 }
7547 }
7548 } while (bmVars != 0);
7549#if 0 //def VBOX_STRICT
7550 iemNativeRegAssertSanity(pReNative);
7551#endif
7552 }
7553
7554 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7555
7556 /*
7557 * First, go over the host registers that will be used for arguments and make
7558 * sure they either hold the desired argument or are free.
7559 */
7560 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7561 {
7562 for (uint32_t i = 0; i < cRegArgs; i++)
7563 {
7564 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7565 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7566 {
7567 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7568 {
7569 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7570 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7571 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
7572 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7573 if (uArgNo == i)
7574                    { /* perfect */ }
7575 /* The variable allocator logic should make sure this is impossible,
7576 except for when the return register is used as a parameter (ARM,
7577 but not x86). */
7578#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7579 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7580 {
7581# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7582# error "Implement this"
7583# endif
7584 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7585 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7586 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7587 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7588 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7589 }
7590#endif
7591 else
7592 {
7593 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7594
7595 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
7596 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7597 else
7598 {
7599 /* just free it, can be reloaded if used again */
7600 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7601 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7602 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7603 }
7604 }
7605 }
7606 else
7607 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7608 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7609 }
7610 }
7611#if 0 //def VBOX_STRICT
7612 iemNativeRegAssertSanity(pReNative);
7613#endif
7614 }
7615
7616 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7617
7618#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7619 /*
7620 * If there are any stack arguments, make sure they are in their place as well.
7621 *
7622     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7623     * the caller) will be loading it later and it must be free (see the first loop).
7624 */
7625 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7626 {
7627 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7628 {
7629 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7630 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7631 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7632 {
7633 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7634 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
7635 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
7636 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7637 }
7638 else
7639 {
7640 /* Use ARG0 as temp for stuff we need registers for. */
7641 switch (pReNative->Core.aVars[idxVar].enmKind)
7642 {
7643 case kIemNativeVarKind_Stack:
7644 {
7645 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7646 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7647 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7648 iemNativeStackCalcBpDisp(idxStackSlot));
7649 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7650 continue;
7651 }
7652
7653 case kIemNativeVarKind_Immediate:
7654 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
7655 continue;
7656
7657 case kIemNativeVarKind_VarRef:
7658 {
7659 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
7660 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7661 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
7662 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7663 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7664 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7665 {
7666 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7667 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7668 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7669 }
7670 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7671 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7672 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7673 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7674 continue;
7675 }
7676
7677 case kIemNativeVarKind_GstRegRef:
7678 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
7679 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
7680 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
7681 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7682 continue;
7683
7684 case kIemNativeVarKind_Invalid:
7685 case kIemNativeVarKind_End:
7686 break;
7687 }
7688 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7689 }
7690 }
7691# if 0 //def VBOX_STRICT
7692 iemNativeRegAssertSanity(pReNative);
7693# endif
7694 }
7695#else
7696 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
7697#endif
7698
7699 /*
7700 * Make sure the argument variables are loaded into their respective registers.
7701 *
7702 * We can optimize this by ASSUMING that any register allocations are for
7703     * registers that have already been loaded and are ready. The previous step
7704 * saw to that.
7705 */
7706 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
7707 {
7708 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7709 {
7710 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7711 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7712 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
7713 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
7714 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
7715 else
7716 {
7717 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7718 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7719 {
7720 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
7721 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
7722 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
7723 | RT_BIT_32(idxArgReg);
7724 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
7725 }
7726 else
7727 {
7728 /* Use ARG0 as temp for stuff we need registers for. */
7729 switch (pReNative->Core.aVars[idxVar].enmKind)
7730 {
7731 case kIemNativeVarKind_Stack:
7732 {
7733 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7734 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7735 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
7736 continue;
7737 }
7738
7739 case kIemNativeVarKind_Immediate:
7740 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
7741 continue;
7742
7743 case kIemNativeVarKind_VarRef:
7744 {
7745 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
7746 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7747 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
7748 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7749 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7750 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7751 {
7752 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7753 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7754 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7755 }
7756 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7757 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7758 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
7759 continue;
7760 }
7761
7762 case kIemNativeVarKind_GstRegRef:
7763 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
7764 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
7765 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
7766 continue;
7767
7768 case kIemNativeVarKind_Invalid:
7769 case kIemNativeVarKind_End:
7770 break;
7771 }
7772 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7773 }
7774 }
7775 }
7776#if 0 //def VBOX_STRICT
7777 iemNativeRegAssertSanity(pReNative);
7778#endif
7779 }
7780#ifdef VBOX_STRICT
7781 else
7782 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7783 {
7784 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
7785 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
7786 }
7787#endif
7788
7789 /*
7790 * Free all argument variables (simplified).
7791 * Their lifetime always expires with the call they are for.
7792 */
7793 /** @todo Make the python script check that arguments aren't used after
7794 * IEM_MC_CALL_XXXX. */
7795    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
7796     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
7797     *        an argument value. There is also some FPU stuff. */
7798 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
7799 {
7800 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7801 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7802
7803 /* no need to free registers: */
7804 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
7805 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
7806 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
7807 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
7808 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
7809 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
7810
7811 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
7812 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7813 iemNativeVarFreeStackSlots(pReNative, idxVar);
7814 }
7815 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7816
7817 /*
7818 * Flush volatile registers as we make the call.
7819 */
7820 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
7821
7822 return off;
7823}
7824
7825
7826/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
7827DECL_HIDDEN_THROW(uint32_t)
7828iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
7829 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
7830
7831{
7832 /*
7833 * Do all the call setup and cleanup.
7834 */
7835 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
7836
7837 /*
7838 * Load the two or three hidden arguments.
7839 */
7840#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7841 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7842 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7843 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
7844#else
7845 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7846 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
7847#endif
7848
7849 /*
7850 * Make the call and check the return code.
7851 *
7852 * Shadow PC copies are always flushed here, other stuff depends on flags.
7853     * Segment and general purpose registers are explicitly flushed via the
7854 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
7855 * macros.
7856 */
7857 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
7858#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7859 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7860#endif
7861 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
7862 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
7863 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
7864 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
7865
7866 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7867}
7868
7869
7870#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7871 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
7872
7873/** Emits code for IEM_MC_CALL_CIMPL_1. */
7874DECL_INLINE_THROW(uint32_t)
7875iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7876 uintptr_t pfnCImpl, uint8_t idxArg0)
7877{
7878 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7879 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
7880}
7881
7882
7883#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7884 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
7885
7886/** Emits code for IEM_MC_CALL_CIMPL_2. */
7887DECL_INLINE_THROW(uint32_t)
7888iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7889 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
7890{
7891 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7892 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7893 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
7894}
7895
7896
7897#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7898 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7899 (uintptr_t)a_pfnCImpl, a0, a1, a2)
7900
7901/** Emits code for IEM_MC_CALL_CIMPL_3. */
7902DECL_INLINE_THROW(uint32_t)
7903iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7904 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7905{
7906 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7907 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7908 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7909 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
7910}
7911
7912
7913#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
7914 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7915 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
7916
7917/** Emits code for IEM_MC_CALL_CIMPL_4. */
7918DECL_INLINE_THROW(uint32_t)
7919iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7920 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7921{
7922 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7923 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7924 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7925 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7926 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
7927}
7928
7929
7930#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
7931 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7932 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
7933
7934/** Emits code for IEM_MC_CALL_CIMPL_5. */
7935DECL_INLINE_THROW(uint32_t)
7936iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7937 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
7938{
7939 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7940 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7941 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7942 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7943 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
7944 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
7945}
7946
7947
7948/** Recompiler debugging: Flush guest register shadow copies. */
7949#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
7950
7951
7952
7953/*********************************************************************************************************************************
7954* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
7955*********************************************************************************************************************************/
7956
7957/**
7958 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
7959 */
7960DECL_INLINE_THROW(uint32_t)
7961iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7962 uintptr_t pfnAImpl, uint8_t cArgs)
7963{
7964 if (idxVarRc != UINT8_MAX)
7965 {
7966 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
7967 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
7968 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
7969 }
7970
7971 /*
7972 * Do all the call setup and cleanup.
7973 */
7974 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
7975
7976 /*
7977 * Make the call and update the return code variable if we've got one.
7978 */
7979 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
7980 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
7981 {
7982pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
7983 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
7984 }
7985
7986 return off;
7987}
7988
7989
7990
7991#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
7992 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
7993
7994#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
7995 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
7996
7997/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
7998DECL_INLINE_THROW(uint32_t)
7999iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
8000{
8001 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
8002}
8003
8004
8005#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
8006 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
8007
8008#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
8009 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
8010
8011/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
8012DECL_INLINE_THROW(uint32_t)
8013iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
8014{
8015 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8016 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
8017}
8018
8019
8020#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
8021 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
8022
8023#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
8024 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
8025
8026/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
8027DECL_INLINE_THROW(uint32_t)
8028iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8029 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8030{
8031 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8032 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8033 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
8034}
8035
8036
8037#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
8038 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
8039
8040#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
8041 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
8042
8043/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
8044DECL_INLINE_THROW(uint32_t)
8045iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8046 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8047{
8048 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8049 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8050 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8051 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
8052}
8053
8054
8055#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
8056 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8057
8058#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
8059 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8060
8061/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
8062DECL_INLINE_THROW(uint32_t)
8063iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8064 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8065{
8066 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8067 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8068 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8069 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
8070 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
8071}
8072
8073
8074
8075/*********************************************************************************************************************************
8076* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
8077*********************************************************************************************************************************/
8078
8079#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
8080 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
8081
8082#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8083 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
8084
8085#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8086 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
8087
8088#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8089 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
8090
8091
8092/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
8093 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
8094DECL_INLINE_THROW(uint32_t)
8095iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
8096{
8097 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8098 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8099 Assert(iGRegEx < 20);
8100
8101 /* Same discussion as in iemNativeEmitFetchGregU16 */
8102 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8103 kIemNativeGstRegUse_ReadOnly);
8104
8105 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8106 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8107
8108 /* The value is zero-extended to the full 64-bit host register width. */
8109 if (iGRegEx < 16)
8110 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8111 else
8112 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8113
8114 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8115 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8116 return off;
8117}
8118
8119
8120#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8121 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
8122
8123#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8124 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
8125
8126#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8127 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
8128
8129/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
8130DECL_INLINE_THROW(uint32_t)
8131iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
8132{
8133 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8134 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8135 Assert(iGRegEx < 20);
8136
8137 /* Same discussion as in iemNativeEmitFetchGregU16 */
8138 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8139 kIemNativeGstRegUse_ReadOnly);
8140
8141 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8142 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8143
8144 if (iGRegEx < 16)
8145 {
8146 switch (cbSignExtended)
8147 {
8148 case sizeof(uint16_t):
8149 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8150 break;
8151 case sizeof(uint32_t):
8152 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8153 break;
8154 case sizeof(uint64_t):
8155 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8156 break;
8157 default: AssertFailed(); break;
8158 }
8159 }
8160 else
8161 {
8162 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8163 switch (cbSignExtended)
8164 {
8165 case sizeof(uint16_t):
8166 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8167 break;
8168 case sizeof(uint32_t):
8169 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8170 break;
8171 case sizeof(uint64_t):
8172 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8173 break;
8174 default: AssertFailed(); break;
8175 }
8176 }
8177
8178 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8179 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8180 return off;
8181}
8182
8183
8184
8185#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
8186 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
8187
8188#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
8189 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
8190
8191#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
8192 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
8193
8194/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
8195DECL_INLINE_THROW(uint32_t)
8196iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8197{
8198 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8199 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8200 Assert(iGReg < 16);
8201
8202 /*
8203 * We can either just load the low 16-bit of the GPR into a host register
8204 * for the variable, or we can do so via a shadow copy host register. The
8205 * latter will avoid having to reload it if it's being stored later, but
8206 * will waste a host register if it isn't touched again. Since we don't
8207     * know what's going to happen, we choose the latter for now.
8208 */
8209 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8210 kIemNativeGstRegUse_ReadOnly);
8211
8212 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8213 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8214 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8215 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8216
8217 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8218 return off;
8219}
8220
8221
8222#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
8223 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
8224
8225#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
8226 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
8227
8228/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
8229DECL_INLINE_THROW(uint32_t)
8230iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
8231{
8232 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8233 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8234 Assert(iGReg < 16);
8235
8236 /*
8237 * We can either just load the low 16-bit of the GPR into a host register
8238 * for the variable, or we can do so via a shadow copy host register. The
8239 * latter will avoid having to reload it if it's being stored later, but
8240 * will waste a host register if it isn't touched again. Since we don't
8241     * know what's going to happen, we choose the latter for now.
8242 */
8243 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8244 kIemNativeGstRegUse_ReadOnly);
8245
8246 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8247 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8248 if (cbSignExtended == sizeof(uint32_t))
8249 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8250 else
8251 {
8252 Assert(cbSignExtended == sizeof(uint64_t));
8253 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8254 }
8255 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8256
8257 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8258 return off;
8259}
8260
8261
8262#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
8263 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
8264
8265#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
8266 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
8267
8268/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
8269DECL_INLINE_THROW(uint32_t)
8270iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8271{
8272 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8273 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
8274 Assert(iGReg < 16);
8275
8276 /*
8277     * We can either just load the low 32-bit of the GPR into a host register
8278     * for the variable, or we can do so via a shadow copy host register. The
8279     * latter will avoid having to reload it if it's being stored later, but
8280     * will waste a host register if it isn't touched again. Since we don't
8281     * know what's going to happen, we choose the latter for now.
8282 */
8283 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8284 kIemNativeGstRegUse_ReadOnly);
8285
8286 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8287 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8288 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8289 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8290
8291 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8292 return off;
8293}
8294
8295
8296#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
8297 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
8298
8299/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
8300DECL_INLINE_THROW(uint32_t)
8301iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8302{
8303 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8304 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8305 Assert(iGReg < 16);
8306
8307 /*
8308 * We can either just load the low 32-bit of the GPR into a host register
8309 * for the variable, or we can do so via a shadow copy host register. The
8310 * latter will avoid having to reload it if it's being stored later, but
8311 * will waste a host register if it isn't touched again. Since we don't
8312     * know what's going to happen, we choose the latter for now.
8313 */
8314 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8315 kIemNativeGstRegUse_ReadOnly);
8316
8317 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8318 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8319 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8320 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8321
8322 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8323 return off;
8324}
8325
8326
8327#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
8328 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8329
8330#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
8331 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8332
8333/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
8334 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
8335DECL_INLINE_THROW(uint32_t)
8336iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8337{
8338 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8339 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8340 Assert(iGReg < 16);
8341
8342 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8343 kIemNativeGstRegUse_ReadOnly);
8344
8345 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8346 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8347 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
8348 /** @todo name the register a shadow one already? */
8349 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8350
8351 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8352 return off;
8353}
8354
8355
8356
8357/*********************************************************************************************************************************
8358* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
8359*********************************************************************************************************************************/
8360
8361#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
8362 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
8363
8364/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
8365DECL_INLINE_THROW(uint32_t)
8366iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
8367{
8368 Assert(iGRegEx < 20);
8369 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8370 kIemNativeGstRegUse_ForUpdate);
8371#ifdef RT_ARCH_AMD64
8372 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8373
8374 /* To the lowest byte of the register: mov r8, imm8 */
8375 if (iGRegEx < 16)
8376 {
8377 if (idxGstTmpReg >= 8)
8378 pbCodeBuf[off++] = X86_OP_REX_B;
8379 else if (idxGstTmpReg >= 4)
8380 pbCodeBuf[off++] = X86_OP_REX;
8381 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8382 pbCodeBuf[off++] = u8Value;
8383 }
8384 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
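    /* Note: only host registers 0-3 have addressable high-byte forms (ah/ch/dh/bh),
       and only without a REX prefix.  For any other host register bits 15:8 cannot
       be written directly, so we rotate the full 64-bit register by 8, patch the
       low byte and rotate back; e.g. with the guest shadow living in r9 the emitted
       sequence is 'ror r9, 8' / 'mov r9b, imm8' / 'rol r9, 8'. */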
8385 else if (idxGstTmpReg < 4)
8386 {
8387 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
8388 pbCodeBuf[off++] = u8Value;
8389 }
8390 else
8391 {
8392 /* ror reg64, 8 */
8393 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8394 pbCodeBuf[off++] = 0xc1;
8395 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8396 pbCodeBuf[off++] = 8;
8397
8398 /* mov reg8, imm8 */
8399 if (idxGstTmpReg >= 8)
8400 pbCodeBuf[off++] = X86_OP_REX_B;
8401 else if (idxGstTmpReg >= 4)
8402 pbCodeBuf[off++] = X86_OP_REX;
8403 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8404 pbCodeBuf[off++] = u8Value;
8405
8406 /* rol reg64, 8 */
8407 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8408 pbCodeBuf[off++] = 0xc1;
8409 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8410 pbCodeBuf[off++] = 8;
8411 }
8412
8413#elif defined(RT_ARCH_ARM64)
8414 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
8415 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8416 if (iGRegEx < 16)
8417 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
8418 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
8419 else
8420 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
8421 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
8422 iemNativeRegFreeTmp(pReNative, idxImmReg);
8423
8424#else
8425# error "Port me!"
8426#endif
8427
8428 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8429
8430 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8431
8432 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8433 return off;
8434}
8435
8436
8437#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
8438 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
8439
8440/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
8441DECL_INLINE_THROW(uint32_t)
8442iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
8443{
8444 Assert(iGRegEx < 20);
8445 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8446
8447 /*
8448     * If it's a constant value (unlikely) we treat this as an
8449 * IEM_MC_STORE_GREG_U8_CONST statement.
8450 */
8451 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8452 { /* likely */ }
8453 else
8454 {
8455 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8456 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8457 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8458 }
8459
8460 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8461 kIemNativeGstRegUse_ForUpdate);
8462 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8463
8464#ifdef RT_ARCH_AMD64
8465 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
8466 if (iGRegEx < 16)
8467 {
8468 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
8469 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8470 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8471 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8472 pbCodeBuf[off++] = X86_OP_REX;
8473 pbCodeBuf[off++] = 0x8a;
8474 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8475 }
8476 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
8477 else if (idxGstTmpReg < 4 && idxVarReg < 4)
8478 {
8479 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
8480 pbCodeBuf[off++] = 0x8a;
8481 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
8482 }
8483 else
8484 {
8485 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
8486
8487 /* ror reg64, 8 */
8488 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8489 pbCodeBuf[off++] = 0xc1;
8490 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8491 pbCodeBuf[off++] = 8;
8492
8493 /* mov reg8, reg8(r/m) */
8494 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8495 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8496 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8497 pbCodeBuf[off++] = X86_OP_REX;
8498 pbCodeBuf[off++] = 0x8a;
8499 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8500
8501 /* rol reg64, 8 */
8502 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8503 pbCodeBuf[off++] = 0xc1;
8504 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8505 pbCodeBuf[off++] = 8;
8506 }
8507
8508#elif defined(RT_ARCH_ARM64)
8509 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
8510 or
8511 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
8512 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8513 if (iGRegEx < 16)
8514 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
8515 else
8516 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
8517
8518#else
8519# error "Port me!"
8520#endif
8521 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8522
8523 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8524
8525 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8526 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8527 return off;
8528}
8529
8530
8531
8532#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
8533 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
8534
8535/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
8536DECL_INLINE_THROW(uint32_t)
8537iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
8538{
8539 Assert(iGReg < 16);
8540 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8541 kIemNativeGstRegUse_ForUpdate);
8542#ifdef RT_ARCH_AMD64
8543 /* mov reg16, imm16 */
8544 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8545 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8546 if (idxGstTmpReg >= 8)
8547 pbCodeBuf[off++] = X86_OP_REX_B;
8548 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
8549 pbCodeBuf[off++] = RT_BYTE1(uValue);
8550 pbCodeBuf[off++] = RT_BYTE2(uValue);
8551
8552#elif defined(RT_ARCH_ARM64)
8553 /* movk xdst, #uValue, lsl #0 */
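    /* Note: movk replaces only the 16 bits at the selected position (bits 15:0 here)
       and preserves the remaining 48 bits, which matches the x86 semantics of
       writing a 16-bit GPR without touching the upper bits. */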
8554 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8555 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
8556
8557#else
8558# error "Port me!"
8559#endif
8560
8561 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8562
8563 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8564 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8565 return off;
8566}
8567
8568
8569#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
8570 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
8571
8572/** Emits code for IEM_MC_STORE_GREG_U16. */
8573DECL_INLINE_THROW(uint32_t)
8574iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8575{
8576 Assert(iGReg < 16);
8577 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8578
8579 /*
8580 * If it's a constant value (unlikely) we treat this as a
8581     * If it's a constant value (unlikely) we treat this as an
8582 */
8583 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8584 { /* likely */ }
8585 else
8586 {
8587 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8588 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8589 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8590 }
8591
8592 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8593 kIemNativeGstRegUse_ForUpdate);
8594
8595#ifdef RT_ARCH_AMD64
8596 /* mov reg16, reg16 or [mem16] */
8597 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8598 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8599 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8600 {
8601 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
8602 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
8603 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
8604 pbCodeBuf[off++] = 0x8b;
8605 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
8606 }
8607 else
8608 {
8609 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
8610 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8611 if (idxGstTmpReg >= 8)
8612 pbCodeBuf[off++] = X86_OP_REX_R;
8613 pbCodeBuf[off++] = 0x8b;
8614 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8615 }
8616
8617#elif defined(RT_ARCH_ARM64)
8618 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
8619 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8620 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8621 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
8622 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8623
8624#else
8625# error "Port me!"
8626#endif
8627
8628 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8629
8630 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8631 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8632 return off;
8633}
8634
8635
8636#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
8637 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
8638
8639/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
8640DECL_INLINE_THROW(uint32_t)
8641iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
8642{
8643 Assert(iGReg < 16);
8644 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8645 kIemNativeGstRegUse_ForFullWrite);
8646 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
8647 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8648 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8649 return off;
8650}
8651
8652
8653#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
8654 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
8655
8656/** Emits code for IEM_MC_STORE_GREG_U32. */
8657DECL_INLINE_THROW(uint32_t)
8658iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8659{
8660 Assert(iGReg < 16);
8661 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8662
8663 /*
8664     * If it's a constant value (unlikely) we treat this as an
8665 * IEM_MC_STORE_GREG_U32_CONST statement.
8666 */
8667 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8668 { /* likely */ }
8669 else
8670 {
8671 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8672 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8673 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8674 }
8675
8676 /*
8677     * For the rest we allocate a guest register for the variable and write
8678     * it to the CPUMCTX structure.
8679 */
8680 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
8681 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8682#ifdef VBOX_STRICT
8683 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
8684#endif
8685 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8686 return off;
8687}
8688
8689
8690#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
8691 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
8692
8693/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
8694DECL_INLINE_THROW(uint32_t)
8695iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
8696{
8697 Assert(iGReg < 16);
8698 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8699 kIemNativeGstRegUse_ForFullWrite);
8700 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
8701 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8702 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8703 return off;
8704}
8705
8706
8707#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
8708 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
8709
8710/** Emits code for IEM_MC_STORE_GREG_U64. */
8711DECL_INLINE_THROW(uint32_t)
8712iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8713{
8714 Assert(iGReg < 16);
8715 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8716
8717 /*
8718 * If it's a constant value (unlikely) we treat this as a
8719     * If it's a constant value (unlikely) we treat this as an
8720 */
8721 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8722 { /* likely */ }
8723 else
8724 {
8725 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8726 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8727 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
8728 }
8729
8730 /*
8731     * For the rest we allocate a guest register for the variable and write
8732     * it to the CPUMCTX structure.
8733 */
8734 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
8735 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8736 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8737 return off;
8738}
8739
8740
8741#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
8742 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
8743
8744/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
8745DECL_INLINE_THROW(uint32_t)
8746iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
8747{
8748 Assert(iGReg < 16);
8749 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8750 kIemNativeGstRegUse_ForUpdate);
8751 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
8752 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8753 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8754 return off;
8755}
8756
8757
8758/*********************************************************************************************************************************
8759* General purpose register manipulation (add, sub). *
8760*********************************************************************************************************************************/
8761
8762#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
8763 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
8764
8765/** Emits code for IEM_MC_ADD_GREG_U16. */
8766DECL_INLINE_THROW(uint32_t)
8767iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
8768{
8769 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8770 kIemNativeGstRegUse_ForUpdate);
8771
8772#ifdef RT_ARCH_AMD64
8773 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8774 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8775 if (idxGstTmpReg >= 8)
8776 pbCodeBuf[off++] = X86_OP_REX_B;
8777 if (uAddend == 1)
8778 {
8779 pbCodeBuf[off++] = 0xff; /* inc */
8780 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8781 }
8782 else
8783 {
8784 pbCodeBuf[off++] = 0x81;
8785 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8786 pbCodeBuf[off++] = uAddend;
8787 pbCodeBuf[off++] = 0;
8788 }
8789
8790#else
8791 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8792 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8793
8794    /* add tmp, gstgrp, uAddend */
8795 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
8796
8797 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
8798 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
8799
8800 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8801#endif
8802
8803 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8804
8805 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8806
8807 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8808 return off;
8809}
8810
8811
8812#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
8813 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
8814
8815#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
8816 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
8817
8818/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
8819DECL_INLINE_THROW(uint32_t)
8820iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
8821{
8822 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8823 kIemNativeGstRegUse_ForUpdate);
8824
8825#ifdef RT_ARCH_AMD64
8826 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8827 if (f64Bit)
8828 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
8829 else if (idxGstTmpReg >= 8)
8830 pbCodeBuf[off++] = X86_OP_REX_B;
8831 if (uAddend == 1)
8832 {
8833 pbCodeBuf[off++] = 0xff; /* inc */
8834 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8835 }
8836 else if (uAddend < 128)
8837 {
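        /* The 0x83 form sign-extends an 8-bit immediate, so it only covers addends
           0..127 here; larger values fall back to the 0x81 form below with a full
           imm32 (padded with zero bytes, since uAddend fits in a byte). */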
8838 pbCodeBuf[off++] = 0x83; /* add */
8839 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8840 pbCodeBuf[off++] = RT_BYTE1(uAddend);
8841 }
8842 else
8843 {
8844 pbCodeBuf[off++] = 0x81; /* add */
8845 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8846 pbCodeBuf[off++] = RT_BYTE1(uAddend);
8847 pbCodeBuf[off++] = 0;
8848 pbCodeBuf[off++] = 0;
8849 pbCodeBuf[off++] = 0;
8850 }
8851
8852#else
8853    /* add gstgrp, gstgrp, uAddend */
8854 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8855 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
8856
8857#endif
8858
8859 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8860
8861 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8862
8863 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8864 return off;
8865}
8866
8867
8868
8869#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
8870 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
8871
8872/** Emits code for IEM_MC_SUB_GREG_U16. */
8873DECL_INLINE_THROW(uint32_t)
8874iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
8875{
8876 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8877 kIemNativeGstRegUse_ForUpdate);
8878
8879#ifdef RT_ARCH_AMD64
8880 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8881 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8882 if (idxGstTmpReg >= 8)
8883 pbCodeBuf[off++] = X86_OP_REX_B;
8884 if (uSubtrahend == 1)
8885 {
8886 pbCodeBuf[off++] = 0xff; /* dec */
8887 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8888 }
8889 else
8890 {
8891 pbCodeBuf[off++] = 0x81;
8892 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8893 pbCodeBuf[off++] = uSubtrahend;
8894 pbCodeBuf[off++] = 0;
8895 }
8896
8897#else
8898 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8899 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8900
8901 /* sub tmp, gstgrp, uSubtrahend */
8902 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
8903
8904 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
8905 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
8906
8907 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8908#endif
8909
8910 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8911
8912 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8913
8914 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8915 return off;
8916}
8917
8918
8919#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
8920 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
8921
8922#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
8923 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
8924
8925/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
8926DECL_INLINE_THROW(uint32_t)
8927iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
8928{
8929 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8930 kIemNativeGstRegUse_ForUpdate);
8931
8932#ifdef RT_ARCH_AMD64
8933 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8934 if (f64Bit)
8935 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
8936 else if (idxGstTmpReg >= 8)
8937 pbCodeBuf[off++] = X86_OP_REX_B;
8938 if (uSubtrahend == 1)
8939 {
8940 pbCodeBuf[off++] = 0xff; /* dec */
8941 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8942 }
8943 else if (uSubtrahend < 128)
8944 {
8945 pbCodeBuf[off++] = 0x83; /* sub */
8946 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8947 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8948 }
8949 else
8950 {
8951 pbCodeBuf[off++] = 0x81; /* sub */
8952 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8953 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8954 pbCodeBuf[off++] = 0;
8955 pbCodeBuf[off++] = 0;
8956 pbCodeBuf[off++] = 0;
8957 }
8958
8959#else
8960 /* sub tmp, gstgrp, uSubtrahend */
8961 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8962 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
8963
8964#endif
8965
8966 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8967
8968 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8969
8970 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8971 return off;
8972}
8973
8974
8975
8976/*********************************************************************************************************************************
8977* EFLAGS *
8978*********************************************************************************************************************************/
8979
8980#define IEM_MC_FETCH_EFLAGS(a_EFlags) \
8981 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
8982
8983/** Handles IEM_MC_FETCH_EFLAGS. */
8984DECL_INLINE_THROW(uint32_t)
8985iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
8986{
8987 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
8988 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
8989
8990 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
8991 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8992 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8993 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
8994 return off;
8995}
8996
8997
8998#define IEM_MC_COMMIT_EFLAGS(a_EFlags) \
8999 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
9000
9001/** Handles IEM_MC_COMMIT_EFLAGS. */
9002DECL_INLINE_THROW(uint32_t)
9003iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
9004{
9005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9006 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9007
9008 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
9009
9010#ifdef VBOX_STRICT
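    /* Strict-build sanity checks on the value being committed: trap with magic
       0x2001 if the always-one flag (bit 1) is clear, and with magic 0x2002 if any
       must-be-zero bit inside the hardware EFLAGS mask is set. */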
9011 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
9012 uint32_t offFixup = off;
9013 off = iemNativeEmitJnzToFixed(pReNative, off, off);
9014 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
9015 iemNativeFixupFixedJump(pReNative, offFixup, off);
9016
9017 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
9018 offFixup = off;
9019 off = iemNativeEmitJzToFixed(pReNative, off, off);
9020 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
9021 iemNativeFixupFixedJump(pReNative, offFixup, off);
9022#endif
9023
9024 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
9025 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
9026 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
9027 return off;
9028}
9029
9030
9031
9032/*********************************************************************************************************************************
9033* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).                                                                 *
9034*********************************************************************************************************************************/
9035
9036#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
9037 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
9038
9039#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
9040 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
9041
9042#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
9043 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
9044
9045
9046/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
9047 * IEM_MC_FETCH_SREG_ZX_U64. */
9048DECL_INLINE_THROW(uint32_t)
9049iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
9050{
9051 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9052 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
9053 Assert(iSReg < X86_SREG_COUNT);
9054
9055 /*
9056     * For now, we will not create a shadow copy of a selector. The rationale
9057     * is that since we do not recompile the popping and loading of segment
9058     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
9059     * and moving to registers, there is only a small chance that the shadow
9060     * copy will be accessed again before the register is reloaded. One
9061     * scenario would be nested calls in 16-bit code, but I doubt it's worth
9062     * the extra register pressure atm.
9063     *
9064     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
9065     * and iemNativeVarRegisterAcquire for a load scenario. We only got the
9066     * store scenario covered at present (r160730).
9067 */
9068 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9069 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9070 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
9071 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9072 return off;
9073}
9074
9075
9076
9077/*********************************************************************************************************************************
9078* Register references. *
9079*********************************************************************************************************************************/
9080
9081#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
9082 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
9083
9084#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
9085 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
9086
9087/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
9088DECL_INLINE_THROW(uint32_t)
9089iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
9090{
9091 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9092 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9093 Assert(iGRegEx < 20);
9094
9095 if (iGRegEx < 16)
9096 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9097 else
9098 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
9099
9100 /* If we've delayed writing back the register value, flush it now. */
9101 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9102
9103 /* If it's not a const reference we need to flush the shadow copy of the register now. */
9104 if (!fConst)
9105 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
9106
9107 return off;
9108}
9109
9110#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
9111 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
9112
9113#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
9114 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
9115
9116#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
9117 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
9118
9119#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
9120 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
9121
9122#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
9123 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
9124
9125#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
9126 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
9127
9128#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
9129 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
9130
9131#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
9132 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
9133
9134#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
9135 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
9136
9137#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
9138 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
9139
9140/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
9141DECL_INLINE_THROW(uint32_t)
9142iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
9143{
9144 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9145 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9146 Assert(iGReg < 16);
9147
9148 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
9149
9150 /* If we've delayed writing back the register value, flush it now. */
9151 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
9152
9153 /* If it's not a const reference we need to flush the shadow copy of the register now. */
9154 if (!fConst)
9155 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
9156
9157 return off;
9158}
9159
9160
9161#define IEM_MC_REF_EFLAGS(a_pEFlags) \
9162 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
9163
9164/** Handles IEM_MC_REF_EFLAGS. */
9165DECL_INLINE_THROW(uint32_t)
9166iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
9167{
9168 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9169 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9170
9171 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
9172
9173 /* If we've delayed writing back the register value, flush it now. */
9174 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
9175
9176 /* If there is a shadow copy of guest EFLAGS, flush it now. */
9177 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
9178
9179 return off;
9180}
9181
9182
9183/*********************************************************************************************************************************
9184* Effective Address Calculation *
9185*********************************************************************************************************************************/
9186#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
9187 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
9188
9189/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
9190 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
9191DECL_INLINE_THROW(uint32_t)
9192iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9193 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
9194{
9195 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9196
9197 /*
9198 * Handle the disp16 form with no registers first.
9199 *
9200 * Convert to an immediate value, as that'll delay the register allocation
9201 * and assignment till the memory access / call / whatever and we can use
9202 * a more appropriate register (or none at all).
9203 */
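    /* E.g. an operand of the form [0x1234] encodes as mod=0, r/m=6: the effective
       address is just the raw disp16, so the result simply becomes that constant. */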
9204 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
9205 {
9206 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
9207 return off;
9208 }
9209
9210    /* Determine the displacement. */
9211 uint16_t u16EffAddr;
9212 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9213 {
9214 case 0: u16EffAddr = 0; break;
9215 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
9216 case 2: u16EffAddr = u16Disp; break;
9217 default: AssertFailedStmt(u16EffAddr = 0);
9218 }
9219
9220 /* Determine the registers involved. */
9221 uint8_t idxGstRegBase;
9222 uint8_t idxGstRegIndex;
9223 switch (bRm & X86_MODRM_RM_MASK)
9224 {
9225 case 0:
9226 idxGstRegBase = X86_GREG_xBX;
9227 idxGstRegIndex = X86_GREG_xSI;
9228 break;
9229 case 1:
9230 idxGstRegBase = X86_GREG_xBX;
9231 idxGstRegIndex = X86_GREG_xDI;
9232 break;
9233 case 2:
9234 idxGstRegBase = X86_GREG_xBP;
9235 idxGstRegIndex = X86_GREG_xSI;
9236 break;
9237 case 3:
9238 idxGstRegBase = X86_GREG_xBP;
9239 idxGstRegIndex = X86_GREG_xDI;
9240 break;
9241 case 4:
9242 idxGstRegBase = X86_GREG_xSI;
9243 idxGstRegIndex = UINT8_MAX;
9244 break;
9245 case 5:
9246 idxGstRegBase = X86_GREG_xDI;
9247 idxGstRegIndex = UINT8_MAX;
9248 break;
9249 case 6:
9250 idxGstRegBase = X86_GREG_xBP;
9251 idxGstRegIndex = UINT8_MAX;
9252 break;
9253#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
9254 default:
9255#endif
9256 case 7:
9257 idxGstRegBase = X86_GREG_xBX;
9258 idxGstRegIndex = UINT8_MAX;
9259 break;
9260 }
9261
9262 /*
9263 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
9264 */
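    /* E.g. [bx+si+0x12] is mod=1, r/m=0: idxGstRegBase=BX, idxGstRegIndex=SI and
       u16EffAddr=0x12, giving (uint16_t)(BX + SI + 0x12). */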
9265 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9266 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9267 kIemNativeGstRegUse_ReadOnly);
9268 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
9269 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9270 kIemNativeGstRegUse_ReadOnly)
9271 : UINT8_MAX;
9272#ifdef RT_ARCH_AMD64
9273 if (idxRegIndex == UINT8_MAX)
9274 {
9275 if (u16EffAddr == 0)
9276 {
9277            /* movzx ret, base */
9278 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
9279 }
9280 else
9281 {
9282 /* lea ret32, [base64 + disp32] */
9283 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9284 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9285 if (idxRegRet >= 8 || idxRegBase >= 8)
9286 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9287 pbCodeBuf[off++] = 0x8d;
9288 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9289 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
9290 else
9291 {
9292 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
9293 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9294 }
9295 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9296 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9297 pbCodeBuf[off++] = 0;
9298 pbCodeBuf[off++] = 0;
9299 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9300
9301 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9302 }
9303 }
9304 else
9305 {
9306 /* lea ret32, [index64 + base64 (+ disp32)] */
9307 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9308 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9309 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9310 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9311 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9312 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9313 pbCodeBuf[off++] = 0x8d;
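        /* Note: with a SIB byte, mod=0 combined with base=rBP/r13 means 'disp32, no
           base', so such a base must use a displacement form even when the offset
           is zero. */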
9314 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
9315 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9316 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
9317 if (bMod == X86_MOD_MEM4)
9318 {
9319 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9320 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9321 pbCodeBuf[off++] = 0;
9322 pbCodeBuf[off++] = 0;
9323 }
9324 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9325 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9326 }
9327
9328#elif defined(RT_ARCH_ARM64)
9329 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9330 if (u16EffAddr == 0)
9331 {
9332 if (idxRegIndex == UINT8_MAX)
9333 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
9334 else
9335 {
9336 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
9337 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9338 }
9339 }
9340 else
9341 {
9342 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
9343 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
9344 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
9345 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9346 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
9347 else
9348 {
9349 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
9350 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9351 }
9352 if (idxRegIndex != UINT8_MAX)
9353 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
9354 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9355 }
9356
9357#else
9358# error "port me"
9359#endif
9360
9361 if (idxRegIndex != UINT8_MAX)
9362 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9363 iemNativeRegFreeTmp(pReNative, idxRegBase);
9364 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9365 return off;
9366}
9367
9368
9369#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
9370 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
9371
9372/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
9373 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
9374DECL_INLINE_THROW(uint32_t)
9375iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9376 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
9377{
9378 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9379
9380 /*
9381 * Handle the disp32 form with no registers first.
9382 *
9383 * Convert to an immediate value, as that'll delay the register allocation
9384 * and assignment till the memory access / call / whatever and we can use
9385 * a more appropriate register (or none at all).
9386 */
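    /* E.g. an operand of the form [0x12345678] encodes as mod=0, r/m=5: a pure
       disp32, so the result simply becomes that constant. */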
9387 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9388 {
9389 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
9390 return off;
9391 }
9392
9393    /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
9394 uint32_t u32EffAddr = 0;
9395 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9396 {
9397 case 0: break;
9398 case 1: u32EffAddr = (int8_t)u32Disp; break;
9399 case 2: u32EffAddr = u32Disp; break;
9400 default: AssertFailed();
9401 }
9402
9403 /* Get the register (or SIB) value. */
9404 uint8_t idxGstRegBase = UINT8_MAX;
9405 uint8_t idxGstRegIndex = UINT8_MAX;
9406 uint8_t cShiftIndex = 0;
9407 switch (bRm & X86_MODRM_RM_MASK)
9408 {
9409 case 0: idxGstRegBase = X86_GREG_xAX; break;
9410 case 1: idxGstRegBase = X86_GREG_xCX; break;
9411 case 2: idxGstRegBase = X86_GREG_xDX; break;
9412 case 3: idxGstRegBase = X86_GREG_xBX; break;
9413 case 4: /* SIB */
9414 {
9415            /* index w/ scaling. */
9416 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9417 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9418 {
9419 case 0: idxGstRegIndex = X86_GREG_xAX; break;
9420 case 1: idxGstRegIndex = X86_GREG_xCX; break;
9421 case 2: idxGstRegIndex = X86_GREG_xDX; break;
9422 case 3: idxGstRegIndex = X86_GREG_xBX; break;
9423 case 4: cShiftIndex = 0; /*no index*/ break;
9424 case 5: idxGstRegIndex = X86_GREG_xBP; break;
9425 case 6: idxGstRegIndex = X86_GREG_xSI; break;
9426 case 7: idxGstRegIndex = X86_GREG_xDI; break;
9427 }
9428
9429 /* base */
9430 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
9431 {
9432 case 0: idxGstRegBase = X86_GREG_xAX; break;
9433 case 1: idxGstRegBase = X86_GREG_xCX; break;
9434 case 2: idxGstRegBase = X86_GREG_xDX; break;
9435 case 3: idxGstRegBase = X86_GREG_xBX; break;
9436 case 4:
9437 idxGstRegBase = X86_GREG_xSP;
9438 u32EffAddr += uSibAndRspOffset >> 8;
9439 break;
9440 case 5:
9441 if ((bRm & X86_MODRM_MOD_MASK) != 0)
9442 idxGstRegBase = X86_GREG_xBP;
9443 else
9444 {
9445 Assert(u32EffAddr == 0);
9446 u32EffAddr = u32Disp;
9447 }
9448 break;
9449 case 6: idxGstRegBase = X86_GREG_xSI; break;
9450 case 7: idxGstRegBase = X86_GREG_xDI; break;
9451 }
9452 break;
9453 }
9454 case 5: idxGstRegBase = X86_GREG_xBP; break;
9455 case 6: idxGstRegBase = X86_GREG_xSI; break;
9456 case 7: idxGstRegBase = X86_GREG_xDI; break;
9457 }
9458
9459 /*
9460 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
9461 * the start of the function.
9462 */
9463 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9464 {
9465 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
9466 return off;
9467 }
9468
9469 /*
9470 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9471 */
9472 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9473 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9474 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9475 kIemNativeGstRegUse_ReadOnly);
9476 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9477 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9478 kIemNativeGstRegUse_ReadOnly);
9479
9480 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9481 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9482 {
9483 idxRegBase = idxRegIndex;
9484 idxRegIndex = UINT8_MAX;
9485 }
9486
9487#ifdef RT_ARCH_AMD64
9488 if (idxRegIndex == UINT8_MAX)
9489 {
9490 if (u32EffAddr == 0)
9491 {
9492 /* mov ret, base */
9493 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9494 }
9495 else
9496 {
9497 /* lea ret32, [base64 + disp32] */
9498 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9499 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9500 if (idxRegRet >= 8 || idxRegBase >= 8)
9501 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9502 pbCodeBuf[off++] = 0x8d;
9503 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9504 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9505 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9506 else
9507 {
9508 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9509 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9510 }
9511 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9512 if (bMod == X86_MOD_MEM4)
9513 {
9514 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9515 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9516 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9517 }
9518 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9519 }
9520 }
9521 else
9522 {
9523 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9524 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9525 if (idxRegBase == UINT8_MAX)
9526 {
9527 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
9528 if (idxRegRet >= 8 || idxRegIndex >= 8)
9529 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9530 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9531 pbCodeBuf[off++] = 0x8d;
9532 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9533 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9534 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9535 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9536 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9537 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9538 }
9539 else
9540 {
9541 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9542 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9543 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9544 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9545 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9546 pbCodeBuf[off++] = 0x8d;
9547 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9548 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9549 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9550 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9551 if (bMod != X86_MOD_MEM0)
9552 {
9553 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9554 if (bMod == X86_MOD_MEM4)
9555 {
9556 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9557 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9558 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9559 }
9560 }
9561 }
9562 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9563 }
9564
9565#elif defined(RT_ARCH_ARM64)
9566 if (u32EffAddr == 0)
9567 {
9568 if (idxRegIndex == UINT8_MAX)
9569 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9570 else if (idxRegBase == UINT8_MAX)
9571 {
9572 if (cShiftIndex == 0)
9573 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
9574 else
9575 {
9576 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9577 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
9578 }
9579 }
9580 else
9581 {
9582 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9583 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
9584 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9585 }
9586 }
9587 else
9588 {
9589 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
9590 {
9591 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9592 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
9593 }
9594 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
9595 {
9596 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9597 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9598 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
9599 }
9600 else
9601 {
9602 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
9603 if (idxRegBase != UINT8_MAX)
9604 {
9605 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9606 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9607 }
9608 }
9609 if (idxRegIndex != UINT8_MAX)
9610 {
9611 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9612 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
9613 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9614 }
9615 }
9616
9617#else
9618# error "port me"
9619#endif
9620
9621 if (idxRegIndex != UINT8_MAX)
9622 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9623 if (idxRegBase != UINT8_MAX)
9624 iemNativeRegFreeTmp(pReNative, idxRegBase);
9625 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9626 return off;
9627}
9628
9629
9630#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9631 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9632 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
9633
9634#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9635 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9636 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
9637
9638#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9639 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9640 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
9641
9642/**
9643 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
9644 *
9645 * @returns New off.
9646  * @param   pReNative       The native recompile state.
9647  * @param   off             The code buffer offset.
9648 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
9649 * bit 4 to REX.X. The two bits are part of the
9650 * REG sub-field, which isn't needed in this
9651 * function.
9652 * @param uSibAndRspOffset Two parts:
9653 * - The first 8 bits make up the SIB byte.
9654 * - The next 8 bits are the fixed RSP/ESP offset
9655 * in case of a pop [xSP].
9656 * @param u32Disp The displacement byte/word/dword, if any.
9657 * @param cbInstr The size of the fully decoded instruction. Used
9658 * for RIP relative addressing.
9659 * @param idxVarRet The result variable number.
9660 * @param f64Bit Whether to use a 64-bit or 32-bit address size
9661 * when calculating the address.
9662 *
9663 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
9664 */
9665DECL_INLINE_THROW(uint32_t)
9666iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
9667 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
9668{
9669 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9670
9671 /*
9672 * Special case the rip + disp32 form first.
9673 */
9674 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9675 {
9676 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9677 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
9678 kIemNativeGstRegUse_ReadOnly);
9679#ifdef RT_ARCH_AMD64
9680 if (f64Bit)
9681 {
9682 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
9683 if ((int32_t)offFinalDisp == offFinalDisp)
9684 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
9685 else
9686 {
9687 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
9688 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
9689 }
9690 }
9691 else
9692 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
9693
9694#elif defined(RT_ARCH_ARM64)
9695 if (f64Bit)
9696 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
9697 (int64_t)(int32_t)u32Disp + cbInstr);
9698 else
9699 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
9700 (int32_t)u32Disp + cbInstr);
9701
9702#else
9703# error "Port me!"
9704#endif
9705 iemNativeRegFreeTmp(pReNative, idxRegPc);
9706 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9707 return off;
9708 }
9709
9710 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
9711 int64_t i64EffAddr = 0;
9712 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9713 {
9714 case 0: break;
9715 case 1: i64EffAddr = (int8_t)u32Disp; break;
9716 case 2: i64EffAddr = (int32_t)u32Disp; break;
9717 default: AssertFailed();
9718 }
9719
9720 /* Get the register (or SIB) value. */
9721 uint8_t idxGstRegBase = UINT8_MAX;
9722 uint8_t idxGstRegIndex = UINT8_MAX;
9723 uint8_t cShiftIndex = 0;
9724 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
9725 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
9726 else /* SIB: */
9727 {
9728        /* index w/ scaling. */
9729 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9730 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9731 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
9732 if (idxGstRegIndex == 4)
9733 {
9734 /* no index */
9735 cShiftIndex = 0;
9736 idxGstRegIndex = UINT8_MAX;
9737 }
9738
9739 /* base */
9740 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
9741 if (idxGstRegBase == 4)
9742 {
9743 /* pop [rsp] hack */
9744 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
9745 }
9746 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
9747 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
9748 {
9749 /* mod=0 and base=5 -> disp32, no base reg. */
9750 Assert(i64EffAddr == 0);
9751 i64EffAddr = (int32_t)u32Disp;
9752 idxGstRegBase = UINT8_MAX;
9753 }
9754 }
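    /*
     * Worked decode example (annotation, not part of the original source):
     * bRmEx=0x04 (mod=0, rm=4, REX.B=REX.X=0) means a SIB byte follows; with
     * 0x25 in the low byte of uSibAndRspOffset (scale=0, index=4 => no index,
     * base=5 with mod=0 => disp32 only) both idxGstRegBase and idxGstRegIndex
     * end up as UINT8_MAX and i64EffAddr becomes (int32_t)u32Disp, so the
     * constant-only shortcut below is taken.
     */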
9755
9756 /*
9757 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
9758 * the start of the function.
9759 */
9760 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9761 {
9762 if (f64Bit)
9763 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
9764 else
9765 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
9766 return off;
9767 }
9768
9769 /*
9770 * Now emit code that calculates:
9771 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9772 * or if !f64Bit:
9773 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9774 */
9775 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9776 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9777 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9778 kIemNativeGstRegUse_ReadOnly);
9779 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9780 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9781 kIemNativeGstRegUse_ReadOnly);
9782
9783 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9784 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9785 {
9786 idxRegBase = idxRegIndex;
9787 idxRegIndex = UINT8_MAX;
9788 }
9789
9790#ifdef RT_ARCH_AMD64
9791 uint8_t bFinalAdj;
9792 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
9793 bFinalAdj = 0; /* likely */
9794 else
9795 {
9796 /* pop [rsp] with a problematic disp32 value. Split out the
9797 RSP offset and add it separately afterwards (bFinalAdj). */
9798 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
9799 Assert(idxGstRegBase == X86_GREG_xSP);
9800 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
9801 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
9802 Assert(bFinalAdj != 0);
9803 i64EffAddr -= bFinalAdj;
9804 Assert((int32_t)i64EffAddr == i64EffAddr);
9805 }
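    /*
     * Example of the split above (annotation): e.g. pop qword [rsp + 0x7ffffff8]
     * carries an extra +8 RSP adjustment in the upper bits of uSibAndRspOffset;
     * the sum 0x80000000 no longer fits in a signed 32-bit displacement, so the
     * 8 is peeled off into bFinalAdj, the LEA below uses 0x7ffffff8, and
     * bFinalAdj is added back afterwards.
     */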
9806 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
9807//pReNative->pInstrBuf[off++] = 0xcc;
9808
9809 if (idxRegIndex == UINT8_MAX)
9810 {
9811 if (u32EffAddr == 0)
9812 {
9813 /* mov ret, base */
9814 if (f64Bit)
9815 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
9816 else
9817 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9818 }
9819 else
9820 {
9821 /* lea ret, [base + disp32] */
9822 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9823 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9824 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
9825 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9826 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9827 | (f64Bit ? X86_OP_REX_W : 0);
9828 pbCodeBuf[off++] = 0x8d;
9829 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9830 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9831 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9832 else
9833 {
9834 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9835 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9836 }
9837 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9838 if (bMod == X86_MOD_MEM4)
9839 {
9840 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9841 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9842 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9843 }
9844 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9845 }
9846 }
9847 else
9848 {
9849 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9850 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9851 if (idxRegBase == UINT8_MAX)
9852 {
9853 /* lea ret, [(index64 << cShiftIndex) + disp32] */
9854 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
9855 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9856 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
9857 | (f64Bit ? X86_OP_REX_W : 0);
9858 pbCodeBuf[off++] = 0x8d;
9859 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9860 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9861 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9862 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9863 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9864 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9865 }
9866 else
9867 {
9868 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9869 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9870 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9871 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9872 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
9873 | (f64Bit ? X86_OP_REX_W : 0);
9874 pbCodeBuf[off++] = 0x8d;
9875 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9876 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9877 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9878 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9879 if (bMod != X86_MOD_MEM0)
9880 {
9881 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9882 if (bMod == X86_MOD_MEM4)
9883 {
9884 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9885 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9886 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9887 }
9888 }
9889 }
9890 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9891 }
9892
9893 if (!bFinalAdj)
9894 { /* likely */ }
9895 else
9896 {
9897 Assert(f64Bit);
9898 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
9899 }
9900
9901#elif defined(RT_ARCH_ARM64)
9902 if (i64EffAddr == 0)
9903 {
9904 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9905 if (idxRegIndex == UINT8_MAX)
9906 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
9907 else if (idxRegBase != UINT8_MAX)
9908 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
9909 f64Bit, false /*fSetFlags*/, cShiftIndex);
9910 else
9911 {
9912 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
9913 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
9914 }
9915 }
9916 else
9917 {
9918 if (f64Bit)
9919 { /* likely */ }
9920 else
9921 i64EffAddr = (int32_t)i64EffAddr;
9922
9923 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
9924 {
9925 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9926 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
9927 }
9928 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
9929 {
9930 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9931 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
9932 }
9933 else
9934 {
9935 if (f64Bit)
9936 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
9937 else
9938 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
9939 if (idxRegBase != UINT8_MAX)
9940 {
9941 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9942 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
9943 }
9944 }
9945 if (idxRegIndex != UINT8_MAX)
9946 {
9947 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9948 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
9949 f64Bit, false /*fSetFlags*/, cShiftIndex);
9950 }
9951 }
9952
9953#else
9954# error "port me"
9955#endif
9956
9957 if (idxRegIndex != UINT8_MAX)
9958 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9959 if (idxRegBase != UINT8_MAX)
9960 iemNativeRegFreeTmp(pReNative, idxRegBase);
9961 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9962 return off;
9963}
9964
9965
9966/*********************************************************************************************************************************
9967* TLB Lookup. *
9968*********************************************************************************************************************************/
9969
9970#if (defined(RT_ARCH_AMD64) && 1) || (defined(RT_ARCH_ARM64) && 1)
9971# define IEMNATIVE_WITH_TLB_LOOKUP
9972#endif
9973
9974
9975/**
9976  * This must be instantiated *before* branching off to the lookup code,
9977 * so that register spilling and whatnot happens for everyone.
9978 */
9979typedef struct IEMNATIVEEMITTLBSTATE
9980{
9981 bool const fSkip;
9982     uint8_t const idxRegPtrHlp;   /**< We don't support immediate variables with register assignment, so this is a tmp reg alloc. */
9983 uint8_t const idxRegPtr;
9984 uint8_t const idxRegSegBase;
9985 uint8_t const idxRegSegLimit;
9986 uint8_t const idxRegSegAttrib;
9987 uint8_t const idxReg1;
9988 uint8_t const idxReg2;
9989#if defined(RT_ARCH_ARM64)
9990 uint8_t const idxReg3;
9991#endif
9992 uint64_t const uAbsPtr;
9993
9994 IEMNATIVEEMITTLBSTATE(PIEMRECOMPILERSTATE a_pReNative, uint32_t *a_poff, uint8_t a_idxVarGCPtrMem,
9995 uint8_t a_iSegReg, uint8_t a_cbMem)
9996#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9997 /* 32-bit and 64-bit wraparound will require special handling, so skip these for absolute addresses. */
9998 : fSkip( a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
9999 && ( (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT
10000 ? (uint64_t)(UINT32_MAX - a_cbMem)
10001 : (uint64_t)(UINT64_MAX - a_cbMem))
10002 < a_pReNative->Core.aVars[a_idxVarGCPtrMem].u.uValue)
10003#else
10004 : fSkip(true)
10005#endif
10006#if defined(RT_ARCH_AMD64) /* got good immediate encoding, otherwise we just load the address in a reg immediately. */
10007 , idxRegPtrHlp(UINT8_MAX)
10008#else
10009 , idxRegPtrHlp( a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind != kIemNativeVarKind_Immediate
10010 || fSkip
10011 ? UINT8_MAX
10012 : iemNativeRegAllocTmpImm(a_pReNative, a_poff, a_pReNative->Core.aVars[a_idxVarGCPtrMem].u.uValue) )
10013#endif
10014 , idxRegPtr(a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind != kIemNativeVarKind_Immediate
10015 ? iemNativeVarRegisterAcquire(a_pReNative, a_idxVarGCPtrMem, a_poff,
10016 true /*fInitialized*/, IEMNATIVE_CALL_ARG2_GREG)
10017 : idxRegPtrHlp)
10018 , idxRegSegBase(a_iSegReg == UINT8_MAX || fSkip
10019 ? UINT8_MAX
10020 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_BASE(a_iSegReg)))
10021 , idxRegSegLimit((a_iSegReg == UINT8_MAX && (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT) || fSkip
10022 ? UINT8_MAX
10023 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_LIMIT(a_iSegReg)))
10024 , idxRegSegAttrib((a_iSegReg == UINT8_MAX && (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT) || fSkip
10025 ? UINT8_MAX
10026 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_ATTRIB(a_iSegReg)))
10027 , idxReg1(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
10028 , idxReg2(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
10029#if defined(RT_ARCH_ARM64)
10030 , idxReg3(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
10031#endif
10032 , uAbsPtr( a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind != kIemNativeVarKind_Immediate || fSkip
10033 ? UINT64_MAX
10034 : a_pReNative->Core.aVars[a_idxVarGCPtrMem].u.uValue)
10035
10036 {
10037 RT_NOREF_PV(a_cbMem);
10038 }
10039
10040 void freeRegsAndReleaseVars(PIEMRECOMPILERSTATE a_pReNative, uint8_t idxVarGCPtrMem) const
10041 {
10042 if (idxRegPtr != UINT8_MAX)
10043 {
10044 if (idxRegPtrHlp == UINT8_MAX)
10045 iemNativeVarRegisterRelease(a_pReNative, idxVarGCPtrMem);
10046 else
10047 {
10048 Assert(idxRegPtrHlp == idxRegPtr);
10049 iemNativeRegFreeTmpImm(a_pReNative, idxRegPtrHlp);
10050 }
10051 }
10052 else
10053 Assert(idxRegPtrHlp == UINT8_MAX);
10054 if (idxRegSegBase != UINT8_MAX)
10055 iemNativeRegFreeTmp(a_pReNative, idxRegSegBase);
10056 if (idxRegSegLimit != UINT8_MAX)
10057 {
10058 iemNativeRegFreeTmp(a_pReNative, idxRegSegLimit);
10059 iemNativeRegFreeTmp(a_pReNative, idxRegSegAttrib);
10060 }
10061 else
10062 Assert(idxRegSegAttrib == UINT8_MAX);
10063#if defined(RT_ARCH_ARM64)
10064 iemNativeRegFreeTmp(a_pReNative, idxReg3);
10065#endif
10066 iemNativeRegFreeTmp(a_pReNative, idxReg2);
10067 iemNativeRegFreeTmp(a_pReNative, idxReg1);
10068
10069 }
10070
10071 uint32_t getRegsNotToSave() const
10072 {
10073 if (!fSkip)
10074 return RT_BIT_32(idxReg1)
10075 | RT_BIT_32(idxReg2)
10076#if defined(RT_ARCH_ARM64)
10077 | RT_BIT_32(idxReg3)
10078#endif
10079 ;
10080 return 0;
10081 }
10082
10083     /** This is only for avoiding assertions. */
10084 uint32_t getActiveRegsWithShadows() const
10085 {
10086#ifdef VBOX_STRICT
10087 if (!fSkip)
10088 return RT_BIT_32(idxRegSegBase) | RT_BIT_32(idxRegSegLimit) | RT_BIT_32(idxRegSegAttrib);
10089#endif
10090 return 0;
10091 }
10092} IEMNATIVEEMITTLBSTATE;
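/*
 * Rough usage order (annotation, not part of the original source; the real
 * call sites are in the fetch/store emitters further down and may differ in
 * detail):
 *   1. Construct an IEMNATIVEEMITTLBSTATE up front so register allocation and
 *      spilling happen before any branching.
 *   2. If !fSkip, create the tlbmiss/tlbdone (and lookup) labels and call
 *      iemNativeEmitTlbLookup() to emit the inline lookup; the miss branch
 *      falls back to the C helper call.
 *   3. Afterwards call freeRegsAndReleaseVars() to return the temporary
 *      registers and release the GCPtrMem variable.
 */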
10093
10094
10095#ifdef IEMNATIVE_WITH_TLB_LOOKUP
10096DECL_INLINE_THROW(uint32_t)
10097iemNativeEmitTlbLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEEMITTLBSTATE const * const pTlbState,
10098 uint8_t iSegReg, uint8_t cbMem, uint8_t fAlignMask, uint32_t fAccess,
10099 uint32_t idxLabelTlbLookup, uint32_t idxLabelTlbMiss, uint8_t idxRegMemResult,
10100 uint8_t offDisp = 0)
10101{
10102 RT_NOREF(offDisp);
10103 Assert(!pTlbState->fSkip);
10104# if defined(RT_ARCH_AMD64)
10105 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 512);
10106# elif defined(RT_ARCH_ARM64)
10107 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
10108# endif
10109
10110 /*
10111      * The expand down check isn't used all that much, so we emit it here to keep
10112      * the lookup code straighter.
10113      */
10114     /* check_expand_down: ; complicated! */
10115 uint32_t const offCheckExpandDown = off;
10116 uint32_t offFixupLimitDone = 0;
10117 if (iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
10118 {
10119 off = iemNativeEmitBrkEx(pCodeBuf, off, 1); /** @todo this needs testing */
10120 /* cmp seglim, regptr */ /** @todo r=bird: why 64-bit compare again? */
10121 if (pTlbState->idxRegPtr != UINT8_MAX)
10122 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, pTlbState->idxRegPtr);
10123 else
10124 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxRegSegLimit, (uint32_t)pTlbState->uAbsPtr);
10125 /* ja tlbmiss */
10126 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
10127 /* reg1 = segattr & X86DESCATTR_D (0x4000) */
10128 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib, X86DESCATTR_D);
10129 /* xor reg1, X86DESCATTR_D */
10130 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_D);
10131 /* shl reg1, 2 (16 - 14) */
10132 AssertCompile((X86DESCATTR_D << 2) == UINT32_C(0x10000));
10133 off = iemNativeEmitShiftGpr32LeftEx(pCodeBuf, off, pTlbState->idxReg1, 2);
10134 /* dec reg1 (=> 0xffff if D=0; 0xffffffff if D=1) */
10135 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, 1);
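        /* (Annotation: the and/xor/shl/dec sequence above turns the D bit into
           the upper address bound for the expand-down limit check:
             D=0:      0 ^ 0x4000 = 0x4000 -> << 2 = 0x10000 -> -1 = 0x0000ffff
             D=1: 0x4000 ^ 0x4000 =      0 -> << 2 =       0 -> -1 = 0xffffffff ) */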
10136 /* cmp reg1, reg2 (64-bit) / imm (32-bit) */
10137 if (pTlbState->idxRegPtr != UINT8_MAX)
10138 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1,
10139 cbMem > 1 ? pTlbState->idxReg2 : pTlbState->idxRegPtr);
10140 else
10141 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, (uint32_t)(pTlbState->uAbsPtr + cbMem - 1));
10142 /* jbe tlbmiss */
10143 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
10144 /* jmp limitdone */
10145 offFixupLimitDone = off;
10146 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, off /* ASSUME short jump suffices */);
10147 }
10148
10149 /*
10150 * tlblookup:
10151 */
10152 iemNativeLabelDefine(pReNative, idxLabelTlbLookup, off);
10153# if defined(RT_ARCH_ARM64) && 0
10154 off = iemNativeEmitBrkEx(pCodeBuf, off, 0);
10155# endif
10156
10157 /*
10158 * 1. Segmentation.
10159 *
10160 * 1a. Check segment limit and attributes if non-flat 32-bit code. This is complicated.
10161 */
10162 if (iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
10163 {
10164 /* If we're accessing more than one byte, put the last address we'll be
10165 accessing in idxReg2 (64-bit). */
10166 if (cbMem > 1 && pTlbState->idxRegPtr != UINT8_MAX)
10167 {
10168            /* reg2 = regptr + cbMem - 1 ; 64-bit result so we can fend off wraparounds/overflows. */
10169 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, pTlbState->idxReg2,/*=*/ pTlbState->idxRegPtr,/*+*/ cbMem - 1);
10170 }
10171
10172 /* Check that we've got a segment loaded and that it allows the access.
10173 For write access this means a writable data segment.
10174 For read-only accesses this means a readable code segment or any data segment. */
10175 if (fAccess & IEM_ACCESS_TYPE_WRITE)
10176 {
10177 uint32_t const fMustBe1 = X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_WRITE;
10178 uint32_t const fMustBe0 = X86DESCATTR_UNUSABLE | X86_SEL_TYPE_CODE;
10179 /* reg1 = segattrs & (must1|must0) */
10180 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,
10181 pTlbState->idxRegSegAttrib, fMustBe1 | fMustBe0);
10182 /* cmp reg1, must1 */
10183 AssertCompile(fMustBe1 <= UINT16_MAX);
10184 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, fMustBe1);
10185 /* jne tlbmiss */
10186 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10187 }
10188 else
10189 {
10190 /* U | !P |!DT |!CD | RW |
10191 16 | 8 | 4 | 3 | 1 |
10192 -------------------------------
10193 0 | 0 | 0 | 0 | 0 | execute-only code segment. - must be excluded
10194 0 | 0 | 0 | 0 | 1 | execute-read code segment.
10195 0 | 0 | 0 | 1 | 0 | read-only data segment.
10196 0 | 0 | 0 | 1 | 1 | read-write data segment. - last valid combination
10197 */
10198 /* reg1 = segattrs & (relevant attributes) */
10199 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib,
10200 X86DESCATTR_UNUSABLE | X86DESCATTR_P | X86DESCATTR_DT
10201 | X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE);
10202 /* xor reg1, X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE ; place C=1 RW=0 at the bottom & limit the range.
10203 ; EO-code=0, ER-code=2, RO-data=8, RW-data=10 */
10204#ifdef RT_ARCH_ARM64
10205 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_DT | X86_SEL_TYPE_CODE);
10206 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_P);
10207#else
10208 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1,
10209 X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE);
10210#endif
10211 /* sub reg1, X86_SEL_TYPE_WRITE ; EO-code=-2, ER-code=0, RO-data=6, RW-data=8 */
10212 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_WRITE /* ER-code */);
10213 /* cmp reg1, X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE */
10214 AssertCompile(X86_SEL_TYPE_CODE == 8);
10215 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_CODE);
10216 /* ja tlbmiss */
10217 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
10218 }
10219
10220 /*
10221 * Check the limit. If this is a write access, we know that it's a
10222 * data segment and includes the expand_down bit. For read-only accesses
10223 * we need to check that code/data=0 and expanddown=1 before continuing.
10224 */
10225 if (fAccess & IEM_ACCESS_TYPE_WRITE)
10226 {
10227 /* test segattrs, X86_SEL_TYPE_DOWN */
10228 AssertCompile(X86_SEL_TYPE_DOWN < 128);
10229 off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, pTlbState->idxRegSegAttrib, X86_SEL_TYPE_DOWN);
10230 /* jnz check_expand_down */
10231 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offCheckExpandDown, kIemNativeInstrCond_ne);
10232 }
10233 else
10234 {
10235 /* reg1 = segattr & (code | down) */
10236 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,
10237 pTlbState->idxRegSegAttrib, X86_SEL_TYPE_CODE | X86_SEL_TYPE_DOWN);
10238 /* cmp reg1, down */
10239 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_DOWN);
10240 /* je check_expand_down */
10241 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offCheckExpandDown, kIemNativeInstrCond_e);
10242 }
10243
10244 /* expand_up:
10245 cmp seglim, regptr/reg2/imm */
10246 if (pTlbState->idxRegPtr != UINT8_MAX)
10247 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, cbMem > 1 ? pTlbState->idxReg2 : pTlbState->idxRegPtr);
10248 else
10249 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxRegSegLimit, (uint32_t)pTlbState->uAbsPtr);
10250 /* jbe tlbmiss */
10251 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
10252
10253 /* limitdone: */
10254 iemNativeFixupFixedJump(pReNative, offFixupLimitDone, off);
10255 }
10256
10257 /* 1b. Add the segment base. We use idxRegMemResult for the ptr register if this step is
10258 required or if the address is a constant (simplicity). */
10259 uint8_t const idxRegFlatPtr = iSegReg != UINT8_MAX || pTlbState->idxRegPtr == UINT8_MAX
10260 ? idxRegMemResult : pTlbState->idxRegPtr;
10261 if (iSegReg != UINT8_MAX)
10262 {
10263 /* regflat = segbase + regptr/imm */
10264 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
10265 {
10266 Assert(iSegReg >= X86_SREG_FS);
10267 if (pTlbState->idxRegPtr != UINT8_MAX)
10268 off = iemNativeEmitGprEqGprPlusGprEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase, pTlbState->idxRegPtr);
10269 else
10270 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase, pTlbState->uAbsPtr);
10271 }
10272 else if (pTlbState->idxRegPtr != UINT8_MAX)
10273 off = iemNativeEmitGpr32EqGprPlusGprEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase, pTlbState->idxRegPtr);
10274 else
10275 off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr,
10276 pTlbState->idxRegSegBase, (uint32_t)pTlbState->uAbsPtr);
10277 }
10278 else if (pTlbState->idxRegPtr == UINT8_MAX)
10279 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->uAbsPtr);
10280
10281 /*
10282      * 2. Check that the address doesn't cross a page boundary and doesn't have alignment issues.
10283 *
10284 * 2a. Alignment check using fAlignMask.
10285 */
10286 if (fAlignMask)
10287 {
10288 Assert(RT_IS_POWER_OF_TWO(fAlignMask + 1));
10289 Assert(fAlignMask < 128);
10290 /* test regflat, fAlignMask */
10291 off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, idxRegFlatPtr, fAlignMask);
10292 /* jnz tlbmiss */
10293 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10294 }
10295
10296 /*
10297      * 2b. Check that it's not crossing a page boundary. This is implicit in
10298 * the previous test if the alignment is same or larger than the type.
10299 */
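    /* (Annotation/example: a 4-byte access with fAlignMask=3 that passed the
       alignment test above cannot straddle a page, so the check below is
       skipped; an unaligned access, e.g. fAlignMask=0 with cbMem=4, still
       needs the explicit offset-within-page test.) */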
10300 if (cbMem > fAlignMask + 1)
10301 {
10302 /* reg1 = regflat & 0xfff */
10303 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,/*=*/ idxRegFlatPtr,/*&*/ GUEST_PAGE_OFFSET_MASK);
10304 /* cmp reg1, GUEST_PAGE_SIZE - cbMem */
10305        off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, GUEST_PAGE_SIZE - cbMem);
10306 /* ja tlbmiss */
10307 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
10308 }
10309
10310 /*
10311 * 3. TLB lookup.
10312 *
10313 * 3a. Calculate the TLB tag value (IEMTLB_CALC_TAG).
10314 * In 64-bit mode we will also check for non-canonical addresses here.
10315 */
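    /* (Annotation/worked example: the 64-bit pre-check below only looks at
       address bits 63:48 and lets the address through when they are all
       zeroes or all ones, i.e. when the 16-bit value ((uFlat >> 48) + 1)
       is 0 (wrapped) or 1:
           0x00007fff00001000: 0x0000 + 1 = 1 -> continue
           0xffff800000001000: 0xffff + 1 = 0 -> continue
           0x0001000000001000: 0x0001 + 1 = 2 -> jump to tlbmiss ) */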
10316 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
10317 {
10318# if defined(RT_ARCH_AMD64)
10319 /* mov reg1, regflat */
10320 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr);
10321 /* rol reg1, 16 */
10322 off = iemNativeEmitRotateGprLeftEx(pCodeBuf, off, pTlbState->idxReg1, 16);
10323 /** @todo Would 'movsx reg2, word reg1' and working on reg2 in dwords be faster? */
10324 /* inc word reg1 */
10325 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10326 if (pTlbState->idxReg1 >= 8)
10327 pCodeBuf[off++] = X86_OP_REX_B;
10328 pCodeBuf[off++] = 0xff;
10329 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, pTlbState->idxReg1 & 7);
10330 /* cmp word reg1, 1 */
10331 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10332 if (pTlbState->idxReg1 >= 8)
10333 pCodeBuf[off++] = X86_OP_REX_B;
10334 pCodeBuf[off++] = 0x83;
10335 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, pTlbState->idxReg1 & 7);
10336 pCodeBuf[off++] = 1;
10337 /* ja tlbmiss */
10338 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
10339 /* shr reg1, 16 + GUEST_PAGE_SHIFT */
10340 off = iemNativeEmitShiftGprRightEx(pCodeBuf, off, pTlbState->idxReg1, 16 + GUEST_PAGE_SHIFT);
10341
10342# elif defined(RT_ARCH_ARM64)
10343 /* lsr reg1, regflat, #48 */
10344        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(pTlbState->idxReg1, idxRegFlatPtr, 48);
10345 /* add reg1, reg1, #1 */
10346 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(pTlbState->idxReg1, pTlbState->idxReg1, 1, false /*f64Bit*/);
10347 /* tst reg1, #0xfffe */
10348 Assert(Armv8A64ConvertImmRImmS2Mask32(14, 31) == 0xfffe);
10349 pCodeBuf[off++] = Armv8A64MkInstrTstImm(pTlbState->idxReg1, 14, 31, false /*f64Bit*/);
10350        /* b.ne tlbmiss */
10351 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10352
10353 /* ubfx reg1, regflat, #12, #36 */
10354 pCodeBuf[off++] = Armv8A64MkInstrUbfx(pTlbState->idxReg1, idxRegFlatPtr, GUEST_PAGE_SHIFT, 48 - GUEST_PAGE_SHIFT);
10355# else
10356# error "Port me"
10357# endif
10358 }
10359 else
10360 {
10361 /* reg1 = (uint32_t)(regflat >> 12) */
10362 off = iemNativeEmitGpr32EqGprShiftRightImmEx(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr, GUEST_PAGE_SHIFT);
10363 }
10364 /* or reg1, [qword pVCpu->iem.s.DataTlb.uTlbRevision] */
10365# if defined(RT_ARCH_AMD64)
10366 pCodeBuf[off++] = pTlbState->idxReg1 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
10367 pCodeBuf[off++] = 0x0b; /* OR r64,r/m64 */
10368 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, pTlbState->idxReg1, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbRevision));
10369# else
10370 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg3, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbRevision));
10371 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
10372# endif
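    /* (Annotation: at this point reg1 holds the TLB tag - roughly the guest
       page number in the low bits OR'ed with the current data TLB revision in
       the bits above it.  Entries stored under an older revision can then
       never compare equal in step 3c, so bumping the revision cheaply
       invalidates the whole TLB.) */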
10373
10374 /*
10375 * 3b. Calc pTlbe.
10376 */
10377# if defined(RT_ARCH_AMD64)
10378 /* movzx reg2, byte reg1 */
10379 off = iemNativeEmitLoadGprFromGpr8Ex(pCodeBuf, off, pTlbState->idxReg2, pTlbState->idxReg1);
10380 /* shl reg2, 5 ; reg2 *= sizeof(IEMTLBENTRY) */
10381 AssertCompileSize(IEMTLBENTRY, 32);
10382 off = iemNativeEmitShiftGprLeftEx(pCodeBuf, off, pTlbState->idxReg2, 5);
10383 /* lea reg2, [pVCpu->iem.s.DataTlb.aEntries + reg2] */
10384 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU < 8);
10385 pCodeBuf[off++] = pTlbState->idxReg2 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_X | X86_OP_REX_R;
10386 pCodeBuf[off++] = 0x8d;
10387 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, pTlbState->idxReg2 & 7, 4 /*SIB*/);
10388 pCodeBuf[off++] = X86_SIB_MAKE(IEMNATIVE_REG_FIXED_PVMCPU & 7, pTlbState->idxReg2 & 7, 0);
10389 pCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
10390 pCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
10391 pCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
10392 pCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
10393
10394# elif defined(RT_ARCH_ARM64)
10395 /* reg2 = (reg1 & 0xff) << 5 */
10396 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(pTlbState->idxReg2, pTlbState->idxReg1, 5, 8);
10397 /* reg2 += offsetof(VMCPUCC, iem.s.DataTlb.aEntries) */
10398 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, pTlbState->idxReg2, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries),
10399 pTlbState->idxReg3 /*iGprTmp*/);
10400 /* reg2 += pVCpu */
10401 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, pTlbState->idxReg2, IEMNATIVE_REG_FIXED_PVMCPU);
10402# else
10403# error "Port me"
10404# endif
10405
10406 /*
10407 * 3c. Compare the TLBE.uTag with the one from 2a (reg1).
10408 */
10409# if defined(RT_ARCH_AMD64)
10410 /* cmp reg1, [reg2] */
10411 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
10412 pCodeBuf[off++] = 0x3b;
10413 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
10414# elif defined(RT_ARCH_ARM64)
10415 off = iemNativeEmitLoadGprByGprEx(pCodeBuf, off, pTlbState->idxReg3, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
10416 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
10417# else
10418# error "Port me"
10419# endif
10420 /* jne tlbmiss */
10421 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10422
10423 /*
10424 * 4. Check TLB page table level access flags and physical page revision #.
10425 */
10426 /* mov reg1, mask */
10427 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
10428 uint64_t const fNoUser = (((pReNative->fExec >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK) + 1) & IEMTLBE_F_PT_NO_USER;
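    /* (Annotation: the CPL trick above - only CPL 3 makes ((CPL + 1) & 4)
       non-zero, so fNoUser is IEMTLBE_F_PT_NO_USER (4) for ring-3 guest code
       and 0 for rings 0 thru 2; a page marked supervisor-only thus forces the
       tlbmiss path only for user-mode accesses via the mask check below.) */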
10429 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg1,
10430 IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
10431 | IEMTLBE_F_PG_UNASSIGNED | IEMTLBE_F_PG_NO_READ
10432 | IEMTLBE_F_PT_NO_ACCESSED | fNoUser);
10433# if defined(RT_ARCH_AMD64)
10434 /* and reg1, [reg2->fFlagsAndPhysRev] */
10435 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
10436 pCodeBuf[off++] = 0x23;
10437 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1,
10438 pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
10439
10440 /* cmp reg1, [pVCpu->iem.s.DataTlb.uTlbPhysRev] */
10441 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R);
10442 pCodeBuf[off++] = 0x3b;
10443 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, IEMNATIVE_REG_FIXED_PVMCPU,
10444 RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbPhysRev));
10445# elif defined(RT_ARCH_ARM64)
10446 off = iemNativeEmitLoadGprByGprEx(pCodeBuf, off, pTlbState->idxReg3, pTlbState->idxReg2,
10447 RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
10448 pCodeBuf[off++] = Armv8A64MkInstrAnd(pTlbState->idxReg1, pTlbState->idxReg1, pTlbState->idxReg3);
10449 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg3, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbPhysRev));
10450 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
10451# else
10452# error "Port me"
10453# endif
10454 /* jne tlbmiss */
10455 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10456
10457 /*
10458 * 5. Check that pbMappingR3 isn't NULL (paranoia) and calculate the
10459 * resulting pointer.
10460 */
10461 /* mov reg1, [reg2->pbMappingR3] */
10462 off = iemNativeEmitLoadGprByGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
10463 RT_UOFFSETOF(IEMTLBENTRY, pbMappingR3));
10464 /* if (!reg1) jmp tlbmiss */
10465 /** @todo eliminate the need for this test? */
10466 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, pTlbState->idxReg1,
10467 true /*f64Bit*/, idxLabelTlbMiss);
10468
10469 if (idxRegFlatPtr == idxRegMemResult) /* See step 1b. */
10470 {
10471 /* and result, 0xfff */
10472 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegMemResult, GUEST_PAGE_OFFSET_MASK);
10473 }
10474 else
10475 {
10476 Assert(idxRegFlatPtr == pTlbState->idxRegPtr);
10477 /* result = regflat & 0xfff */
10478 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, idxRegMemResult, idxRegFlatPtr, GUEST_PAGE_OFFSET_MASK);
10479 }
10480 /* add result, reg1 */
10481 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxReg1);
10482
10483 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10484
10485 return off;
10486}
10487#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
10488
10489
10490/*********************************************************************************************************************************
10491* Memory fetches and stores common *
10492*********************************************************************************************************************************/
10493
10494typedef enum IEMNATIVEMITMEMOP
10495{
10496 kIemNativeEmitMemOp_Store = 0,
10497 kIemNativeEmitMemOp_Fetch,
10498 kIemNativeEmitMemOp_Fetch_Zx_U16,
10499 kIemNativeEmitMemOp_Fetch_Zx_U32,
10500 kIemNativeEmitMemOp_Fetch_Zx_U64,
10501 kIemNativeEmitMemOp_Fetch_Sx_U16,
10502 kIemNativeEmitMemOp_Fetch_Sx_U32,
10503 kIemNativeEmitMemOp_Fetch_Sx_U64
10504} IEMNATIVEMITMEMOP;
10505
10506/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
10507 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
10508 * (with iSegReg = UINT8_MAX). */
10509DECL_INLINE_THROW(uint32_t)
10510iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
10511 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
10512 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
10513{
10514 /*
10515 * Assert sanity.
10516 */
10517 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
10518 Assert( enmOp != kIemNativeEmitMemOp_Store
10519 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
10520 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
10521 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
10522 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
10523 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
10524 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10525 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
10526 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
10527 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
10528#ifdef VBOX_STRICT
10529 if (iSegReg == UINT8_MAX)
10530 {
10531 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10532 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10533 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10534 switch (cbMem)
10535 {
10536 case 1:
10537 Assert( pfnFunction
10538 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
10539 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10540 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10541 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10542 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10543 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
10544 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
10545 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
10546 : UINT64_C(0xc000b000a0009000) ));
10547 break;
10548 case 2:
10549 Assert( pfnFunction
10550 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
10551 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10552 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10553 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10554 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
10555 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
10556 : UINT64_C(0xc000b000a0009000) ));
10557 break;
10558 case 4:
10559 Assert( pfnFunction
10560 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
10561 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10562 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10563 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
10564 : UINT64_C(0xc000b000a0009000) ));
10565 break;
10566 case 8:
10567 Assert( pfnFunction
10568 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
10569 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
10570 : UINT64_C(0xc000b000a0009000) ));
10571 break;
10572 }
10573 }
10574 else
10575 {
10576 Assert(iSegReg < 6);
10577 switch (cbMem)
10578 {
10579 case 1:
10580 Assert( pfnFunction
10581 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
10582 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
10583 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10584 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10585 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10586 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
10587 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
10588 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
10589 : UINT64_C(0xc000b000a0009000) ));
10590 break;
10591 case 2:
10592 Assert( pfnFunction
10593 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
10594 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
10595 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
10596 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
10597 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
10598 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
10599 : UINT64_C(0xc000b000a0009000) ));
10600 break;
10601 case 4:
10602 Assert( pfnFunction
10603 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
10604 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
10605 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
10606 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
10607 : UINT64_C(0xc000b000a0009000) ));
10608 break;
10609 case 8:
10610 Assert( pfnFunction
10611 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
10612 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
10613 : UINT64_C(0xc000b000a0009000) ));
10614 break;
10615 }
10616 }
10617#endif
10618
10619#ifdef VBOX_STRICT
10620 /*
10621 * Check that the fExec flags we've got make sense.
10622 */
10623 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10624#endif
10625
10626 /*
10627 * To keep things simple we have to commit any pending writes first as we
10628 * may end up making calls.
10629 */
10630 /** @todo we could postpone this till we make the call and reload the
10631 * registers after returning from the call. Not sure if that's sensible or
10632 * not, though. */
10633 off = iemNativeRegFlushPendingWrites(pReNative, off);
10634
10635 /*
10636 * Move/spill/flush stuff out of call-volatile registers.
10637 * This is the easy way out. We could contain this to the tlb-miss branch
10638 * by saving and restoring active stuff here.
10639 */
10640 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10641 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
10642
10643 /*
10644 * Define labels and allocate the result register (trying for the return
10645 * register if we can).
10646 */
10647 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10648 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10649 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10650 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX /* special case value storing below */
10651 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
10652 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
10653 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
10654
10655 /*
10656 * First we try to go via the TLB.
10657 */
10658//pReNative->pInstrBuf[off++] = 0xcc;
10659 /** @todo later. */
10660 RT_NOREF(fAlignMask, cbMem);
10661
10662 /*
10663 * Call helper to do the fetching.
10664 * We flush all guest register shadow copies here.
10665 */
10666 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10667
10668#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10669 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10670#else
10671 RT_NOREF(idxInstr);
10672#endif
10673
10674 uint8_t idxRegArgValue;
10675 if (iSegReg == UINT8_MAX)
10676 idxRegArgValue = IEMNATIVE_CALL_ARG2_GREG;
10677 else
10678 {
10679 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
10680 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
10681 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
10682
10683 idxRegArgValue = IEMNATIVE_CALL_ARG3_GREG;
10684 }
10685
10686 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
10687 if (enmOp == kIemNativeEmitMemOp_Store)
10688 {
10689 if (pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate)
10690 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArgValue, pReNative->Core.aVars[idxVarValue].u.uValue);
10691 else
10692 {
10693 uint8_t const idxRegVarValue = pReNative->Core.aVars[idxVarValue].idxReg;
10694 if (idxRegVarValue < RT_ELEMENTS(pReNative->Core.aHstRegs))
10695 {
10696 Assert(!(RT_BIT_32(idxRegVarValue) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10697 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArgValue, idxRegVarValue);
10698 }
10699 else
10700 {
10701 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarValue].idxStackSlot;
10702 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10703 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArgValue, iemNativeStackCalcBpDisp(idxStackSlot));
10704 }
10705 }
10706 }
10707
10708 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
10709 if (pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate)
10710 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
10711 pReNative->Core.aVars[idxVarGCPtrMem].u.uValue + offDisp);
10712 else
10713 {
10714 uint8_t const idxRegVarGCPtrMem = pReNative->Core.aVars[idxVarGCPtrMem].idxReg;
10715 if (idxRegVarGCPtrMem < RT_ELEMENTS(pReNative->Core.aHstRegs))
10716 {
10717 Assert(!(RT_BIT_32(idxRegVarGCPtrMem) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10718 if (!offDisp)
10719 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem);
10720 else
10721 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem, offDisp);
10722 }
10723 else
10724 {
10725 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarGCPtrMem].idxStackSlot;
10726 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10727 AssertFailed(); /** @todo This was probably caused by iemNativeRegMoveAndFreeAndFlushAtCall above. Improve... */
10728 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, iemNativeStackCalcBpDisp(idxStackSlot));
10729 if (offDisp)
10730 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offDisp);
10731 }
10732 }
10733
10734 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10735 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10736
10737 /* Done setting up parameters, make the call. */
10738 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10739
10740 /*
10741 * Put the result in the right register if this is a fetch.
10742 */
10743 if (enmOp != kIemNativeEmitMemOp_Store)
10744 {
10745 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
10746 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
10747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
10748 iemNativeVarRegisterRelease(pReNative, idxVarValue);
10749 }
10750
10751 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10752
10753 return off;
10754}
10755
10756
10757
10758/*********************************************************************************************************************************
10759* Memory fetches (IEM_MEM_FETCH_XXX). *
10760*********************************************************************************************************************************/
10761
10762/* 8-bit segmented: */
10763#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
10764 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
10765 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
10766 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10767
10768#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10769 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10770 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
10771 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10772
10773#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10774 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10775 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10776 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10777
10778#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10779 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10780 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10781 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10782
10783#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10784 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10785 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
10786 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
10787
10788#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10789 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10790 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10791 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
10792
10793#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10794 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10795 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10796 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
10797
10798/* 16-bit segmented: */
10799#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10800 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10801 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10802 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10803
10804#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
10805 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10806 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10807 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
10808
10809#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10810 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10811 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10812 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10813
10814#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10815 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10816 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10817 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10818
10819#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10820 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10821 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10822 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
10823
10824#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10825 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10826 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10827 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
10828
10829
10830/* 32-bit segmented: */
10831#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10832 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10833 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10834 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
10835
10836#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
10837 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10838 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10839 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
10840
10841#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10842 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10843 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10844 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
10845
10846#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10847 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10848 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10849 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
10850
10851
10852/* 64-bit segmented: */
10853#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10854 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10855 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
10856 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
10857
10858
10859
10860/* 8-bit flat: */
10861#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
10862 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
10863 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
10864 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10865
10866#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
10867 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10868 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
10869 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10870
10871#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
10872 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10873 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10874 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10875
10876#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
10877 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10878 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10879 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10880
10881#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
10882 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10883 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
10884 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
10885
10886#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
10887 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10888 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10889 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
10890
10891#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
10892 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10893 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10894 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
10895
10896
10897/* 16-bit flat: */
10898#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
10899 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10900 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10901 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10902
10903#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
10904 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10905 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10906 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
10907
10908#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
10909 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10910 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10911 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10912
10913#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
10914 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10915 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10916 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10917
10918#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
10919 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10920 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10921 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
10922
10923#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
10924 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10925 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10926 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
10927
10928/* 32-bit flat: */
10929#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
10930 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10931 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10932 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
10933
10934#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
10935 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10936 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10937 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
10938
10939#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
10940 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10941 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10942 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
10943
10944#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
10945 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10946 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10947 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
10948
10949/* 64-bit flat: */
10950#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
10951 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10952 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
10953 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
10954
10955
10956
10957/*********************************************************************************************************************************
10958* Memory stores (IEM_MEM_STORE_XXX). *
10959*********************************************************************************************************************************/
10960
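/* Note: the IEM_MC_STORE_MEM_XXX defines below mirror the fetch pattern above,
   just with kIemNativeEmitMemOp_Store; the *_CONST variants further down are
   routed through iemNativeEmitMemStoreConstDataCommon, which wraps the
   immediate in a temporary constant variable first. */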
10961#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
10962 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
10963 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
10964 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
10965
10966#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
10967 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
10968 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
10969 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
10970
10971#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
10972 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
10973 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
10974 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
10975
10976#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
10977 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
10978 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
10979 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
10980
10981
10982#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
10983 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
10984 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
10985 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
10986
10987#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
10988 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
10989 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
10990 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
10991
10992#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
10993 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
10994 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
10995 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
10996
10997#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
10998 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
10999 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11000 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11001
11002
11003#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
11004 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11005 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11006
11007#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
11008 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11009 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11010
11011#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
11012 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11013 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11014
11015#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
11016 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11017 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11018
11019
11020#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
11021 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11022 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11023
11024#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
11025 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11026 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11027
11028#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
11029 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11030 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11031
11032#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
11033 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11034 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11035
11036/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
11037 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
11038DECL_INLINE_THROW(uint32_t)
11039iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
11040 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
11041{
11042 /*
11043 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
11044 * to do the grunt work.
11045 */
11046 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
11047 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
11048 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
11049 pfnFunction, idxInstr);
11050 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
11051 return off;
11052}
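/* Informal usage sketch (segment, address and value are example-only):
       IEM_MC_STORE_MEM_U16_CONST(X86_SREG_DS, GCPtrEff, 0x1234)
   ends up here with cbMem = sizeof(uint16_t); a temporary constant variable
   holding 0x1234 is allocated, handed to iemNativeEmitMemFetchStoreDataCommon
   as a plain store, and freed again afterwards. */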
11053
11054
11055
11056/*********************************************************************************************************************************
11057* Stack Accesses. *
11058*********************************************************************************************************************************/
11059/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
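/* E.g. IEM_MC_FLAT64_PUSH_U16 below packs RT_MAKE_U32_FROM_U8(16, 64, 0, 0):
   byte 0 = the 16-bit value width, byte 1 = 64-bit flat stack, byte 2 = not a
   segment-register push; the VBOX_STRICT checks below key off RT_BYTE2(). */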
11060#define IEM_MC_PUSH_U16(a_u16Value) \
11061 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11062 (uintptr_t)iemNativeHlpStackPushU16, pCallEntry->idxInstr)
11063#define IEM_MC_PUSH_U32(a_u32Value) \
11064 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11065 (uintptr_t)iemNativeHlpStackPushU32, pCallEntry->idxInstr)
11066#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
11067 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
11068 (uintptr_t)iemNativeHlpStackPushU32SReg, pCallEntry->idxInstr)
11069#define IEM_MC_PUSH_U64(a_u64Value) \
11070 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
11071 (uintptr_t)iemNativeHlpStackPushU64, pCallEntry->idxInstr)
11072
11073#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
11074 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
11075 (uintptr_t)iemNativeHlpStackFlat32PushU16, pCallEntry->idxInstr)
11076#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
11077 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
11078 (uintptr_t)iemNativeHlpStackFlat32PushU32, pCallEntry->idxInstr)
11079#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
11080 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
11081 (uintptr_t)iemNativeHlpStackFlat32PushU32SReg, pCallEntry->idxInstr)
11082
11083#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
11084 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
11085 (uintptr_t)iemNativeHlpStackFlat64PushU16, pCallEntry->idxInstr)
11086#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
11087 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
11088 (uintptr_t)iemNativeHlpStackFlat64PushU64, pCallEntry->idxInstr)
11089
11090/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
11091DECL_INLINE_THROW(uint32_t)
11092iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
11093 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
11094{
11095 /*
11096 * Assert sanity.
11097 */
11098 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11099#ifdef VBOX_STRICT
11100 if (RT_BYTE2(cBitsVarAndFlat) != 0)
11101 {
11102 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11103 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11104 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11105 Assert( pfnFunction
11106 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU16
11107 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32
11108 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32SReg
11109                   : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU16
11110 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU64
11111 : UINT64_C(0xc000b000a0009000) ));
11112 }
11113 else
11114 Assert( pfnFunction
11115 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU16
11116 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU32
11117 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackPushU32SReg
11118 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU64
11119 : UINT64_C(0xc000b000a0009000) ));
11120#endif
11121
11122#ifdef VBOX_STRICT
11123 /*
11124 * Check that the fExec flags we've got make sense.
11125 */
11126 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11127#endif
11128
11129 /*
11130 * To keep things simple we have to commit any pending writes first as we
11131 * may end up making calls.
11132 */
11133 /** @todo we could postpone this till we make the call and reload the
11134 * registers after returning from the call. Not sure if that's sensible or
11135 * not, though. */
11136 off = iemNativeRegFlushPendingWrites(pReNative, off);
11137
11138 /*
11139 * Move/spill/flush stuff out of call-volatile registers, keeping whatever
11140 * idxVarValue might be occupying.
11141 *
11142 * This is the easy way out. We could contain this to the tlb-miss branch
11143 * by saving and restoring active stuff here.
11144 */
11145 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
11146 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarValue));
11147
11148 /* For now, flush any shadow copy of the xSP register. */
11149 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
11150
11151 /*
11152 * Define labels and allocate the result register (trying for the return
11153 * register if we can).
11154 */
11155 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11156 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
11157 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11158
11159 /*
11160 * First we try to go via the TLB.
11161 */
11162//pReNative->pInstrBuf[off++] = 0xcc;
11163 /** @todo later. */
11164 RT_NOREF(cBitsVarAndFlat);
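    /* Note: no TLB lookup is emitted for pushes yet (see the @todo above), so
       execution currently always falls through into the helper call below. */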
11165
11166 /*
11167     * Call helper to do the pushing.
11168 */
11169 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
11170
11171#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11172 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11173#else
11174 RT_NOREF(idxInstr);
11175#endif
11176
11177 /* IEMNATIVE_CALL_ARG1_GREG = idxVarValue (first) */
11178 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarValue,
11179 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11180
11181 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11182 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11183
11184 /* Done setting up parameters, make the call. */
11185 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11186
11187    /* The value variable is implicitly flushed. */
11188 iemNativeVarFreeLocal(pReNative, idxVarValue);
11189
11190 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11191
11192 return off;
11193}
11194
11195
11196
11197/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
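/* E.g. IEM_MC_FLAT64_POP_GREG_U16 below packs RT_MAKE_U32_FROM_U8(16, 64, 0, 0),
   i.e. a 16-bit pop in 64-bit flat mode; the segmented forms leave byte 1 zero. */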
11198#define IEM_MC_POP_GREG_U16(a_iGReg) \
11199 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11200 (uintptr_t)iemNativeHlpStackPopGRegU16, pCallEntry->idxInstr)
11201#define IEM_MC_POP_GREG_U32(a_iGReg) \
11202 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11203 (uintptr_t)iemNativeHlpStackPopGRegU32, pCallEntry->idxInstr)
11204#define IEM_MC_POP_GREG_U64(a_iGReg) \
11205 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
11206 (uintptr_t)iemNativeHlpStackPopGRegU64, pCallEntry->idxInstr)
11207
11208#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
11209 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
11210 (uintptr_t)iemNativeHlpStackFlat32PopGRegU16, pCallEntry->idxInstr)
11211#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
11212 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
11213 (uintptr_t)iemNativeHlpStackFlat32PopGRegU32, pCallEntry->idxInstr)
11214
11215#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
11216 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
11217 (uintptr_t)iemNativeHlpStackFlat64PopGRegU16, pCallEntry->idxInstr)
11218#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
11219 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
11220 (uintptr_t)iemNativeHlpStackFlat64PopGRegU64, pCallEntry->idxInstr)
11221
11222/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
11223DECL_INLINE_THROW(uint32_t)
11224iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
11225 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
11226{
11227 /*
11228 * Assert sanity.
11229 */
11230 Assert(idxGReg < 16);
11231#ifdef VBOX_STRICT
11232 if (RT_BYTE2(cBitsVarAndFlat) != 0)
11233 {
11234 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11235 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11236 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11237 Assert( pfnFunction
11238 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU16
11239 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU32
11240                   : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU16
11241 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU64
11242 : UINT64_C(0xc000b000a0009000) ));
11243 }
11244 else
11245 Assert( pfnFunction
11246 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU16
11247 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU32
11248 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU64
11249 : UINT64_C(0xc000b000a0009000) ));
11250#endif
11251
11252#ifdef VBOX_STRICT
11253 /*
11254 * Check that the fExec flags we've got make sense.
11255 */
11256 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11257#endif
11258
11259 /*
11260 * To keep things simple we have to commit any pending writes first as we
11261 * may end up making calls.
11262 */
11263 /** @todo we could postpone this till we make the call and reload the
11264 * registers after returning from the call. Not sure if that's sensible or
11265 * not, though. */
11266 off = iemNativeRegFlushPendingWrites(pReNative, off);
11267
11268 /*
11269 * Move/spill/flush stuff out of call-volatile registers.
11270 * This is the easy way out. We could contain this to the tlb-miss branch
11271 * by saving and restoring active stuff here.
11272 */
11273 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
11274 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11275
11276    /* For now, flush any shadow copy of the guest register that is about
11277 to be popped and the xSP register. */
11278 iemNativeRegFlushGuestShadows(pReNative,
11279 RT_BIT_64(IEMNATIVEGSTREG_GPR(idxGReg)) | RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
11280
11281 /*
11282 * Define labels and allocate the result register (trying for the return
11283 * register if we can).
11284 */
11285 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11286 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
11287 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11288
11289 /*
11290 * First we try to go via the TLB.
11291 */
11292//pReNative->pInstrBuf[off++] = 0xcc;
11293 /** @todo later. */
11294 RT_NOREF(cBitsVarAndFlat);
11295
11296 /*
11297 * Call helper to do the popping.
11298 */
11299 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
11300
11301#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11302 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11303#else
11304 RT_NOREF(idxInstr);
11305#endif
11306
11307 /* IEMNATIVE_CALL_ARG1_GREG = iGReg */
11308 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxGReg);
11309
11310 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11311 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11312
11313 /* Done setting up parameters, make the call. */
11314 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11315
11316 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11317
11318 return off;
11319}
11320
11321
11322
11323/*********************************************************************************************************************************
11324* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
11325*********************************************************************************************************************************/
11326
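/* Note: each IEM_MC_MEM_MAP_XXX / IEM_MC_MEM_FLAT_MAP_XXX define below feeds
   iemNativeEmitMemMapCommon with the mapped type's size, the access flags
   (IEM_ACCESS_TYPE_READ and/or IEM_ACCESS_TYPE_WRITE), an alignment mask and
   the matching mapping helper; the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX MCs at the
   end of this section pair with these via iemNativeEmitMemCommitAndUnmap. */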
11327#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11328 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11329 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
11330 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
11331
11332#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11333 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11334 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
11335 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
11336
11337#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11338 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11339 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
11340 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
11341
11342
11343#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11344 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11345 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11346 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
11347
11348#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11349 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11350 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11351 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
11352
11353#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11354 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11355 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11356 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
11357
11358#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11359 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
11360 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11361 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
11362
11363
11364#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11365 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11366 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11367 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
11368
11369#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11370 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11371 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11372 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
11373
11374#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11375 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11376 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11377 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
11378
11379#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11380 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
11381 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11382 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
11383
11384
11385#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11386 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11387 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11388 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
11389
11390#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11391 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11392 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11393 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
11394
11395#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11396 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11397 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11398 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
11399
11400#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11401 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
11402 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11403 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
11404
11405
11406#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11407 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
11408 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11409 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
11410
11411#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11412 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
11413 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
11414 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
11415
11416
11417#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11418 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
11419 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11420 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
11421
11422#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11423 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
11424 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11425 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
11426
11427#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11428 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
11429 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11430 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
11431
11432
11433
11434#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
11435 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11436 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
11437 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
11438
11439#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
11440 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11441 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
11442 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
11443
11444#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
11445 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11446 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
11447 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
11448
11449
11450#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
11451 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11452 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11453 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
11454
11455#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
11456 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11457 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11458 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
11459
11460#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
11461 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11462 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11463 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
11464
11465#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
11466 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
11467 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11468 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
11469
11470
11471#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
11472 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11473 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11474 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
11475
11476#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
11477 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11478 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11479 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
11480
11481#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
11482 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11483 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11484 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
11485
11486#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
11487 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
11488 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11489 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
11490
11491
11492#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
11493 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11494 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11495 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
11496
11497#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
11498 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11499 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11500 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
11501
11502#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
11503 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11504 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11505 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
11506
11507#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
11508 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
11509 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11510 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
11511
11512
11513#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
11514 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
11515 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11516 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
11517
11518#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
11519 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
11520 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
11521 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
11522
11523
11524#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
11525 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
11526 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11527 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
11528
11529#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
11530 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
11531 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11532 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
11533
11534#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
11535 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
11536 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11537 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
11538
11539
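/** Emits code for IEM_MC_MEM_MAP_U8/16/32/64/R80/D80/U128_RW/WO/RO and the
 * corresponding IEM_MC_MEM_FLAT_MAP_XXX variants (with iSegReg = UINT8_MAX). */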
11540DECL_INLINE_THROW(uint32_t)
11541iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
11542 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
11543 uintptr_t pfnFunction, uint8_t idxInstr)
11544{
11545 /*
11546 * Assert sanity.
11547 */
11548 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
11549 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
11550 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
11551 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11552
11553 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
11554 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
11555 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
11556 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11557
11558 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
11559 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
11560 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
11561 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11562
11563 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
11564
11565 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
11566
11567#ifdef VBOX_STRICT
11568# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
11569 ( ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
11570 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
11571 : ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_READ \
11572 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
11573
11574 if (iSegReg == UINT8_MAX)
11575 {
11576 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11577 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11578 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11579 switch (cbMem)
11580 {
11581 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
11582 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
11583 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
11584 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
11585 case 10:
11586 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
11587 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
11588 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
11589 break;
11590 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
11591# if 0
11592 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
11593 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
11594# endif
11595 default: AssertFailed(); break;
11596 }
11597 }
11598 else
11599 {
11600 Assert(iSegReg < 6);
11601 switch (cbMem)
11602 {
11603 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
11604 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
11605 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
11606 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
11607 case 10:
11608 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
11609 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
11610 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
11611 break;
11612 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
11613# if 0
11614 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU256)); break;
11615 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU512)); break;
11616# endif
11617 default: AssertFailed(); break;
11618 }
11619 }
11620# undef IEM_MAP_HLP_FN
11621#endif
11622
11623#ifdef VBOX_STRICT
11624 /*
11625 * Check that the fExec flags we've got make sense.
11626 */
11627 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11628#endif
11629
11630 /*
11631 * To keep things simple we have to commit any pending writes first as we
11632 * may end up making calls.
11633 */
11634 off = iemNativeRegFlushPendingWrites(pReNative, off);
11635
11636#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11637 /*
11638 * Move/spill/flush stuff out of call-volatile registers.
11639 * This is the easy way out. We could contain this to the tlb-miss branch
11640 * by saving and restoring active stuff here.
11641 */
11642 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
11643 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11644#endif
11645
11646 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
11647       while the tlb-miss code path will temporarily put it on the stack.
11648       Set the type to stack here so we don't need to do it twice below. */
11649 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
11650 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
11651 /** @todo use a tmp register from TlbState, since they'll be free after tlb
11652 * lookup is done. */
11653
11654 /*
11655 * Define labels and allocate the result register (trying for the return
11656 * register if we can - which we of course can, given the above call).
11657 */
11658 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11659 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
11660 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
11661 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
11662
11663 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
11664
11665 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11666 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11667 : UINT32_MAX;
11668//off=iemNativeEmitBrk(pReNative, off, 0);
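    /* Rough layout of the code emitted below when the TLB lookup isn't skipped:
           jmp tlblookup
       tlbmiss:   save volatiles, call pfnFunction, fetch the results, restore
           jmp tlbdone
       tlblookup: inline TLB probe (branches back to tlbmiss on a miss)
       tlbdone:   continue */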
11669 /*
11670 * Jump to the TLB lookup code.
11671 */
11672 if (!TlbState.fSkip)
11673 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11674
11675 /*
11676 * tlbmiss:
11677 *
11678 * Call helper to do the fetching.
11679 * We flush all guest register shadow copies here.
11680 */
11681 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
11682
11683#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11684 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11685#else
11686 RT_NOREF(idxInstr);
11687#endif
11688
11689#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11690 /* Save variables in volatile registers. */
11691 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
11692 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11693#endif
11694
11695 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
11696 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
11697#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11698 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
11699#else
11700 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11701#endif
11702
11703 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
11704 if (iSegReg != UINT8_MAX)
11705 {
11706 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
11707 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
11708 }
11709
11710 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
11711 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
11712 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
11713
11714 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11715 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11716
11717 /* Done setting up parameters, make the call. */
11718 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11719
11720 /*
11721 * Put the output in the right registers.
11722 */
11723 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
11724 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
11725 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
11726
11727#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11728 /* Restore variables and guest shadow registers to volatile registers. */
11729 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11730 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11731#endif
11732
11733 Assert(pReNative->Core.aVars[idxVarUnmapInfo].idxReg == idxRegUnmapInfo);
11734 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
11735
11736#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11737 if (!TlbState.fSkip)
11738 {
11739        /* end of tlbmiss - Jump to the done label. */
11740 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11741 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11742
11743 /*
11744 * tlblookup:
11745 */
11746 off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
11747 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
11748 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
11749
11750 /*
11751 * Lookup tail code, specific to the MC when the above is moved into a separate function.
11752 */
11753 /* [idxVarUnmapInfo] = 0 - allocate register for it. There must be free ones now, so no spilling required. */
11754 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
11755
11756 /*
11757 * tlbdone:
11758 */
11759 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11760
11761# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11762 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
11763 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11764# endif
11765 }
11766#else
11767 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
11768#endif
11769
11770 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
11771 iemNativeVarRegisterRelease(pReNative, idxVarMem);
11772
11773 return off;
11774}
11775
11776
11777#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
11778 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, \
11779 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
11780
11781#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
11782 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_WRITE, \
11783 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
11784
11785#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
11786 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ, \
11787 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
11788
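/** Emits code for IEM_MC_MEM_COMMIT_AND_UNMAP_RW/WO/RO. */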
11789DECL_INLINE_THROW(uint32_t)
11790iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
11791 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
11792{
11793 /*
11794 * Assert sanity.
11795 */
11796 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
11797 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
11798 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
11799 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
11800#ifdef VBOX_STRICT
11801 switch (fAccess & IEM_ACCESS_TYPE_MASK)
11802 {
11803 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
11804 case IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
11805 case IEM_ACCESS_TYPE_READ: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
11806 default: AssertFailed();
11807 }
11808#else
11809 RT_NOREF(fAccess);
11810#endif
11811
11812 /*
11813 * To keep things simple we have to commit any pending writes first as we
11814 * may end up making calls (there shouldn't be any at this point, so this
11815 * is just for consistency).
11816 */
11817 /** @todo we could postpone this till we make the call and reload the
11818 * registers after returning from the call. Not sure if that's sensible or
11819 * not, though. */
11820 off = iemNativeRegFlushPendingWrites(pReNative, off);
11821
11822 /*
11823 * Move/spill/flush stuff out of call-volatile registers.
11824 *
11825 * We exclude any register holding the bUnmapInfo variable, as we'll be
11826 * checking it after returning from the call and will free it afterwards.
11827 */
11828 /** @todo save+restore active registers and maybe guest shadows in miss
11829 * scenario. */
11830 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
11831
11832 /*
11833 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
11834 * to call the unmap helper function.
11835 *
11836     * The likelihood of it being zero is higher than for the TLB hit when doing
11837     * the mapping, as a TLB miss for a well aligned and unproblematic memory
11838 * access should also end up with a mapping that won't need special unmapping.
11839 */
11840 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
11841 * should speed up things for the pure interpreter as well when TLBs
11842 * are enabled. */
11843#ifdef RT_ARCH_AMD64
11844 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
11845 {
11846 /* test byte [rbp - xxx], 0ffh */
11847 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11848 pbCodeBuf[off++] = 0xf6;
11849 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
11850 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
11851 pbCodeBuf[off++] = 0xff;
11852 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11853 }
11854 else
11855#endif
11856 {
11857 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
11858 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
11859 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
11860 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
11861 }
11862 uint32_t const offJmpFixup = off;
11863    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
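    /* Sketch of the emitted shape (illustrative):
     *      test    bUnmapInfo, 0ffh
     *      jz      .done                       ; zero means nothing to unmap
     *      call    iemNativeHlpMemCommitAndUnmap{Rw,Wo,Ro}
     *  .done:
     * with the jz displacement filled in by the fixup at the end of this function. */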
11864
11865 /*
11866 * Call the unmap helper function.
11867 */
11868#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
11869 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11870#else
11871 RT_NOREF(idxInstr);
11872#endif
11873
11874 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
11875 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
11876 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11877
11878 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11879 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11880
11881 /* Done setting up parameters, make the call. */
11882 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11883
11884    /* The bUnmapInfo variable is implicitly freed by these MCs. */
11885 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
11886
11887 /*
11888 * Done, just fixup the jump for the non-call case.
11889 */
11890 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
11891
11892 return off;
11893}
11894
11895
11896
11897/*********************************************************************************************************************************
11898* State and Exceptions *
11899*********************************************************************************************************************************/
11900
11901#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11902#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11903
11904#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11905#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11906#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11907
11908#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11909#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11910#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11911
11912
11913DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
11914{
11915 /** @todo this needs a lot more work later. */
11916 RT_NOREF(pReNative, fForChange);
11917 return off;
11918}
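/* Note: all the FPU/SSE/AVX prepare/actualize MCs above currently map to this stub,
   which emits no code yet (see the @todo). */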
11919
11920
11921/*********************************************************************************************************************************
11922* The native code generator functions for each MC block. *
11923*********************************************************************************************************************************/
11924
11925
11926/*
11927 * Include g_apfnIemNativeRecompileFunctions and associated functions.
11928 *
11929 * This should probably live in its own file later, but let's see what the
11930 * compile times turn out to be first.
11931 */
11932#include "IEMNativeFunctions.cpp.h"
11933
11934
11935
11936/*********************************************************************************************************************************
11937* Recompiler Core. *
11938*********************************************************************************************************************************/
11939
11940
11941/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
11942static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
11943{
11944 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
11945 pDis->cbCachedInstr += cbMaxRead;
11946 RT_NOREF(cbMinRead);
11947 return VERR_NO_DATA;
11948}
11949
11950
11951/**
11952 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
11953 * @returns pszBuf.
11954 * @param fFlags The flags.
11955 * @param pszBuf The output buffer.
11956 * @param cbBuf The output buffer size. At least 32 bytes.
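 * Example output (illustrative): "64BIT CPL0 TYPE_NATIVE".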
11957 */
11958DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
11959{
11960 Assert(cbBuf >= 32);
11961 static RTSTRTUPLE const s_aModes[] =
11962 {
11963 /* [00] = */ { RT_STR_TUPLE("16BIT") },
11964 /* [01] = */ { RT_STR_TUPLE("32BIT") },
11965 /* [02] = */ { RT_STR_TUPLE("!2!") },
11966 /* [03] = */ { RT_STR_TUPLE("!3!") },
11967 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
11968 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
11969 /* [06] = */ { RT_STR_TUPLE("!6!") },
11970 /* [07] = */ { RT_STR_TUPLE("!7!") },
11971 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
11972 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
11973 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
11974 /* [0b] = */ { RT_STR_TUPLE("!b!") },
11975 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
11976 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
11977 /* [0e] = */ { RT_STR_TUPLE("!e!") },
11978 /* [0f] = */ { RT_STR_TUPLE("!f!") },
11979 /* [10] = */ { RT_STR_TUPLE("!10!") },
11980 /* [11] = */ { RT_STR_TUPLE("!11!") },
11981 /* [12] = */ { RT_STR_TUPLE("!12!") },
11982 /* [13] = */ { RT_STR_TUPLE("!13!") },
11983 /* [14] = */ { RT_STR_TUPLE("!14!") },
11984 /* [15] = */ { RT_STR_TUPLE("!15!") },
11985 /* [16] = */ { RT_STR_TUPLE("!16!") },
11986 /* [17] = */ { RT_STR_TUPLE("!17!") },
11987 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
11988 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
11989 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
11990 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
11991 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
11992 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
11993 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
11994 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
11995 };
11996 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
11997 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
11998 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
11999
12000 pszBuf[off++] = ' ';
12001 pszBuf[off++] = 'C';
12002 pszBuf[off++] = 'P';
12003 pszBuf[off++] = 'L';
12004 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
12005 Assert(off < 32);
12006
12007 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
12008
12009 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
12010 {
12011 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
12012 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
12013 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
12014 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
12015 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
12016 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
12017 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
12018 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
12019 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
12020 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
12021 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
12022 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
12023 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
12024 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
12025 };
12026 if (fFlags)
12027 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
12028 if (s_aFlags[i].fFlag & fFlags)
12029 {
12030 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
12031 pszBuf[off++] = ' ';
12032 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
12033 off += s_aFlags[i].cchName;
12034 fFlags &= ~s_aFlags[i].fFlag;
12035 if (!fFlags)
12036 break;
12037 }
12038 pszBuf[off] = '\0';
12039
12040 return pszBuf;
12041}
12042
12043
12044DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
12045{
12046 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
12047#if defined(RT_ARCH_AMD64)
12048 static const char * const a_apszMarkers[] =
12049 {
12050 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
12051 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
12052 };
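    /* Names for the special markers decoded further down (the branch that masks off
       bit 31 before indexing this table); ordinary threaded-call markers carry the
       function index in their high word instead. */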
12053#endif
12054
12055 char szDisBuf[512];
12056 DISSTATE Dis;
12057 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
12058 uint32_t const cNative = pTb->Native.cInstructions;
12059 uint32_t offNative = 0;
12060#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12061 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
12062#endif
12063 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
12064 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
12065 : DISCPUMODE_64BIT;
12066#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12067 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
12068#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12069 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
12070#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12071# error "Port me"
12072#else
12073 csh hDisasm = ~(size_t)0;
12074# if defined(RT_ARCH_AMD64)
12075 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
12076# elif defined(RT_ARCH_ARM64)
12077 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
12078# else
12079# error "Port me"
12080# endif
12081 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
12082#endif
12083
12084 /*
12085 * Print TB info.
12086 */
12087 pHlp->pfnPrintf(pHlp,
12088 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
12089 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
12090 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
12091 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
12092#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12093 if (pDbgInfo && pDbgInfo->cEntries > 1)
12094 {
12095 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
12096
12097 /*
12098         * This disassembly is driven by the debug info which follows the native
12099         * code and indicates where the next guest instruction starts, where the
12100         * labels are, and other such things.
12101 */
12102 uint32_t idxThreadedCall = 0;
12103 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
12104 uint8_t idxRange = UINT8_MAX;
12105 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
12106 uint32_t offRange = 0;
12107 uint32_t offOpcodes = 0;
12108 uint32_t const cbOpcodes = pTb->cbOpcodes;
12109 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
12110 uint32_t const cDbgEntries = pDbgInfo->cEntries;
12111 uint32_t iDbgEntry = 1;
12112 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
12113
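        /* The loop below walks the native instructions and, each time the next
           NativeOffset debug entry is reached, first prints the guest instructions,
           threaded calls, labels and register shadowing notes recorded at that point
           before resuming the native disassembly. */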
12114 while (offNative < cNative)
12115 {
12116 /* If we're at or have passed the point where the next chunk of debug
12117 info starts, process it. */
12118 if (offDbgNativeNext <= offNative)
12119 {
12120 offDbgNativeNext = UINT32_MAX;
12121 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
12122 {
12123 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
12124 {
12125 case kIemTbDbgEntryType_GuestInstruction:
12126 {
12127 /* Did the exec flag change? */
12128 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
12129 {
12130 pHlp->pfnPrintf(pHlp,
12131 " fExec change %#08x -> %#08x %s\n",
12132 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
12133 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
12134 szDisBuf, sizeof(szDisBuf)));
12135 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
12136 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
12137 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
12138 : DISCPUMODE_64BIT;
12139 }
12140
12141                             /* New opcode range? We need to fend off a spurious debug info entry here for cases
12142 where the compilation was aborted before the opcode was recorded and the actual
12143 instruction was translated to a threaded call. This may happen when we run out
12144 of ranges, or when some complicated interrupts/FFs are found to be pending or
12145 similar. So, we just deal with it here rather than in the compiler code as it
12146 is a lot simpler to do here. */
12147 if ( idxRange == UINT8_MAX
12148 || idxRange >= cRanges
12149 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
12150 {
12151 idxRange += 1;
12152 if (idxRange < cRanges)
12153 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
12154 else
12155 continue;
12156 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
12157 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
12158 + (pTb->aRanges[idxRange].idxPhysPage == 0
12159 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
12160 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
12161 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
12162 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
12163 pTb->aRanges[idxRange].idxPhysPage);
12164 GCPhysPc += offRange;
12165 }
12166
12167 /* Disassemble the instruction. */
12168 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
12169 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
12170 uint32_t cbInstr = 1;
12171 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
12172 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
12173 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
12174 if (RT_SUCCESS(rc))
12175 {
12176 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12177 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12178 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12179 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12180
12181 static unsigned const s_offMarker = 55;
12182 static char const s_szMarker[] = " ; <--- guest";
12183 if (cch < s_offMarker)
12184 {
12185 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
12186 cch = s_offMarker;
12187 }
12188 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
12189 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
12190
12191 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
12192 }
12193 else
12194 {
12195 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
12196 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
12197 cbInstr = 1;
12198 }
12199 GCPhysPc += cbInstr;
12200 offOpcodes += cbInstr;
12201 offRange += cbInstr;
12202 continue;
12203 }
12204
12205 case kIemTbDbgEntryType_ThreadedCall:
12206 pHlp->pfnPrintf(pHlp,
12207 " Call #%u to %s (%u args) - %s\n",
12208 idxThreadedCall,
12209 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
12210 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
12211 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
12212 idxThreadedCall++;
12213 continue;
12214
12215 case kIemTbDbgEntryType_GuestRegShadowing:
12216 {
12217 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
12218 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
12219 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
12220 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
12221 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
12222 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
12223 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
12224 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
12225 else
12226 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
12227 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
12228 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
12229 continue;
12230 }
12231
12232 case kIemTbDbgEntryType_Label:
12233 {
12234 const char *pszName = "what_the_fudge";
12235 const char *pszComment = "";
12236 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
12237 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
12238 {
12239 case kIemNativeLabelType_Return:
12240 pszName = "Return";
12241 break;
12242 case kIemNativeLabelType_ReturnBreak:
12243 pszName = "ReturnBreak";
12244 break;
12245 case kIemNativeLabelType_ReturnWithFlags:
12246 pszName = "ReturnWithFlags";
12247 break;
12248 case kIemNativeLabelType_NonZeroRetOrPassUp:
12249 pszName = "NonZeroRetOrPassUp";
12250 break;
12251 case kIemNativeLabelType_RaiseGp0:
12252 pszName = "RaiseGp0";
12253 break;
12254 case kIemNativeLabelType_ObsoleteTb:
12255 pszName = "ObsoleteTb";
12256 break;
12257 case kIemNativeLabelType_NeedCsLimChecking:
12258 pszName = "NeedCsLimChecking";
12259 break;
12260 case kIemNativeLabelType_CheckBranchMiss:
12261 pszName = "CheckBranchMiss";
12262 break;
12263 case kIemNativeLabelType_If:
12264 pszName = "If";
12265 fNumbered = true;
12266 break;
12267 case kIemNativeLabelType_Else:
12268 pszName = "Else";
12269 fNumbered = true;
12270 pszComment = " ; regs state restored pre-if-block";
12271 break;
12272 case kIemNativeLabelType_Endif:
12273 pszName = "Endif";
12274 fNumbered = true;
12275 break;
12276 case kIemNativeLabelType_CheckIrq:
12277 pszName = "CheckIrq_CheckVM";
12278 fNumbered = true;
12279 break;
12280 case kIemNativeLabelType_TlbLookup:
12281 pszName = "TlbLookup";
12282 fNumbered = true;
12283 break;
12284 case kIemNativeLabelType_TlbMiss:
12285 pszName = "TlbMiss";
12286 fNumbered = true;
12287 break;
12288 case kIemNativeLabelType_TlbDone:
12289 pszName = "TlbDone";
12290 fNumbered = true;
12291 break;
12292 case kIemNativeLabelType_Invalid:
12293 case kIemNativeLabelType_End:
12294 break;
12295 }
12296 if (fNumbered)
12297 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
12298 else
12299 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
12300 continue;
12301 }
12302
12303 case kIemTbDbgEntryType_NativeOffset:
12304 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
12305 Assert(offDbgNativeNext > offNative);
12306 break;
12307
12308 default:
12309 AssertFailed();
12310 }
12311 iDbgEntry++;
12312 break;
12313 }
12314 }
12315
12316 /*
12317 * Disassemble the next native instruction.
12318 */
12319 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
12320# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
12321 uint32_t cbInstr = sizeof(paNative[0]);
12322 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
12323 if (RT_SUCCESS(rc))
12324 {
12325# if defined(RT_ARCH_AMD64)
12326 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
12327 {
12328 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
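                    /* Marker layout (see iemNativeRecompile below): low word = call
                       index with bit 15 set when the call was recompiled, high word =
                       the threaded function number. */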
12329 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
12330 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
12331 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
12332 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
12333 uInfo & 0x8000 ? "recompiled" : "todo");
12334 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
12335 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
12336 else
12337 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
12338 }
12339 else
12340# endif
12341 {
12342# ifdef RT_ARCH_AMD64
12343 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12344 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12345 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12346 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12347# elif defined(RT_ARCH_ARM64)
12348 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
12349 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12350 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12351# else
12352# error "Port me"
12353# endif
12354 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
12355 }
12356 }
12357 else
12358 {
12359# if defined(RT_ARCH_AMD64)
12360 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
12361 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
12362# elif defined(RT_ARCH_ARM64)
12363 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
12364# else
12365# error "Port me"
12366# endif
12367 cbInstr = sizeof(paNative[0]);
12368 }
12369 offNative += cbInstr / sizeof(paNative[0]);
12370
12371# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12372 cs_insn *pInstr;
12373 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
12374 (uintptr_t)pNativeCur, 1, &pInstr);
12375 if (cInstrs > 0)
12376 {
12377 Assert(cInstrs == 1);
12378# if defined(RT_ARCH_AMD64)
12379 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
12380 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
12381# else
12382 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
12383 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
12384# endif
12385 offNative += pInstr->size / sizeof(*pNativeCur);
12386 cs_free(pInstr, cInstrs);
12387 }
12388 else
12389 {
12390# if defined(RT_ARCH_AMD64)
12391 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
12392                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
12393# else
12394 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
12395# endif
12396 offNative++;
12397 }
12398# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12399 }
12400 }
12401 else
12402#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
12403 {
12404 /*
12405 * No debug info, just disassemble the x86 code and then the native code.
12406 *
12407 * First the guest code:
12408 */
12409 for (unsigned i = 0; i < pTb->cRanges; i++)
12410 {
12411 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
12412 + (pTb->aRanges[i].idxPhysPage == 0
12413 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
12414 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
12415 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
12416 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
12417 unsigned off = pTb->aRanges[i].offOpcodes;
12418 /** @todo this ain't working when crossing pages! */
12419 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
12420 while (off < cbOpcodes)
12421 {
12422 uint32_t cbInstr = 1;
12423 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
12424 &pTb->pabOpcodes[off], cbOpcodes - off,
12425 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
12426 if (RT_SUCCESS(rc))
12427 {
12428 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12429 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12430 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12431 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12432 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
12433 GCPhysPc += cbInstr;
12434 off += cbInstr;
12435 }
12436 else
12437 {
12438 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
12439 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
12440 break;
12441 }
12442 }
12443 }
12444
12445 /*
12446 * Then the native code:
12447 */
12448 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
12449 while (offNative < cNative)
12450 {
12451 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
12452# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
12453 uint32_t cbInstr = sizeof(paNative[0]);
12454 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
12455 if (RT_SUCCESS(rc))
12456 {
12457# if defined(RT_ARCH_AMD64)
12458 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
12459 {
12460 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
12461 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
12462 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
12463 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
12464 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
12465 uInfo & 0x8000 ? "recompiled" : "todo");
12466 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
12467 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
12468 else
12469 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
12470 }
12471 else
12472# endif
12473 {
12474# ifdef RT_ARCH_AMD64
12475 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12476 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12477 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12478 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12479# elif defined(RT_ARCH_ARM64)
12480 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
12481 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12482 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12483# else
12484# error "Port me"
12485# endif
12486 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
12487 }
12488 }
12489 else
12490 {
12491# if defined(RT_ARCH_AMD64)
12492 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
12493 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
12494# else
12495 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
12496# endif
12497 cbInstr = sizeof(paNative[0]);
12498 }
12499 offNative += cbInstr / sizeof(paNative[0]);
12500
12501# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12502 cs_insn *pInstr;
12503 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
12504 (uintptr_t)pNativeCur, 1, &pInstr);
12505 if (cInstrs > 0)
12506 {
12507 Assert(cInstrs == 1);
12508# if defined(RT_ARCH_AMD64)
12509 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
12510 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
12511# else
12512 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
12513 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
12514# endif
12515 offNative += pInstr->size / sizeof(*pNativeCur);
12516 cs_free(pInstr, cInstrs);
12517 }
12518 else
12519 {
12520# if defined(RT_ARCH_AMD64)
12521 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
12522                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
12523# else
12524 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
12525# endif
12526 offNative++;
12527 }
12528# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12529 }
12530 }
12531
12532#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
12533 /* Cleanup. */
12534 cs_close(&hDisasm);
12535#endif
12536}
12537
12538
12539/**
12540 * Recompiles the given threaded TB into a native one.
12541 *
12542 * In case of failure the translation block will be returned as-is.
12543 *
12544 * @returns pTb.
12545 * @param pVCpu The cross context virtual CPU structure of the calling
12546 * thread.
12547 * @param pTb The threaded translation to recompile to native.
12548 */
12549DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
12550{
12551 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
12552
12553 /*
12554 * The first time thru, we allocate the recompiler state, the other times
12555 * we just need to reset it before using it again.
12556 */
12557 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
12558 if (RT_LIKELY(pReNative))
12559 iemNativeReInit(pReNative, pTb);
12560 else
12561 {
12562 pReNative = iemNativeInit(pVCpu, pTb);
12563 AssertReturn(pReNative, pTb);
12564 }
12565
12566 /*
12567 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
12568 * for aborting if an error happens.
12569 */
12570 uint32_t cCallsLeft = pTb->Thrd.cCalls;
12571#ifdef LOG_ENABLED
12572 uint32_t const cCallsOrg = cCallsLeft;
12573#endif
12574 uint32_t off = 0;
12575 int rc = VINF_SUCCESS;
12576 IEMNATIVE_TRY_SETJMP(pReNative, rc)
12577 {
12578 /*
12579 * Emit prolog code (fixed).
12580 */
12581 off = iemNativeEmitProlog(pReNative, off);
12582
12583 /*
12584 * Convert the calls to native code.
12585 */
12586#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12587 int32_t iGstInstr = -1;
12588#endif
12589#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
12590 uint32_t cThreadedCalls = 0;
12591 uint32_t cRecompiledCalls = 0;
12592#endif
12593 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
12594 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
12595 while (cCallsLeft-- > 0)
12596 {
12597 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
12598
12599 /*
12600 * Debug info and assembly markup.
12601 */
12602 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
12603 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
12604#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12605 iemNativeDbgInfoAddNativeOffset(pReNative, off);
12606 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
12607 {
12608 if (iGstInstr < (int32_t)pTb->cInstructions)
12609 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
12610 else
12611 Assert(iGstInstr == pTb->cInstructions);
12612 iGstInstr = pCallEntry->idxInstr;
12613 }
12614 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
12615#endif
12616#if defined(VBOX_STRICT)
12617 off = iemNativeEmitMarker(pReNative, off,
12618 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
12619 pCallEntry->enmFunction));
12620#endif
12621#if defined(VBOX_STRICT)
12622 iemNativeRegAssertSanity(pReNative);
12623#endif
12624
12625 /*
12626 * Actual work.
12627 */
12628 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
12629 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)"));
12630 if (pfnRecom) /** @todo stats on this. */
12631 {
12632 off = pfnRecom(pReNative, off, pCallEntry);
12633 STAM_REL_STATS({cRecompiledCalls++;});
12634 }
12635 else
12636 {
12637 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
12638 STAM_REL_STATS({cThreadedCalls++;});
12639 }
12640 Assert(off <= pReNative->cInstrBufAlloc);
12641 Assert(pReNative->cCondDepth == 0);
12642
12643 /*
12644 * Advance.
12645 */
12646 pCallEntry++;
12647 }
12648
12649 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
12650 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
12651 if (!cThreadedCalls)
12652 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
12653
12654 /*
12655 * Emit the epilog code.
12656 */
12657 uint32_t idxReturnLabel;
12658 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
12659
12660 /*
12661 * Generate special jump labels.
12662 */
12663 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
12664 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
12665 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
12666 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
12667 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
12668 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
12669 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
12670 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
12671 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
12672 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
12673 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
12674 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
12675 }
12676 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
12677 {
12678 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
12679 return pTb;
12680 }
12681 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
12682 Assert(off <= pReNative->cInstrBufAlloc);
12683
12684 /*
12685     * Make sure all labels have been defined.
12686 */
12687 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
12688#ifdef VBOX_STRICT
12689 uint32_t const cLabels = pReNative->cLabels;
12690 for (uint32_t i = 0; i < cLabels; i++)
12691 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
12692#endif
12693
12694 /*
12695 * Allocate executable memory, copy over the code we've generated.
12696 */
12697 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
12698 if (pTbAllocator->pDelayedFreeHead)
12699 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
12700
12701 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
12702 AssertReturn(paFinalInstrBuf, pTb);
12703 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
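    /* Note: the copy is only made ready for execution by the
       iemExecMemAllocatorReadyForUse call further down, after the fixups below
       have been applied. */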
12704
12705 /*
12706 * Apply fixups.
12707 */
12708 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
12709 uint32_t const cFixups = pReNative->cFixups;
12710 for (uint32_t i = 0; i < cFixups; i++)
12711 {
12712 Assert(paFixups[i].off < off);
12713 Assert(paFixups[i].idxLabel < cLabels);
12714 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
12715 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
12716 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
12717 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
12718 switch (paFixups[i].enmType)
12719 {
12720#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
12721 case kIemNativeFixupType_Rel32:
12722 Assert(paFixups[i].off + 4 <= off);
12723 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12724 continue;
12725
12726#elif defined(RT_ARCH_ARM64)
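            /* The three ARM64 fixup types below patch branch displacements (counted
               in 32-bit instruction units) into the immediate fields used by B/BL
               (imm26 at bit 0), B.cond/CBZ/CBNZ (imm19 at bit 5) and TBZ/TBNZ (imm14
               at bit 5) style encodings; the masks keep the rest of the instruction
               word intact. */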
12727 case kIemNativeFixupType_RelImm26At0:
12728 {
12729 Assert(paFixups[i].off < off);
12730 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12731 Assert(offDisp >= -262144 && offDisp < 262144);
12732 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
12733 continue;
12734 }
12735
12736 case kIemNativeFixupType_RelImm19At5:
12737 {
12738 Assert(paFixups[i].off < off);
12739 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12740 Assert(offDisp >= -262144 && offDisp < 262144);
12741 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
12742 continue;
12743 }
12744
12745 case kIemNativeFixupType_RelImm14At5:
12746 {
12747 Assert(paFixups[i].off < off);
12748 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12749 Assert(offDisp >= -8192 && offDisp < 8192);
12750 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
12751 continue;
12752 }
12753
12754#endif
12755 case kIemNativeFixupType_Invalid:
12756 case kIemNativeFixupType_End:
12757 break;
12758 }
12759 AssertFailed();
12760 }
12761
12762 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
12763 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
12764
12765 /*
12766 * Convert the translation block.
12767 */
12768 RTMemFree(pTb->Thrd.paCalls);
12769 pTb->Native.paInstructions = paFinalInstrBuf;
12770 pTb->Native.cInstructions = off;
12771 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
12772#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12773    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
12774 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
12775#endif
12776
12777 Assert(pTbAllocator->cThreadedTbs > 0);
12778 pTbAllocator->cThreadedTbs -= 1;
12779 pTbAllocator->cNativeTbs += 1;
12780 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
12781
12782#ifdef LOG_ENABLED
12783 /*
12784 * Disassemble to the log if enabled.
12785 */
12786 if (LogIs3Enabled())
12787 {
12788 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
12789 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
12790# ifdef DEBUG_bird
12791 RTLogFlush(NULL);
12792# endif
12793 }
12794#endif
12795 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
12796
12797 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
12798 return pTb;
12799}
12800