VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 102876

Last change on this file since 102876 was 102876, checked in by vboxsync, 12 months ago

VMM/IEM: Call different threaded functions for each branch in a conditional jump (jcc, loop, loopcc) so we can quit immediately when taking a different branch from what we did during compilation. bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 102876 2024-01-15 14:26:27Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
136#endif
137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
138static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
139static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
140#endif
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
142DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
143 IEMNATIVEGSTREG enmGstReg, uint32_t off);
144DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store internal data
152 * in the chunk.
153 *
154 * Using RTHeapSimple is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation lets us restrict page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
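
/* A minimal sketch (not part of the build) of how a request size maps onto
 * sub-allocation units with the constants above; the helper name is made up
 * for illustration. */
#if 0
static uint32_t iemExecMemSketchReqUnits(uint32_t cbReq)
{
    /* Round up to whole 128 byte units, e.g. 300 bytes -> 3 units (384 bytes). */
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif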
167
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Init once for the critical section. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
234
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDE into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
268/** Pointer to per-chunk info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
281 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap.
344 * For the sake of simplicity (read: laziness), they are allocated as one
345 * continuous block. */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to a chunk). */
351 uint32_t cBitmapElementsPerChunk;
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * requested memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 /** The size of the initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside in 32-bit RVA distance to the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
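
/* A short worked example (not built) of the allocation bitmap bookkeeping above,
 * assuming the 64 MB chunk size picked for larger cbMax values in
 * iemExecMemAllocatorInit below: 64 MB / 128 byte units = 524288 units per chunk,
 * i.e. 8192 uint64_t bitmap elements (64 KB of bitmap) per chunk, with chunk
 * idxChunk owning pbmAlloc[8192 * idxChunk] onwards. */
#if 0
AssertCompile((_64M >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) == 524288);     /* units per 64 MB chunk */
AssertCompile((_64M >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6)) == 8192); /* uint64_t bitmap elements per chunk */
#endif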
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for cReqUnits consecutive clear bits
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516
517 for (unsigned iIteration = 0;; iIteration++)
518 {
519 /*
520 * Adjust the request size so it'll fit the allocator alignment/whatnot.
521 *
522 * For the RTHeapSimple allocator this means to follow the logic described
523 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
524 * existing chunks if we think we've got sufficient free memory around.
525 *
526 * While for the alternative one we just align it up to a whole unit size.
527 */
528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
529 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
530#else
531 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
532#endif
533 if (cbReq <= pExecMemAllocator->cbFree)
534 {
535 uint32_t const cChunks = pExecMemAllocator->cChunks;
536 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
537 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
544 {
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 }
549 }
550
551 /*
552 * Can we grow it with another chunk?
553 */
554 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
555 {
556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
557 AssertLogRelRCReturn(rc, NULL);
558
559 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
560 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
561 if (pvRet)
562 return pvRet;
563 AssertFailed();
564 }
565
566 /*
567 * Try pruning native TBs once.
568 */
569 if (iIteration == 0)
570 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
571 else
572 {
573 /** @todo stats... */
574 return NULL;
575 }
576 }
577
578}
579
580
581/** This is a hook that we may need later for changing memory protection back
582 * to readonly+exec */
583static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
584{
585#ifdef RT_OS_DARWIN
586 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
587 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
588 AssertRC(rc); RT_NOREF(pVCpu);
589
590 /*
591 * Flush the instruction cache:
592 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
593 */
594 /* sys_dcache_flush(pv, cb); - not necessary */
595 sys_icache_invalidate(pv, cb);
596#else
597 RT_NOREF(pVCpu, pv, cb);
598#endif
599}
600
601
602/**
603 * Frees executable memory.
604 */
605void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
609 Assert(pv);
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612#else
613 Assert(!((uintptr_t)pv & 63));
614#endif
615
616 /* Align the size as we did when allocating the block. */
617#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
618 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#else
620 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
621#endif
622
623 /* Free it / assert sanity. */
624#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
627 bool fFound = false;
628 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
629 {
630 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
631 fFound = offChunk < cbChunk;
632 if (fFound)
633 {
634#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
635 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
636 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
637
638 /* Check that it's valid and free it. */
639 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
641 for (uint32_t i = 1; i < cReqUnits; i++)
642 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
643 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
644
645 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
646 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
647
648 /* Update the stats. */
649 pExecMemAllocator->cbAllocated -= cb;
650 pExecMemAllocator->cbFree += cb;
651 pExecMemAllocator->cAllocations -= 1;
652 return;
653#else
654 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
655 break;
656#endif
657 }
658 }
659# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
660 AssertFailed();
661# else
662 Assert(fFound);
663# endif
664#endif
665
666#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
667 /* Update stats while cb is freshly calculated. */
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Free it. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674#endif
675}
676
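/* A minimal sketch (not part of the build) of the intended lifecycle of an
 * executable allocation; the caller, code buffer and sizes are made up for
 * illustration. */
#if 0
static void iemExecMemSketchLifecycle(PVMCPUCC pVCpu, uint8_t const *pbCode, size_t cbCode)
{
    /* 1. Allocate memory that is writable (on darwin: RW, not yet executable). */
    void *pvDst = iemExecMemAllocatorAlloc(pVCpu, (uint32_t)cbCode);
    if (pvDst)
    {
        /* 2. Copy/emit the native code while the pages are writable. */
        memcpy(pvDst, pbCode, cbCode);

        /* 3. Switch the pages to read+exec and flush the instruction cache where needed. */
        iemExecMemAllocatorReadyForUse(pVCpu, pvDst, cbCode);

        /* ... execute the translation block ... */

        /* 4. Give the memory back to the sub-allocator when the TB is freed. */
        iemExecMemAllocatorFree(pVCpu, pvDst, cbCode);
    }
}
#endif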
677
678
679#ifdef IN_RING3
680# ifdef RT_OS_WINDOWS
681
682/**
683 * Initializes the unwind info structures for windows hosts.
684 */
685static int
686iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
687 void *pvChunk, uint32_t idxChunk)
688{
689 RT_NOREF(pVCpu);
690
691 /*
692 * The AMD64 unwind opcodes.
693 *
694 * This is a program that starts with RSP after a RET instruction that
695 * ends up in recompiled code, and the operations we describe here will
696 * restore all non-volatile registers and bring RSP back to where our
697 * RET address is. This means it's reverse order from what happens in
698 * the prologue.
699 *
700 * Note! Using a frame register approach here both because we have one
701 * and mainly because the UWOP_ALLOC_LARGE argument values
702 * would be a pain to write initializers for. On the positive
703 * side, we're impervious to changes in the stack variable
704 * area and can deal with dynamic stack allocations if necessary.
705 */
706 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
707 {
708 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
709 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
710 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
711 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
712 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
713 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
714 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
715 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
716 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
717 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
718 };
719 union
720 {
721 IMAGE_UNWIND_INFO Info;
722 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
723 } s_UnwindInfo =
724 {
725 {
726 /* .Version = */ 1,
727 /* .Flags = */ 0,
728 /* .SizeOfProlog = */ 16, /* whatever */
729 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
730 /* .FrameRegister = */ X86_GREG_xBP,
731 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
732 }
733 };
734 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
735 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
736
737 /*
738 * Calc how much space we need and allocate it off the exec heap.
739 */
740 unsigned const cFunctionEntries = 1;
741 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
742 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
743# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
744 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
745 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
746 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
747# else
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
749 - pExecMemAllocator->cbHeapBlockHdr;
750 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
751 32 /*cbAlignment*/);
752# endif
753 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
754 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
755
756 /*
757 * Initialize the structures.
758 */
759 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
760
761 paFunctions[0].BeginAddress = 0;
762 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
763 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
764
765 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
766 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
767
768 /*
769 * Register it.
770 */
771 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
772 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
773
774 return VINF_SUCCESS;
775}
776
777
778# else /* !RT_OS_WINDOWS */
779
780/**
781 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
784{
785 if (iValue >= 64)
786 {
787 Assert(iValue < 0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
790 }
791 else if (iValue >= 0)
792 *Ptr.pb++ = (uint8_t)iValue;
793 else if (iValue > -64)
794 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
795 else
796 {
797 Assert(iValue > -0x2000);
798 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
799 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
800 }
801 return Ptr;
802}
803
804
805/**
806 * Emits an ULEB128 encoded value (up to 64-bit wide).
807 */
808DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
809{
810 while (uValue >= 0x80)
811 {
812 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
813 uValue >>= 7;
814 }
815 *Ptr.pb++ = (uint8_t)uValue;
816 return Ptr;
817}
818
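/* A quick sketch (not built) of the byte sequences the two helpers above produce
 * for a few example values; the buffer and function name are made up for
 * illustration. */
#if 0
static void iemDwarfSketchLebExamples(void)
{
    uint8_t    abBuf[16];
    RTPTRUNION Ptr = { abBuf };
    Ptr = iemDwarfPutLeb128(Ptr, 300);     /* signed LEB128: 0xac 0x02 */
    Ptr = iemDwarfPutLeb128(Ptr, -100);    /* signed LEB128: 0x9c 0x7f */
    Ptr = iemDwarfPutUleb128(Ptr, 624485); /* unsigned LEB128: 0xe5 0x8e 0x26 */
}
#endif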
819
820/**
821 * Emits a CFA rule as register @a uReg + offset @a off.
822 */
823DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
824{
825 *Ptr.pb++ = DW_CFA_def_cfa;
826 Ptr = iemDwarfPutUleb128(Ptr, uReg);
827 Ptr = iemDwarfPutUleb128(Ptr, off);
828 return Ptr;
829}
830
831
832/**
833 * Emits a register (@a uReg) save location:
834 * CFA + @a off * data_alignment_factor
835 */
836DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
837{
838 if (uReg < 0x40)
839 *Ptr.pb++ = DW_CFA_offset | uReg;
840 else
841 {
842 *Ptr.pb++ = DW_CFA_offset_extended;
843 Ptr = iemDwarfPutUleb128(Ptr, uReg);
844 }
845 Ptr = iemDwarfPutUleb128(Ptr, off);
846 return Ptr;
847}
848
849
850# if 0 /* unused */
851/**
852 * Emits a register (@a uReg) save location, using signed offset:
853 * CFA + @a offSigned * data_alignment_factor
854 */
855DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
856{
857 *Ptr.pb++ = DW_CFA_offset_extended_sf;
858 Ptr = iemDwarfPutUleb128(Ptr, uReg);
859 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
860 return Ptr;
861}
862# endif
863
864
865/**
866 * Initializes the unwind info section for non-windows hosts.
867 */
868static int
869iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
870 void *pvChunk, uint32_t idxChunk)
871{
872 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
874
875 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
876
877 /*
878 * Generate the CIE first.
879 */
880# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
881 uint8_t const iDwarfVer = 3;
882# else
883 uint8_t const iDwarfVer = 4;
884# endif
885 RTPTRUNION const PtrCie = Ptr;
886 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
887 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
888 *Ptr.pb++ = iDwarfVer; /* DWARF version */
889 *Ptr.pb++ = 0; /* Augmentation. */
890 if (iDwarfVer >= 4)
891 {
892 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
893 *Ptr.pb++ = 0; /* Segment selector size. */
894 }
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
897# else
898 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
899# endif
900 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
903# elif defined(RT_ARCH_ARM64)
904 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
905# else
906# error "port me"
907# endif
908 /* Initial instructions: */
909# ifdef RT_ARCH_AMD64
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
918# elif defined(RT_ARCH_ARM64)
919# if 1
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
921# else
922 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
923# endif
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
936 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
937 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
938# else
939# error "port me"
940# endif
941 while ((Ptr.u - PtrCie.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the CIE size. */
944 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
945
946 /*
947 * Generate an FDE for the whole chunk area.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
951# endif
952 RTPTRUNION const PtrFde = Ptr;
953 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
954 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
955 Ptr.pu32++;
956 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
957 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
958# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
959 *Ptr.pb++ = DW_CFA_nop;
960# endif
961 while ((Ptr.u - PtrFde.u) & 3)
962 *Ptr.pb++ = DW_CFA_nop;
963 /* Finalize the FDE size. */
964 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
965
966 /* Terminator entry. */
967 *Ptr.pu32++ = 0;
968 *Ptr.pu32++ = 0; /* just to be sure... */
969 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
970
971 /*
972 * Register it.
973 */
974# ifdef IEMNATIVE_USE_LIBUNWIND
975 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
976# else
977 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
978 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
979# endif
980
981# ifdef IEMNATIVE_USE_GDB_JIT
982 /*
983 * Now for telling GDB about this (experimental).
984 *
985 * This seems to work best with ET_DYN.
986 */
987 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
988# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
989 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
990 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
991# else
992 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
993 - pExecMemAllocator->cbHeapBlockHdr;
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
995# endif
996 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
997 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
998
999 RT_ZERO(*pSymFile);
1000
1001 /*
1002 * The ELF header:
1003 */
1004 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1005 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1006 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1007 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1008 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1009 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1010 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1011 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1012# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1013 pSymFile->EHdr.e_type = ET_DYN;
1014# else
1015 pSymFile->EHdr.e_type = ET_REL;
1016# endif
1017# ifdef RT_ARCH_AMD64
1018 pSymFile->EHdr.e_machine = EM_AMD64;
1019# elif defined(RT_ARCH_ARM64)
1020 pSymFile->EHdr.e_machine = EM_AARCH64;
1021# else
1022# error "port me"
1023# endif
1024 pSymFile->EHdr.e_version = 1; /*?*/
1025 pSymFile->EHdr.e_entry = 0;
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1028# else
1029 pSymFile->EHdr.e_phoff = 0;
1030# endif
1031 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1032 pSymFile->EHdr.e_flags = 0;
1033 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1035 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1036 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1037# else
1038 pSymFile->EHdr.e_phentsize = 0;
1039 pSymFile->EHdr.e_phnum = 0;
1040# endif
1041 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1042 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1043 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1044
1045 uint32_t offStrTab = 0;
1046#define APPEND_STR(a_szStr) do { \
1047 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1048 offStrTab += sizeof(a_szStr); \
1049 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1050 } while (0)
1051#define APPEND_STR_FMT(a_szStr, ...) do { \
1052 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1053 offStrTab++; \
1054 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1055 } while (0)
1056
1057 /*
1058 * Section headers.
1059 */
1060 /* Section header #0: NULL */
1061 unsigned i = 0;
1062 APPEND_STR("");
1063 RT_ZERO(pSymFile->aShdrs[i]);
1064 i++;
1065
1066 /* Section header: .eh_frame */
1067 pSymFile->aShdrs[i].sh_name = offStrTab;
1068 APPEND_STR(".eh_frame");
1069 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1070 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1071# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1072 pSymFile->aShdrs[i].sh_offset
1073 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1074# else
1075 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1076 pSymFile->aShdrs[i].sh_offset = 0;
1077# endif
1078
1079 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1080 pSymFile->aShdrs[i].sh_link = 0;
1081 pSymFile->aShdrs[i].sh_info = 0;
1082 pSymFile->aShdrs[i].sh_addralign = 1;
1083 pSymFile->aShdrs[i].sh_entsize = 0;
1084 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1085 i++;
1086
1087 /* Section header: .shstrtab */
1088 unsigned const iShStrTab = i;
1089 pSymFile->EHdr.e_shstrndx = iShStrTab;
1090 pSymFile->aShdrs[i].sh_name = offStrTab;
1091 APPEND_STR(".shstrtab");
1092 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1093 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1095 pSymFile->aShdrs[i].sh_offset
1096 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1097# else
1098 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1099 pSymFile->aShdrs[i].sh_offset = 0;
1100# endif
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 i++;
1107
1108 /* Section header: .symtab */
1109 pSymFile->aShdrs[i].sh_name = offStrTab;
1110 APPEND_STR(".symtab");
1111 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1112 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1113 pSymFile->aShdrs[i].sh_offset
1114 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1115 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_link = iShStrTab;
1117 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1118 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1119 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1120 i++;
1121
1122# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1123 /* Section header: .dynsym */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".dynsym");
1126 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1128 pSymFile->aShdrs[i].sh_offset
1129 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1130 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_link = iShStrTab;
1132 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1133 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1134 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1135 i++;
1136# endif
1137
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 /* Section header: .dynamic */
1140 pSymFile->aShdrs[i].sh_name = offStrTab;
1141 APPEND_STR(".dynamic");
1142 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1143 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1144 pSymFile->aShdrs[i].sh_offset
1145 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1146 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1147 pSymFile->aShdrs[i].sh_link = iShStrTab;
1148 pSymFile->aShdrs[i].sh_info = 0;
1149 pSymFile->aShdrs[i].sh_addralign = 1;
1150 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1151 i++;
1152# endif
1153
1154 /* Section header: .text */
1155 unsigned const iShText = i;
1156 pSymFile->aShdrs[i].sh_name = offStrTab;
1157 APPEND_STR(".text");
1158 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1159 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1161 pSymFile->aShdrs[i].sh_offset
1162 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1163# else
1164 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1165 pSymFile->aShdrs[i].sh_offset = 0;
1166# endif
1167 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1168 pSymFile->aShdrs[i].sh_link = 0;
1169 pSymFile->aShdrs[i].sh_info = 0;
1170 pSymFile->aShdrs[i].sh_addralign = 1;
1171 pSymFile->aShdrs[i].sh_entsize = 0;
1172 i++;
1173
1174 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1175
1176# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1177 /*
1178 * The program headers:
1179 */
1180 /* Everything in a single LOAD segment: */
1181 i = 0;
1182 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1183 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = 0;
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1189 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1190 i++;
1191 /* The .dynamic segment. */
1192 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1193 pSymFile->aPhdrs[i].p_flags = PF_R;
1194 pSymFile->aPhdrs[i].p_offset
1195 = pSymFile->aPhdrs[i].p_vaddr
1196 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1197 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1198 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1199 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1200 i++;
1201
1202 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1203
1204 /*
1205 * The dynamic section:
1206 */
1207 i = 0;
1208 pSymFile->aDyn[i].d_tag = DT_SONAME;
1209 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1210 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1219 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1220 i++;
1221 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1222 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1223 i++;
1224 pSymFile->aDyn[i].d_tag = DT_NULL;
1225 i++;
1226 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1227# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1228
1229 /*
1230 * Symbol tables:
1231 */
1232 /** @todo gdb doesn't seem to really like this ... */
1233 i = 0;
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1241 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1242# endif
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = 0;
1246 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1247 pSymFile->aSymbols[i].st_value = 0;
1248 pSymFile->aSymbols[i].st_size = 0;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251 i++;
1252
1253 pSymFile->aSymbols[i].st_name = offStrTab;
1254 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1255# if 0
1256 pSymFile->aSymbols[i].st_shndx = iShText;
1257 pSymFile->aSymbols[i].st_value = 0;
1258# else
1259 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1260 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1261# endif
1262 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1263 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1264 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1265# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1266 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1267 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1268# endif
1269 i++;
1270
1271 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1272 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1273
1274 /*
1275 * The GDB JIT entry and informing GDB.
1276 */
1277 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1278# if 1
1279 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1280# else
1281 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1282# endif
1283
1284 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1285 RTCritSectEnter(&g_IemNativeGdbJitLock);
1286 pEhFrame->GdbJitEntry.pNext = NULL;
1287 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1288 if (__jit_debug_descriptor.pTail)
1289 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1290 else
1291 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1292 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1293 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1294
1295 /* Notify GDB: */
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1297 __jit_debug_register_code();
1298 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1299 RTCritSectLeave(&g_IemNativeGdbJitLock);
1300
1301# else /* !IEMNATIVE_USE_GDB_JIT */
1302 RT_NOREF(pVCpu);
1303# endif /* !IEMNATIVE_USE_GDB_JIT */
1304
1305 return VINF_SUCCESS;
1306}
1307
1308# endif /* !RT_OS_WINDOWS */
1309#endif /* IN_RING3 */
1310
1311
1312/**
1313 * Adds another chunk to the executable memory allocator.
1314 *
1315 * This is used by the init code for the initial allocation and later by the
1316 * regular allocator function when it's out of memory.
1317 */
1318static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1319{
1320 /* Check that we've room for growth. */
1321 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1322 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1323
1324 /* Allocate a chunk. */
1325#ifdef RT_OS_DARWIN
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1327#else
1328 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1329#endif
1330 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 int rc = VINF_SUCCESS;
1334#else
1335 /* Initialize the heap for the chunk. */
1336 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1337 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1338 AssertRC(rc);
1339 if (RT_SUCCESS(rc))
1340 {
1341 /*
1342 * We want the memory to be aligned on 64 byte, so the first time thru
1343 * here we do some exploratory allocations to see how we can achieve this.
1344 * On subsequent runs we only make an initial adjustment allocation, if
1345 * necessary.
1346 *
1347 * Since we own the heap implementation, we know that the internal block
1348 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1349 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1350 * to the size, align up by 64 bytes, and subtract 32 bytes.
1351 *
1352 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1353 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1354 * allocation to force subsequent allocations to return 64 byte aligned
1355 * user areas.
1356 */
1357 if (!pExecMemAllocator->cbHeapBlockHdr)
1358 {
1359 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1360 pExecMemAllocator->cbHeapAlignTweak = 64;
1361 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1362 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1364
1365 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1372 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1373 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1374 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1375 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1376
1377 RTHeapSimpleFree(hHeap, pvTest2);
1378 RTHeapSimpleFree(hHeap, pvTest1);
1379 }
1380 else
1381 {
1382 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1383 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1384 }
1385 if (RT_SUCCESS(rc))
1386#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1387 {
1388 /*
1389 * Add the chunk.
1390 *
1391 * This must be done before the unwind init so windows can allocate
1392 * memory from the chunk when using the alternative sub-allocator.
1393 */
1394 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1395#ifdef IN_RING3
1396 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1397#endif
1398#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1400#else
1401 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1402 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1403 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1404 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1405#endif
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1411 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1412 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1413#else
1414 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1415 pExecMemAllocator->cbTotal += cbFree;
1416 pExecMemAllocator->cbFree += cbFree;
1417#endif
1418
1419#ifdef IN_RING3
1420 /*
1421 * Initialize the unwind information (this cannot really fail atm).
1422 * (This sets pvUnwindInfo.)
1423 */
1424 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1425 if (RT_SUCCESS(rc))
1426#endif
1427 {
1428 return VINF_SUCCESS;
1429 }
1430
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 /* Just in case the impossible happens, undo the above: */
1433 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1435 pExecMemAllocator->cChunks = idxChunk;
1436 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1437 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1438 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1439 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1440#endif
1441 }
1442#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 }
1444#endif
1445 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1446 RT_NOREF(pVCpu);
1447 return rc;
1448}
1449
1450
1451/**
1452 * Initializes the executable memory allocator for native recompilation on the
1453 * calling EMT.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu The cross context virtual CPU structure of the calling
1457 * thread.
1458 * @param cbMax The max size of the allocator.
1459 * @param cbInitial The initial allocator size.
1460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1461 * dependent).
1462 */
1463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1464{
1465 /*
1466 * Validate input.
1467 */
1468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1470 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1471 || cbChunk == 0
1472 || ( RT_IS_POWER_OF_TWO(cbChunk)
1473 && cbChunk >= _1M
1474 && cbChunk <= _256M
1475 && cbChunk <= cbMax),
1476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1477 VERR_OUT_OF_RANGE);
1478
1479 /*
1480 * Adjust/figure out the chunk size.
1481 */
1482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1483 {
1484 if (cbMax >= _256M)
1485 cbChunk = _64M;
1486 else
1487 {
1488 if (cbMax < _16M)
1489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1490 else
1491 cbChunk = (uint32_t)cbMax / 4;
1492 if (!RT_IS_POWER_OF_TWO(cbChunk))
1493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1494 }
1495 }
1496
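 /* A worked example of the sizing above, assuming ASMBitLastSetU32 returns the
    1-based index of the most significant set bit: cbMax = 40 MB gives
    cbChunk = 40 MB / 4 = 10 MB, which is then rounded to the power of two 16 MB;
    cbMax is rounded up to 48 MB below, giving cMaxChunks = 3. */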
1497 if (cbChunk > cbMax)
1498 cbMax = cbChunk;
1499 else
1500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1503
1504 /*
1505 * Allocate and initialize the allocator instance.
1506 */
1507 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1508#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1509 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1510 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1511 cbNeeded += cbBitmap * cMaxChunks;
1512 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1513 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1514#endif
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1533 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1534 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1535 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1536#endif
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1539#endif
1540 for (uint32_t i = 0; i < cMaxChunks; i++)
1541 {
1542#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1543 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1544 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1545#else
1546 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1547#endif
1548 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1549#ifdef IN_RING0
1550 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1551#else
1552 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1553#endif
1554 }
1555 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1556
1557 /*
1558 * Do the initial allocations.
1559 */
1560 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1561 {
1562 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1563 AssertLogRelRCReturn(rc, rc);
1564 }
1565
1566 pExecMemAllocator->idxChunkHint = 0;
1567
1568 return VINF_SUCCESS;
1569}
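
#if 0 /* Illustrative sizing sketch (not compiled): mirrors the default chunk size selection and
         rounding logic in iemExecMemAllocatorInit above for a 128 MiB cbMax so the arithmetic is
         easy to follow. The helper name is made up for this example. */
static void iemExecMemAllocatorSizingExample(void)
{
    uint64_t cbMax = _128M;     /* requested maximum; 0/UINT32_MAX for cbChunk means "pick the default" */

    /* Default selection: 16 MiB <= cbMax < 256 MiB, so a quarter of cbMax is used. */
    uint32_t cbChunk = (uint32_t)cbMax / 4;             /* = 32 MiB, already a power of two */
    Assert(RT_IS_POWER_OF_TWO(cbChunk));

    /* cbMax is then rounded up to a whole number of chunks. */
    cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;  /* stays 128 MiB */
    uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
    Assert(cMaxChunks == 4);
    RT_NOREF(cMaxChunks);
}
#endif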
1570
1571
1572/*********************************************************************************************************************************
1573* Native Recompilation *
1574*********************************************************************************************************************************/
1575
1576
1577/**
1578 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1581{
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1584}
1585
1586
1587/**
1588 * Used by TB code when it wants to raise a \#GP(0).
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1591{
1592 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1593#ifndef _MSC_VER
1594 return VINF_IEM_RAISED_XCPT; /* not reached */
1595#endif
1596}
1597
1598
1599/**
1600 * Used by TB code when detecting opcode changes.
1601 * @see iemThreadedFuncWorkerObsoleteTb
1602 */
1603IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1604{
1605 /* We set fSafeToFree to false because we're being called in the context
1606 of a TB callback function, which for native TBs means we cannot release
1607 the executable memory till we've returned our way back to iemTbExec, as
1608 that return path goes via the native code generated for the TB. */
1609 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1610 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1611 return VINF_IEM_REEXEC_BREAK;
1612}
1613
1614
1615/**
1616 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1619{
1620 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1621 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1622 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1623 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1624 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1625 return VINF_IEM_REEXEC_BREAK;
1626}
1627
1628
1629/**
1630 * Used by TB code when we missed a PC check after a branch.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1633{
1634 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1635 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1636 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1637 pVCpu->iem.s.pbInstrBuf));
1638 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1639 return VINF_IEM_REEXEC_BREAK;
1640}
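
#if 0 /* Illustrative sketch (not compiled): the "GCPhysWithOffset" value logged above is the guest
         physical address of the current PC, derived from the instruction buffer bookkeeping: the
         buffer starts at linear address uInstrBufPc with physical address GCPhysInstrBuf, so the
         current PC's physical address is the buffer base plus the linear offset into the buffer.
         The helper name is made up for this example. */
static RTGCPHYS iemNativeExamplePcToPhys(PVMCPUCC pVCpu)
{
    uint64_t const uLinearPc = pVCpu->cpum.GstCtx.cs.u64Base + pVCpu->cpum.GstCtx.rip;
    return pVCpu->iem.s.GCPhysInstrBuf + (uLinearPc - pVCpu->iem.s.uInstrBufPc);
}
#endif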
1641
1642
1643
1644/*********************************************************************************************************************************
1645* Helpers: Segmented memory fetches and stores. *
1646*********************************************************************************************************************************/
1647
1648/**
1649 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1650 */
1651IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1652{
1653#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1654 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1655#else
1656 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1657#endif
1658}
1659
1660
1661/**
1662 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1663 * to 16 bits.
1664 */
1665IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1666{
1667#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1668 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1669#else
1670 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1671#endif
1672}
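
#if 0 /* Illustrative sketch (not compiled): shows what the cast chain above does, i.e. sign-extend
         the fetched byte to 16 bits and then zero-extend the result to 64 bits so the upper bits of
         the returned register value are well defined. */
static void iemNativeExampleSignExtendCast(void)
{
    uint8_t const  bFetched = 0x80;  /* -128 when viewed as signed 8-bit */
    uint64_t const uResult  = (uint64_t)(uint16_t)(int16_t)(int8_t)bFetched;
    Assert(uResult == UINT64_C(0x000000000000ff80)); /* bits 16..63 are zero */
    RT_NOREF(uResult);
}
#endif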
1673
1674
1675/**
1676 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1677 * to 32 bits.
1678 */
1679IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1680{
1681#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1682 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1683#else
1684 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1685#endif
1686}
1687
1688/**
1689 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1690 * to 64 bits.
1691 */
1692IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1693{
1694#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1695 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1696#else
1697 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1698#endif
1699}
1700
1701
1702/**
1703 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1704 */
1705IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1706{
1707#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1708 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1709#else
1710 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1711#endif
1712}
1713
1714
1715/**
1716 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1717 * to 32 bits.
1718 */
1719IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1720{
1721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1722 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1723#else
1724 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1725#endif
1726}
1727
1728
1729/**
1730 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1731 * to 64 bits.
1732 */
1733IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1734{
1735#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1736 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1737#else
1738 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1739#endif
1740}
1741
1742
1743/**
1744 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1745 */
1746IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1747{
1748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1749 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1750#else
1751 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1752#endif
1753}
1754
1755
1756/**
1757 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1758 * to 64 bits.
1759 */
1760IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1761{
1762#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1763 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1764#else
1765 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1766#endif
1767}
1768
1769
1770/**
1771 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1772 */
1773IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1774{
1775#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1776 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1777#else
1778 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1779#endif
1780}
1781
1782
1783/**
1784 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1785 */
1786IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1787{
1788#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1789 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1790#else
1791 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1792#endif
1793}
1794
1795
1796/**
1797 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1798 */
1799IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1800{
1801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1802 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1803#else
1804 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1805#endif
1806}
1807
1808
1809/**
1810 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1815 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1816#else
1817 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1828 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1829#else
1830 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1831#endif
1832}
1833
1834
1835
1836/**
1837 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1838 */
1839IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1840{
1841#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1842 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1843#else
1844 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1845#endif
1846}
1847
1848
1849/**
1850 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1851 */
1852IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1853{
1854#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1855 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1856#else
1857 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1858#endif
1859}
1860
1861
1862/**
1863 * Used by TB code to store a 32-bit selector value onto a generic stack.
1864 *
1865 * Intel CPUs don't write a whole dword here, hence this special function.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1870 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1871#else
1872 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1873#endif
1874}
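
#if 0 /* Illustrative sketch (not compiled): the reason for the dedicated SReg variant is that with
         a 32-bit operand size some CPUs only update the low word of the stack slot when pushing a
         segment register. The helper below is hypothetical and only demonstrates the partial-write
         idea; the real work is done by the store workers above. */
static void iemNativeExampleSRegPartialWrite(uint8_t *pbStackSlot, uint32_t u32Sel)
{
    /* Only the 16-bit selector is written; the upper half of the dword keeps its old value. */
    pbStackSlot[0] = (uint8_t)u32Sel;
    pbStackSlot[1] = (uint8_t)(u32Sel >> 8);
}
#endif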
1875
1876
1877/**
1878 * Used by TB code to push unsigned 64-bit value onto a generic stack.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1883 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1884#else
1885 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1886#endif
1887}
1888
1889
1890/**
1891 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1892 */
1893IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1894{
1895#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1896 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1897#else
1898 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1899#endif
1900}
1901
1902
1903/**
1904 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1905 */
1906IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1907{
1908#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1909 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1910#else
1911 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1912#endif
1913}
1914
1915
1916/**
1917 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1922 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1923#else
1924 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1925#endif
1926}
1927
1928
1929
1930/*********************************************************************************************************************************
1931* Helpers: Flat memory fetches and stores. *
1932*********************************************************************************************************************************/
1933
1934/**
1935 * Used by TB code to load unsigned 8-bit data w/ flat address.
1936 * @note Zero extending the value to 64-bit to simplify assembly.
1937 */
1938IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1939{
1940#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1941 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1942#else
1943 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1944#endif
1945}
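
#if 0 /* Illustrative sketch (not compiled): the flat helpers reuse the segmented safe workers by
         passing UINT8_MAX as the segment register index, the convention these workers use for
         "no segmentation / linear address". The wrapper name below is made up for this example. */
static uint64_t iemNativeExampleFlatFetchU8(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    uint8_t const idxSegNone = UINT8_MAX; /* flat addressing marker */
    return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, idxSegNone, GCPtrMem);
}
#endif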
1946
1947
1948/**
1949 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1950 * to 16 bits.
1951 * @note Zero extending the value to 64-bit to simplify assembly.
1952 */
1953IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1954{
1955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1956 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1957#else
1958 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1959#endif
1960}
1961
1962
1963/**
1964 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1965 * to 32 bits.
1966 * @note Zero extending the value to 64-bit to simplify assembly.
1967 */
1968IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1969{
1970#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1971 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1972#else
1973 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1974#endif
1975}
1976
1977
1978/**
1979 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1980 * to 64 bits.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1983{
1984#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1985 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1986#else
1987 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1988#endif
1989}
1990
1991
1992/**
1993 * Used by TB code to load unsigned 16-bit data w/ flat address.
1994 * @note Zero extending the value to 64-bit to simplify assembly.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1999 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2000#else
2001 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2008 * to 32 bits.
2009 * @note Zero extending the value to 64-bit to simplify assembly.
2010 */
2011IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2012{
2013#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2014 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2015#else
2016 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2017#endif
2018}
2019
2020
2021/**
2022 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2023 * to 64 bits.
2024 * @note Zero extending the value to 64-bit to simplify assembly.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2027{
2028#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2029 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2030#else
2031 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2032#endif
2033}
2034
2035
2036/**
2037 * Used by TB code to load unsigned 32-bit data w/ flat address.
2038 * @note Zero extending the value to 64-bit to simplify assembly.
2039 */
2040IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2041{
2042#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2043 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2044#else
2045 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2046#endif
2047}
2048
2049
2050/**
2051 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2052 * to 64 bits.
2053 * @note Zero extending the value to 64-bit to simplify assembly.
2054 */
2055IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2056{
2057#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2058 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2059#else
2060 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2061#endif
2062}
2063
2064
2065/**
2066 * Used by TB code to load unsigned 64-bit data w/ flat address.
2067 */
2068IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2069{
2070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2071 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2072#else
2073 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2074#endif
2075}
2076
2077
2078/**
2079 * Used by TB code to store unsigned 8-bit data w/ flat address.
2080 */
2081IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2082{
2083#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2084 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2085#else
2086 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2087#endif
2088}
2089
2090
2091/**
2092 * Used by TB code to store unsigned 16-bit data w/ flat address.
2093 */
2094IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2095{
2096#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2097 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2098#else
2099 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2100#endif
2101}
2102
2103
2104/**
2105 * Used by TB code to store unsigned 32-bit data w/ flat address.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2110 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2111#else
2112 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to store unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2123 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2124#else
2125 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2126#endif
2127}
2128
2129
2130
2131/**
2132 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2133 */
2134IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2135{
2136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2137 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2138#else
2139 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2140#endif
2141}
2142
2143
2144/**
2145 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2146 */
2147IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2148{
2149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2150 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2151#else
2152 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2153#endif
2154}
2155
2156
2157/**
2158 * Used by TB code to store a segment selector value onto a flat stack.
2159 *
2160 * Intel CPUs don't write a whole dword here, hence this special function.
2161 */
2162IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2163{
2164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2165 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2166#else
2167 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2168#endif
2169}
2170
2171
2172/**
2173 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2174 */
2175IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2176{
2177#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2178 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2179#else
2180 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2181#endif
2182}
2183
2184
2185/**
2186 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2187 */
2188IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2189{
2190#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2191 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2192#else
2193 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2194#endif
2195}
2196
2197
2198/**
2199 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2202{
2203#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2204 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2205#else
2206 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2207#endif
2208}
2209
2210
2211/**
2212 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2217 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2218#else
2219 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2220#endif
2221}
2222
2223
2224
2225/*********************************************************************************************************************************
2226* Helpers: Segmented memory mapping. *
2227*********************************************************************************************************************************/
2228
2229/**
2230 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2231 */
2232IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2233 RTGCPTR GCPtrMem, uint8_t iSegReg))
2234{
2235#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2236 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2237#else
2238 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2239#endif
2240}
2241
2242
2243/**
2244 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2245 */
2246IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2247 RTGCPTR GCPtrMem, uint8_t iSegReg))
2248{
2249#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2250 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2251#else
2252 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2253#endif
2254}
2255
2256
2257/**
2258 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2259 */
2260IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2261 RTGCPTR GCPtrMem, uint8_t iSegReg))
2262{
2263#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2264 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2265#else
2266 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2267#endif
2268}
2269
2270
2271/**
2272 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2273 */
2274IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2275 RTGCPTR GCPtrMem, uint8_t iSegReg))
2276{
2277#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2278 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2279#else
2280 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2281#endif
2282}
2283
2284
2285/**
2286 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2287 */
2288IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2289 RTGCPTR GCPtrMem, uint8_t iSegReg))
2290{
2291#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2292 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2293#else
2294 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2295#endif
2296}
2297
2298
2299/**
2300 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2301 */
2302IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2303 RTGCPTR GCPtrMem, uint8_t iSegReg))
2304{
2305#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2306 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2307#else
2308 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2309#endif
2310}
2311
2312
2313/**
2314 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2315 */
2316IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2317 RTGCPTR GCPtrMem, uint8_t iSegReg))
2318{
2319#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2320 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2321#else
2322 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2323#endif
2324}
2325
2326
2327/**
2328 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2329 */
2330IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2331 RTGCPTR GCPtrMem, uint8_t iSegReg))
2332{
2333#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2334 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2335#else
2336 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2337#endif
2338}
2339
2340
2341/**
2342 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2343 */
2344IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2345 RTGCPTR GCPtrMem, uint8_t iSegReg))
2346{
2347#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2348 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#else
2350 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2351#endif
2352}
2353
2354
2355/**
2356 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2357 */
2358IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2359 RTGCPTR GCPtrMem, uint8_t iSegReg))
2360{
2361#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2362 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#else
2364 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2365#endif
2366}
2367
2368
2369/**
2370 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2371 */
2372IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2373 RTGCPTR GCPtrMem, uint8_t iSegReg))
2374{
2375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2376 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2377#else
2378 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2379#endif
2380}
2381
2382
2383/**
2384 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2385 */
2386IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2387 RTGCPTR GCPtrMem, uint8_t iSegReg))
2388{
2389#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2390 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2391#else
2392 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2393#endif
2394}
2395
2396
2397/**
2398 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2399 */
2400IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2401 RTGCPTR GCPtrMem, uint8_t iSegReg))
2402{
2403#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2404 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2405#else
2406 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2407#endif
2408}
2409
2410
2411/**
2412 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2413 */
2414IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2415 RTGCPTR GCPtrMem, uint8_t iSegReg))
2416{
2417#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2418 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2419#else
2420 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2421#endif
2422}
2423
2424
2425/**
2426 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2427 */
2428IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2429 RTGCPTR GCPtrMem, uint8_t iSegReg))
2430{
2431#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2432 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2433#else
2434 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2435#endif
2436}
2437
2438
2439/**
2440 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2443 RTGCPTR GCPtrMem, uint8_t iSegReg))
2444{
2445#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2446 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2447#else
2448 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2449#endif
2450}
2451
2452
2453/**
2454 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2455 */
2456IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2457 RTGCPTR GCPtrMem, uint8_t iSegReg))
2458{
2459#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2460 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#else
2462 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#endif
2464}
2465
2466
2467/*********************************************************************************************************************************
2468* Helpers: Flat memory mapping. *
2469*********************************************************************************************************************************/
2470
2471/**
2472 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2473 */
2474IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2475{
2476#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2477 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2478#else
2479 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2480#endif
2481}
2482
2483
2484/**
2485 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2486 */
2487IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2488{
2489#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2490 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2491#else
2492 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2493#endif
2494}
2495
2496
2497/**
2498 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2499 */
2500IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2501{
2502#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2503 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2504#else
2505 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2506#endif
2507}
2508
2509
2510/**
2511 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2512 */
2513IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2514{
2515#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2516 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2517#else
2518 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2519#endif
2520}
2521
2522
2523/**
2524 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2525 */
2526IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2527{
2528#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2529 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2530#else
2531 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2532#endif
2533}
2534
2535
2536/**
2537 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2538 */
2539IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2540{
2541#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2542 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2543#else
2544 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2545#endif
2546}
2547
2548
2549/**
2550 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2551 */
2552IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2553{
2554#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2555 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2556#else
2557 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2558#endif
2559}
2560
2561
2562/**
2563 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2564 */
2565IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2566{
2567#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2568 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2569#else
2570 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2571#endif
2572}
2573
2574
2575/**
2576 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2577 */
2578IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2579{
2580#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2581 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2582#else
2583 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2584#endif
2585}
2586
2587
2588/**
2589 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2590 */
2591IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2592{
2593#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2594 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2595#else
2596 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2597#endif
2598}
2599
2600
2601/**
2602 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2603 */
2604IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2605{
2606#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2607 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2608#else
2609 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2610#endif
2611}
2612
2613
2614/**
2615 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2616 */
2617IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2618{
2619#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2620 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2621#else
2622 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2623#endif
2624}
2625
2626
2627/**
2628 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2629 */
2630IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2631{
2632#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2633 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2634#else
2635 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2636#endif
2637}
2638
2639
2640/**
2641 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2642 */
2643IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2644{
2645#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2646 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2647#else
2648 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2649#endif
2650}
2651
2652
2653/**
2654 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2655 */
2656IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2657{
2658#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2659 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2660#else
2661 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2662#endif
2663}
2664
2665
2666/**
2667 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2668 */
2669IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2670{
2671#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2672 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2673#else
2674 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2675#endif
2676}
2677
2678
2679/**
2680 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2681 */
2682IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2683{
2684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2685 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2686#else
2687 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2688#endif
2689}
2690
2691
2692/*********************************************************************************************************************************
2693* Helpers: Commit, rollback & unmap *
2694*********************************************************************************************************************************/
2695
2696/**
2697 * Used by TB code to commit and unmap a read-write memory mapping.
2698 */
2699IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2700{
2701 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2702}
2703
2704
2705/**
2706 * Used by TB code to commit and unmap a write-only memory mapping.
2707 */
2708IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2709{
2710 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2711}
2712
2713
2714/**
2715 * Used by TB code to commit and unmap a read-only memory mapping.
2716 */
2717IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2718{
2719 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2720}
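
#if 0 /* Illustrative usage sketch (not compiled): the mapping helpers return a host pointer plus an
         opaque unmap-info byte which must be handed back to the matching commit-and-unmap helper.
         DS is just an example segment; errors are reported by longjmp inside the workers, so there
         is no status code to check here. The function name is made up for this example. */
static void iemNativeExampleMapCommit(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    uint8_t   bUnmapInfo = 0;
    uint32_t *pu32       = iemNativeHlpMemMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem, X86_SREG_DS);
    *pu32 += 1;                                          /* modify the guest dword */
    iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);  /* write back & release the mapping */
}
#endif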
2721
2722
2723/**
2724 * Reinitializes the native recompiler state.
2725 *
2726 * Called before starting a new recompile job.
2727 */
2728static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2729{
2730 pReNative->cLabels = 0;
2731 pReNative->bmLabelTypes = 0;
2732 pReNative->cFixups = 0;
2733#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2734 pReNative->pDbgInfo->cEntries = 0;
2735#endif
2736 pReNative->pTbOrg = pTb;
2737 pReNative->cCondDepth = 0;
2738 pReNative->uCondSeqNo = 0;
2739 pReNative->uCheckIrqSeqNo = 0;
2740 pReNative->uTlbSeqNo = 0;
2741
2742 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2743#if IEMNATIVE_HST_GREG_COUNT < 32
2744 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2745#endif
2746 ;
2747 pReNative->Core.bmHstRegsWithGstShadow = 0;
2748 pReNative->Core.bmGstRegShadows = 0;
2749 pReNative->Core.bmVars = 0;
2750 pReNative->Core.bmStack = 0;
2751 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2752 pReNative->Core.u64ArgVars = UINT64_MAX;
2753
2754 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 9);
2755 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2756 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2757 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2758 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2759 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2760 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2761 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2762 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2763 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2764
2765 /* Full host register reinit: */
2766 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2767 {
2768 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2769 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2770 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2771 }
2772
2773 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2774 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2775#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2776 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2777#endif
2778#ifdef IEMNATIVE_REG_FIXED_TMP0
2779 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2780#endif
2781 );
2782 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2783 {
2784 fRegs &= ~RT_BIT_32(idxReg);
2785 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2786 }
2787
2788 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2789#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2790 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2791#endif
2792#ifdef IEMNATIVE_REG_FIXED_TMP0
2793 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2794#endif
2795 return pReNative;
2796}
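
#if 0 /* Illustrative sketch (not compiled): the bmHstRegs seeding above marks both the fixed
         registers and any register numbers the host does not have as "in use". With a hypothetical
         16-register host, bits 16..31 end up permanently set and are never handed out. */
static void iemNativeExampleHstRegMask(void)
{
    uint32_t const cHstRegs     = 16;                           /* assumed host GPR count */
    uint32_t const fNonExistent = ~(RT_BIT_32(cHstRegs) - 1U);  /* = 0xffff0000 */
    Assert(fNonExistent == UINT32_C(0xffff0000));
    RT_NOREF(fNonExistent);
}
#endif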
2797
2798
2799/**
2800 * Allocates and initializes the native recompiler state.
2801 *
2802 * This is called the first time an EMT wants to recompile something.
2803 *
2804 * @returns Pointer to the new recompiler state.
2805 * @param pVCpu The cross context virtual CPU structure of the calling
2806 * thread.
2807 * @param pTb The TB that's about to be recompiled.
2808 * @thread EMT(pVCpu)
2809 */
2810static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2811{
2812 VMCPU_ASSERT_EMT(pVCpu);
2813
2814 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2815 AssertReturn(pReNative, NULL);
2816
2817 /*
2818 * Try to allocate all the buffers and stuff we need.
2819 */
2820 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2821 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2822 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2823#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2824 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2825#endif
2826 if (RT_LIKELY( pReNative->pInstrBuf
2827 && pReNative->paLabels
2828 && pReNative->paFixups)
2829#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2830 && pReNative->pDbgInfo
2831#endif
2832 )
2833 {
2834 /*
2835 * Set the buffer & array sizes on success.
2836 */
2837 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2838 pReNative->cLabelsAlloc = _8K;
2839 pReNative->cFixupsAlloc = _16K;
2840#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2841 pReNative->cDbgInfoAlloc = _16K;
2842#endif
2843
2844 /* Other constant stuff: */
2845 pReNative->pVCpu = pVCpu;
2846
2847 /*
2848 * Done, just need to save it and reinit it.
2849 */
2850 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
2851 return iemNativeReInit(pReNative, pTb);
2852 }
2853
2854 /*
2855 * Failed. Cleanup and return.
2856 */
2857 AssertFailed();
2858 RTMemFree(pReNative->pInstrBuf);
2859 RTMemFree(pReNative->paLabels);
2860 RTMemFree(pReNative->paFixups);
2861#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2862 RTMemFree(pReNative->pDbgInfo);
2863#endif
2864 RTMemFree(pReNative);
2865 return NULL;
2866}
2867
2868
2869/**
2870 * Creates a label.
2871 *
2872 * If the label does not yet have a defined position,
2873 * call iemNativeLabelDefine() later to set it.
2874 *
2875 * @returns Label ID. Throws VBox status code on failure, so no need to check
2876 * the return value.
2877 * @param pReNative The native recompile state.
2878 * @param enmType The label type.
2879 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2880 * label is not yet defined (default).
2881 * @param uData Data associated with the label. Only applicable to
2882 * certain type of labels. Default is zero.
2883 */
2884DECL_HIDDEN_THROW(uint32_t)
2885iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2886 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2887{
2888 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2889
2890 /*
2891 * Locate existing label definition.
2892 *
2893 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2894 * and uData is zero.
2895 */
2896 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2897 uint32_t const cLabels = pReNative->cLabels;
2898 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2899#ifndef VBOX_STRICT
2900 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2901 && offWhere == UINT32_MAX
2902 && uData == 0
2903#endif
2904 )
2905 {
2906#ifndef VBOX_STRICT
2907 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2908 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2909 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2910 if (idxLabel < pReNative->cLabels)
2911 return idxLabel;
2912#else
2913 for (uint32_t i = 0; i < cLabels; i++)
2914 if ( paLabels[i].enmType == enmType
2915 && paLabels[i].uData == uData)
2916 {
2917 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2918 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2919 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2920 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2921 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2922 return i;
2923 }
2924 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2925 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2926#endif
2927 }
2928
2929 /*
2930 * Make sure we've got room for another label.
2931 */
2932 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2933 { /* likely */ }
2934 else
2935 {
2936 uint32_t cNew = pReNative->cLabelsAlloc;
2937 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2938 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2939 cNew *= 2;
2940 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2941 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2942 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2943 pReNative->paLabels = paLabels;
2944 pReNative->cLabelsAlloc = cNew;
2945 }
2946
2947 /*
2948 * Define a new label.
2949 */
2950 paLabels[cLabels].off = offWhere;
2951 paLabels[cLabels].enmType = enmType;
2952 paLabels[cLabels].uData = uData;
2953 pReNative->cLabels = cLabels + 1;
2954
2955 Assert((unsigned)enmType < 64);
2956 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2957
2958 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2959 {
2960 Assert(uData == 0);
2961 pReNative->aidxUniqueLabels[enmType] = cLabels;
2962 }
2963
2964 if (offWhere != UINT32_MAX)
2965 {
2966#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2967 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2968 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2969#endif
2970 }
2971 return cLabels;
2972}
2973
2974
2975/**
2976 * Defines the location of an existing label.
2977 *
2978 * @param pReNative The native recompile state.
2979 * @param idxLabel The label to define.
2980 * @param offWhere The position.
2981 */
2982DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2983{
2984 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2985 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2986 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2987 pLabel->off = offWhere;
2988#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2989 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2990 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2991#endif
2992}
2993
2994
2995/**
2996 * Looks up a label.
2997 *
2998 * @returns Label ID if found, UINT32_MAX if not.
2999 */
3000static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3001 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3002{
3003 Assert((unsigned)enmType < 64);
3004 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3005 {
3006 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3007 return pReNative->aidxUniqueLabels[enmType];
3008
3009 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3010 uint32_t const cLabels = pReNative->cLabels;
3011 for (uint32_t i = 0; i < cLabels; i++)
3012 if ( paLabels[i].enmType == enmType
3013 && paLabels[i].uData == uData
3014 && ( paLabels[i].off == offWhere
3015 || offWhere == UINT32_MAX
3016 || paLabels[i].off == UINT32_MAX))
3017 return i;
3018 }
3019 return UINT32_MAX;
3020}
3021
3022
3023/**
3024 * Adds a fixup.
3025 *
3026 * @throws VBox status code (int) on failure.
3027 * @param pReNative The native recompile state.
3028 * @param offWhere The instruction offset of the fixup location.
3029 * @param idxLabel The target label ID for the fixup.
3030 * @param enmType The fixup type.
3031 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3032 */
3033DECL_HIDDEN_THROW(void)
3034iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3035 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3036{
3037 Assert(idxLabel <= UINT16_MAX);
3038 Assert((unsigned)enmType <= UINT8_MAX);
3039
3040 /*
3041 * Make sure we've got room.
3042 */
3043 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3044 uint32_t const cFixups = pReNative->cFixups;
3045 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3046 { /* likely */ }
3047 else
3048 {
3049 uint32_t cNew = pReNative->cFixupsAlloc;
3050 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3051 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3052 cNew *= 2;
3053 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3054 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3055 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3056 pReNative->paFixups = paFixups;
3057 pReNative->cFixupsAlloc = cNew;
3058 }
3059
3060 /*
3061 * Add the fixup.
3062 */
3063 paFixups[cFixups].off = offWhere;
3064 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3065 paFixups[cFixups].enmType = enmType;
3066 paFixups[cFixups].offAddend = offAddend;
3067 pReNative->cFixups = cFixups + 1;
3068}
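
#if 0 /* Illustrative usage sketch (not compiled): forward-declare a label, record a fixup for a
         branch that targets it, and define the label once its position is known. The emitter call
         is elided and the enum values are placeholders picked for the example; only the three
         functions are the real APIs defined in this file. */
static void iemNativeExampleLabelAndFixup(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Forward declaration: offWhere defaults to UINT32_MAX (not yet defined). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);

    /* ... emit the branch instruction at 'off', then record where to patch it ... */
    iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32);

    /* Later, when the target position is reached, pin the label down. */
    iemNativeLabelDefine(pReNative, idxLabel, off + 1);
}
#endif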
3069
3070
3071/**
3072 * Slow code path for iemNativeInstrBufEnsure.
3073 */
3074DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3075{
3076 /* Double the buffer size till we meet the request. */
3077 uint32_t cNew = pReNative->cInstrBufAlloc;
3078 AssertReturn(cNew > 0, NULL);
3079 do
3080 cNew *= 2;
3081 while (cNew < off + cInstrReq);
3082
3083 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3084#ifdef RT_ARCH_ARM64
3085 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3086#else
3087 uint32_t const cbMaxInstrBuf = _2M;
3088#endif
3089 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3090
3091 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3092 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3093
3094#ifdef VBOX_STRICT
3095 pReNative->offInstrBufChecked = off + cInstrReq;
3096#endif
3097 pReNative->cInstrBufAlloc = cNew;
3098 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3099}
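
/*
 * Callers normally go through an inline wrapper (iemNativeInstrBufEnsure, see
 * the header) which only drops into this slow path when the buffer is too
 * small.  A rough sketch of that fast path, stated as an assumption rather
 * than a quote of the actual inline:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = RT_LIKELY(off + cInstrReq <= pReNative->cInstrBufAlloc)
 *                                     ? pReNative->pInstrBuf
 *                                     : iemNativeInstrBufEnsureSlow(pReNative, off, cInstrReq);
 */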
3100
3101#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3102
3103/**
3104 * Grows the static debug info array used during recompilation.
3105 *
3106 * @returns Pointer to the new debug info block; throws VBox status code on
3107 * failure, so no need to check the return value.
3108 */
3109DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3110{
3111 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3112 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3113 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3114 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3115 pReNative->pDbgInfo = pDbgInfo;
3116 pReNative->cDbgInfoAlloc = cNew;
3117 return pDbgInfo;
3118}
3119
3120
3121/**
3122 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3123 */
3124DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3125{
3126 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3127 { /* likely */ }
3128 else
3129 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3130 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3131}
3132
3133
3134/**
3135 * Debug Info: Adds a native offset record, if necessary.
3136 */
3137static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3138{
3139 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3140
3141 /*
3142 * Search backwards to see if we've got a similar record already.
3143 */
3144 uint32_t idx = pDbgInfo->cEntries;
3145 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3146 while (idx-- > idxStop)
3147 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3148 {
3149 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3150 return;
3151 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3152 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3153 break;
3154 }
3155
3156 /*
3157 * Add it.
3158 */
3159 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3160 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3161 pEntry->NativeOffset.offNative = off;
3162}
3163
3164
3165/**
3166 * Debug Info: Record info about a label.
3167 */
3168static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3169{
3170 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3171 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3172 pEntry->Label.uUnused = 0;
3173 pEntry->Label.enmLabel = (uint8_t)enmType;
3174 pEntry->Label.uData = uData;
3175}
3176
3177
3178/**
3179 * Debug Info: Record info about a threaded call.
3180 */
3181static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3182{
3183 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3184 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3185 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3186 pEntry->ThreadedCall.uUnused = 0;
3187 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3188}
3189
3190
3191/**
3192 * Debug Info: Record info about a new guest instruction.
3193 */
3194static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3195{
3196 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3197 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3198 pEntry->GuestInstruction.uUnused = 0;
3199 pEntry->GuestInstruction.fExec = fExec;
3200}
3201
3202
3203/**
3204 * Debug Info: Record info about guest register shadowing.
3205 */
3206static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3207 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3208{
3209 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3210 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3211 pEntry->GuestRegShadowing.uUnused = 0;
3212 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3213 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3214 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3215}
3216
3217#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3218
3219
3220/*********************************************************************************************************************************
3221* Register Allocator *
3222*********************************************************************************************************************************/
3223
3224/**
3225 * Register parameter indexes (indexed by argument number).
3226 */
3227DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3228{
3229 IEMNATIVE_CALL_ARG0_GREG,
3230 IEMNATIVE_CALL_ARG1_GREG,
3231 IEMNATIVE_CALL_ARG2_GREG,
3232 IEMNATIVE_CALL_ARG3_GREG,
3233#if defined(IEMNATIVE_CALL_ARG4_GREG)
3234 IEMNATIVE_CALL_ARG4_GREG,
3235# if defined(IEMNATIVE_CALL_ARG5_GREG)
3236 IEMNATIVE_CALL_ARG5_GREG,
3237# if defined(IEMNATIVE_CALL_ARG6_GREG)
3238 IEMNATIVE_CALL_ARG6_GREG,
3239# if defined(IEMNATIVE_CALL_ARG7_GREG)
3240 IEMNATIVE_CALL_ARG7_GREG,
3241# endif
3242# endif
3243# endif
3244#endif
3245};
3246
3247/**
3248 * Call register masks indexed by argument count.
3249 */
3250DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3251{
3252 0,
3253 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3254 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3255 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3256 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3257 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3258#if defined(IEMNATIVE_CALL_ARG4_GREG)
3259 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3260 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3261# if defined(IEMNATIVE_CALL_ARG5_GREG)
3262 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3263 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3264# if defined(IEMNATIVE_CALL_ARG6_GREG)
3265 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3266 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3267 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3268# if defined(IEMNATIVE_CALL_ARG7_GREG)
3269 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3270 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3271 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3272# endif
3273# endif
3274# endif
3275#endif
3276};
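
/*
 * The table is indexed by argument count; for instance:
 *
 *      uint32_t const fArgRegs = g_afIemNativeCallRegs[3];
 *      // fArgRegs == RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG)
 *      //           | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
 *      //           | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
 */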
3277
3278#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3279/**
3280 * BP offset of the stack argument slots.
3281 *
3282 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3283 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3284 */
3285DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3286{
3287 IEMNATIVE_FP_OFF_STACK_ARG0,
3288# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3289 IEMNATIVE_FP_OFF_STACK_ARG1,
3290# endif
3291# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3292 IEMNATIVE_FP_OFF_STACK_ARG2,
3293# endif
3294# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3295 IEMNATIVE_FP_OFF_STACK_ARG3,
3296# endif
3297};
3298AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3299#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3300
3301/**
3302 * Info about shadowed guest register values.
3303 * @see IEMNATIVEGSTREG
3304 */
3305static struct
3306{
3307 /** Offset in VMCPU. */
3308 uint32_t off;
3309 /** The field size. */
3310 uint8_t cb;
3311 /** Name (for logging). */
3312 const char *pszName;
3313} const g_aGstShadowInfo[] =
3314{
3315#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3316 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3317 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3318 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3319 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3320 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3321 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3322 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3323 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3324 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3325 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3326 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3327 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3328 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3329 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3330 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3331 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3332 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3333 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3334 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3335 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3336 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3337 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3338 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3339 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3340 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3341 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3342 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3343 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3344 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3345 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3346 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3347 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3348 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3349 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3350 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3351 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3352 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3353 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3354 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3355 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3356 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3357 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3358#undef CPUMCTX_OFF_AND_SIZE
3359};
3360AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
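
/*
 * The table translates an IEMNATIVEGSTREG value into the VMCPU offset, field
 * size and log name of the shadowed guest field; a small sketch:
 *
 *      IEMNATIVEGSTREG const enmGstReg  = (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX);
 *      uint32_t const        offCpumCtx = g_aGstShadowInfo[enmGstReg].off;  // offset of cpum.GstCtx.rax in VMCPU
 *      uint8_t const         cbField    = g_aGstShadowInfo[enmGstReg].cb;   // 8 for a GPR
 *      Log12(("%s lives at VMCPU offset %#x\n", g_aGstShadowInfo[enmGstReg].pszName, offCpumCtx));
 */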
3361
3362
3363/** Host CPU general purpose register names. */
3364DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3365{
3366#ifdef RT_ARCH_AMD64
3367 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3368#elif RT_ARCH_ARM64
3369 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3370 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3371#else
3372# error "port me"
3373#endif
3374};
3375
3376
3377DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3378 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3379{
3380 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3381
3382 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3383 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3384 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3385 return (uint8_t)idxReg;
3386}
3387
3388
3389/**
3390 * Tries to locate a suitable register in the given register mask.
3391 *
3392 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3393 * failed.
3394 *
3395 * @returns Host register number on success, returns UINT8_MAX on failure.
3396 */
3397static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3398{
3399 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3400 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3401 if (fRegs)
3402 {
3403 /** @todo pick better here: */
3404 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3405
3406 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3407 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3408 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3409 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3410
3411 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3412 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3413 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3414 return idxReg;
3415 }
3416 return UINT8_MAX;
3417}
3418
3419
3420/**
3421 * Locate a register, possibly freeing one up.
3422 *
3423 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3424 * failed.
3425 *
3426 * @returns Host register number on success. Returns UINT8_MAX if no registers
3427 *          found; the caller is expected to deal with this and raise an
3428 *          allocation type specific status code (if desired).
3429 *
3430 * @throws  VBox status code if we run into trouble spilling a variable or
3431 * recording debug info. Does NOT throw anything if we're out of
3432 * registers, though.
3433 */
3434static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3435 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3436{
3437 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3438 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3439
3440 /*
3441 * Try a freed register that's shadowing a guest register
3442 */
3443 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3444 if (fRegs)
3445 {
3446 unsigned const idxReg = (fPreferVolatile
3447 ? ASMBitFirstSetU32(fRegs)
3448 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3449 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
3450 - 1;
3451
3452 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3453 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3454 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3455 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3456
3457 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3458 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3459 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3460 return idxReg;
3461 }
3462
3463 /*
3464 * Try free up a variable that's in a register.
3465 *
3466 * We do two rounds here, first evacuating variables we don't need to be
3467 * saved on the stack, then in the second round move things to the stack.
3468 */
3469 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3470 {
3471 uint32_t fVars = pReNative->Core.bmVars;
3472 while (fVars)
3473 {
3474 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3475 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3476 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3477 && (RT_BIT_32(idxReg) & fRegMask)
3478 && ( iLoop == 0
3479 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3480 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3481 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3482 {
3483 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3484 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3485 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3486 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3487 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3488 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3489
3490 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3491 {
3492 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3493 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3494 }
3495
3496 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3497 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3498
3499 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3500 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3501 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3502 return idxReg;
3503 }
3504 fVars &= ~RT_BIT_32(idxVar);
3505 }
3506 }
3507
3508 return UINT8_MAX;
3509}
3510
3511
3512/**
3513 * Reassigns a variable to a different register specified by the caller.
3514 *
3515 * @returns The new code buffer position.
3516 * @param pReNative The native recompile state.
3517 * @param off The current code buffer position.
3518 * @param idxVar The variable index.
3519 * @param idxRegOld The old host register number.
3520 * @param idxRegNew The new host register number.
3521 * @param pszCaller The caller for logging.
3522 */
3523static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3524 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3525{
3526 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3527 RT_NOREF(pszCaller);
3528
3529 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3530
3531 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3532 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3533 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3534 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3535
3536 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3537 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3538 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3539 if (fGstRegShadows)
3540 {
3541 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3542 | RT_BIT_32(idxRegNew);
3543 while (fGstRegShadows)
3544 {
3545 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3546 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3547
3548 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3549 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3550 }
3551 }
3552
3553 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3554 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3555 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3556 return off;
3557}
3558
3559
3560/**
3561 * Moves a variable to a different register or spills it onto the stack.
3562 *
3563 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3564 * kinds can easily be recreated if needed later.
3565 *
3566 * @returns The new code buffer position.
3567 * @param pReNative The native recompile state.
3568 * @param off The current code buffer position.
3569 * @param idxVar The variable index.
3570 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3571 * call-volatile registers.
3572 */
3573static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3574 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3575{
3576 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3577 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3578 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3579
3580 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3581 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3582 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3583 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3584 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3585 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3586 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3587 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3588 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3589
3590
3591 /** @todo Add statistics on this.*/
3592 /** @todo Implement basic variable liveness analysis (python) so variables
3593     * can be freed immediately once they are no longer used. As it stands, we
3594     * risk trashing registers and stack slots for dead variables. */
3595
3596 /*
3597 * First try move it to a different register, as that's cheaper.
3598 */
3599 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3600 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3601 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3602 if (fRegs)
3603 {
3604 /* Avoid using shadow registers, if possible. */
3605 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3606 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3607 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3608 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3609 }
3610
3611 /*
3612 * Otherwise we must spill the register onto the stack.
3613 */
3614 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3615 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3616 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3617 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3618
3619 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3620 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3621 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3622 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3623 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3624 return off;
3625}
3626
3627
3628/**
3629 * Allocates a temporary host general purpose register.
3630 *
3631 * This may emit code to save register content onto the stack in order to free
3632 * up a register.
3633 *
3634 * @returns The host register number; throws VBox status code on failure,
3635 * so no need to check the return value.
3636 * @param pReNative The native recompile state.
3637 * @param poff Pointer to the variable with the code buffer position.
3638 *                          This will be updated if we need to move a variable from
3639 * register to stack in order to satisfy the request.
3640 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3641 * registers (@c true, default) or the other way around
3642 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3643 */
3644DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3645{
3646 /*
3647 * Try find a completely unused register, preferably a call-volatile one.
3648 */
3649 uint8_t idxReg;
3650 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3651 & ~pReNative->Core.bmHstRegsWithGstShadow
3652 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3653 if (fRegs)
3654 {
3655 if (fPreferVolatile)
3656 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3657 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3658 else
3659 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3660 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3661 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3662 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3663 }
3664 else
3665 {
3666 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3667 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3668 }
3669 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3670}
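
/*
 * Minimal usage sketch: allocate a scratch register, emit code that uses it,
 * then hand it back.  The emitter call in the middle is purely illustrative:
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0xfeedface));
 *      // ... more code using idxTmpReg ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */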
3671
3672
3673/**
3674 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3675 * registers.
3676 *
3677 * @returns The host register number; throws VBox status code on failure,
3678 * so no need to check the return value.
3679 * @param pReNative The native recompile state.
3680 * @param poff Pointer to the variable with the code buffer position.
3681 *                          This will be updated if we need to move a variable from
3682 * register to stack in order to satisfy the request.
3683 * @param fRegMask Mask of acceptable registers.
3684 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3685 * registers (@c true, default) or the other way around
3686 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3687 */
3688DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3689 bool fPreferVolatile /*= true*/)
3690{
3691 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3692 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3693
3694 /*
3695 * Try find a completely unused register, preferably a call-volatile one.
3696 */
3697 uint8_t idxReg;
3698 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3699 & ~pReNative->Core.bmHstRegsWithGstShadow
3700 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3701 & fRegMask;
3702 if (fRegs)
3703 {
3704 if (fPreferVolatile)
3705 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3706 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3707 else
3708 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3709 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3710 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3711 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3712 }
3713 else
3714 {
3715 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3716 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3717 }
3718 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3719}
3720
3721
3722/**
3723 * Allocates a temporary register for loading an immediate value into.
3724 *
3725 * This will emit code to load the immediate, unless there happens to be an
3726 * unused register with the value already loaded.
3727 *
3728 * The caller must not modify the returned register; it is to be considered
3729 * read-only. Free using iemNativeRegFreeTmpImm.
3730 *
3731 * @returns The host register number; throws VBox status code on failure, so no
3732 * need to check the return value.
3733 * @param pReNative The native recompile state.
3734 * @param poff Pointer to the variable with the code buffer position.
3735 * @param uImm The immediate value that the register must hold upon
3736 * return.
3737 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3738 * registers (@c true, default) or the other way around
3739 * (@c false).
3740 *
3741 * @note Reusing immediate values has not been implemented yet.
3742 */
3743DECL_HIDDEN_THROW(uint8_t)
3744iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3745{
3746 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3747 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3748 return idxReg;
3749}
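
/*
 * Usage sketch: the register comes back holding uImm and must be treated as
 * read-only until it is released again:
 *
 *      uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1000));
 *      // ... emit code that only reads idxImmReg ...
 *      iemNativeRegFreeTmpImm(pReNative, idxImmReg);
 */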
3750
3751
3752/**
3753 * Marks host register @a idxHstReg as containing a shadow copy of guest
3754 * register @a enmGstReg.
3755 *
3756 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
3757 * host register before calling.
3758 */
3759DECL_FORCE_INLINE(void)
3760iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3761{
3762 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
3763 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3764 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3765
3766 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
3767 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
3768 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
3769 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3770#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3771 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3772 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
3773#else
3774 RT_NOREF(off);
3775#endif
3776}
3777
3778
3779/**
3780 * Clear any guest register shadow claims from @a idxHstReg.
3781 *
3782 * The register does not need to be shadowing any guest registers.
3783 */
3784DECL_FORCE_INLINE(void)
3785iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
3786{
3787 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3788 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3789 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3790 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3791 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3792
3793#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3794 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3795 if (fGstRegs)
3796 {
3797 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
3798 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3799 while (fGstRegs)
3800 {
3801 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3802 fGstRegs &= ~RT_BIT_64(iGstReg);
3803 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
3804 }
3805 }
3806#else
3807 RT_NOREF(off);
3808#endif
3809
3810 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3811 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3812 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3813}
3814
3815
3816/**
3817 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
3818 * and global overview flags.
3819 */
3820DECL_FORCE_INLINE(void)
3821iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3822{
3823 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3824 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3825 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3826 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3827 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
3828 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3829 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3830
3831#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3832 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3833 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
3834#else
3835 RT_NOREF(off);
3836#endif
3837
3838 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3839 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
3840 if (!fGstRegShadowsNew)
3841 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3842 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
3843}
3844
3845
3846/**
3847 * Clear any guest register shadow claim for @a enmGstReg.
3848 */
3849DECL_FORCE_INLINE(void)
3850iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3851{
3852 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3853 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3854 {
3855 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3856 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3857 }
3858}
3859
3860
3861/**
3862 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
3863 * as the new shadow of it.
3864 */
3865DECL_FORCE_INLINE(void)
3866iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
3867 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3868{
3869 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3870 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3871 {
3872 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3873 if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
3874 return;
3875 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3876 }
3877 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
3878}
3879
3880
3881/**
3882 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
3883 * to @a idxRegTo.
3884 */
3885DECL_FORCE_INLINE(void)
3886iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
3887 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3888{
3889 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
3890 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
3891 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
3892 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
3893 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3894 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
3895 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
3896 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
3897 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
3898
3899 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3900 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
3901 if (!fGstRegShadowsFrom)
3902 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
3903 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
3904 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
3905 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
3906#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3907 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3908 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
3909#else
3910 RT_NOREF(off);
3911#endif
3912}
3913
3914
3915/**
3916 * Allocates a temporary host general purpose register for keeping a guest
3917 * register value.
3918 *
3919 * If we already have a register holding the guest register value, no load is
3920 * needed; otherwise code will be emitted to do the loading. Code may also
3921 * be emitted if we have to free up a register to satisfy the request.
3922 *
3923 * @returns The host register number; throws VBox status code on failure, so no
3924 * need to check the return value.
3925 * @param pReNative The native recompile state.
3926 * @param poff Pointer to the variable with the code buffer
3927 *                          position. This will be updated if we need to move a
3928 * variable from register to stack in order to satisfy
3929 * the request.
3930 * @param   enmGstReg       The guest register that is to be updated.
3931 * @param enmIntendedUse How the caller will be using the host register.
3932 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3933 * register is okay (default). The ASSUMPTION here is
3934 * that the caller has already flushed all volatile
3935 * registers, so this is only applied if we allocate a
3936 * new register.
3937 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3938 */
3939DECL_HIDDEN_THROW(uint8_t)
3940iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3941 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3942 bool fNoVolatileRegs /*= false*/)
3943{
3944 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3945#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3946 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3947#endif
3948 uint32_t const fRegMask = !fNoVolatileRegs
3949 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3950 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3951
3952 /*
3953 * First check if the guest register value is already in a host register.
3954 */
3955 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3956 {
3957 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3958 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3959 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3960 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3961
3962 /* It's not supposed to be allocated... */
3963 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3964 {
3965 /*
3966 * If the register will trash the guest shadow copy, try find a
3967 * completely unused register we can use instead. If that fails,
3968 * we need to disassociate the host reg from the guest reg.
3969 */
3970 /** @todo would be nice to know if preserving the register is in any way helpful. */
3971 /* If the purpose is calculations, try duplicate the register value as
3972 we'll be clobbering the shadow. */
3973 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3974 && ( ~pReNative->Core.bmHstRegs
3975 & ~pReNative->Core.bmHstRegsWithGstShadow
3976 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3977 {
3978 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3979
3980 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3981
3982 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3983 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3984 g_apszIemNativeHstRegNames[idxRegNew]));
3985 idxReg = idxRegNew;
3986 }
3987 /* If the current register matches the restrictions, go ahead and allocate
3988 it for the caller. */
3989 else if (fRegMask & RT_BIT_32(idxReg))
3990 {
3991 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3992 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3993 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3994 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3995 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3996 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3997 else
3998 {
3999 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4000 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4001 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4002 }
4003 }
4004 /* Otherwise, allocate a register that satisfies the caller and transfer
4005 the shadowing if compatible with the intended use. (This basically
4006               means the caller wants a non-volatile register (RSP push/pop scenario).) */
4007 else
4008 {
4009 Assert(fNoVolatileRegs);
4010 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4011 !fNoVolatileRegs
4012 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4013 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4014 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4015 {
4016 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4017                    Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4018 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4019 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4020 }
4021 else
4022 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4023 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4024 g_apszIemNativeHstRegNames[idxRegNew]));
4025 idxReg = idxRegNew;
4026 }
4027 }
4028 else
4029 {
4030 /*
4031 * Oops. Shadowed guest register already allocated!
4032 *
4033 * Allocate a new register, copy the value and, if updating, the
4034 * guest shadow copy assignment to the new register.
4035 */
4036 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4037 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4038 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4039 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4040
4041 /** @todo share register for readonly access. */
4042 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4043 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4044
4045 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4046 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4047
4048 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4049 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4050 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4051 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4052 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4053 else
4054 {
4055 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4056 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4057 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4058 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4059 }
4060 idxReg = idxRegNew;
4061 }
4062 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4063
4064#ifdef VBOX_STRICT
4065 /* Strict builds: Check that the value is correct. */
4066 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4067#endif
4068
4069 return idxReg;
4070 }
4071
4072 /*
4073     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4074 */
4075 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4076
4077 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4078 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4079
4080 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4081 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4082 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4083 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4084
4085 return idxRegNew;
4086}
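
/*
 * Usage sketch for the common update case.  The guest register constant is
 * composed from the shadow info table above; the modification step in the
 * middle is illustrative only:
 *
 *      uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying idxRegRax and storing it back to CPUMCTX ...
 *      iemNativeRegFreeTmp(pReNative, idxRegRax);
 */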
4087
4088
4089/**
4090 * Allocates a temporary host general purpose register that already holds the
4091 * given guest register value.
4092 *
4093 * The use case for this function is places where the shadowing state cannot be
4094 * modified due to branching and such. This will fail if we don't have a
4095 * current shadow copy handy or if it's incompatible. The only code that will
4096 * be emitted here is value checking code in strict builds.
4097 *
4098 * The intended use can only be readonly!
4099 *
4100 * @returns The host register number, UINT8_MAX if not present.
4101 * @param pReNative The native recompile state.
4102 * @param poff Pointer to the instruction buffer offset.
4103 * Will be updated in strict builds if a register is
4104 * found.
4105 * @param   enmGstReg       The guest register that is to be read.
4106 * @note In strict builds, this may throw instruction buffer growth failures.
4107 * Non-strict builds will not throw anything.
4108 * @sa iemNativeRegAllocTmpForGuestReg
4109 */
4110DECL_HIDDEN_THROW(uint8_t)
4111iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4112{
4113 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4114
4115 /*
4116 * First check if the guest register value is already in a host register.
4117 */
4118 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4119 {
4120 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4121 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4122 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4123 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4124
4125 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4126 {
4127 /*
4128 * We only do readonly use here, so easy compared to the other
4129 * variant of this code.
4130 */
4131 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4132 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4133 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4134 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4135 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4136
4137#ifdef VBOX_STRICT
4138 /* Strict builds: Check that the value is correct. */
4139 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4140#else
4141 RT_NOREF(poff);
4142#endif
4143 return idxReg;
4144 }
4145 }
4146
4147 return UINT8_MAX;
4148}
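
/*
 * Usage sketch: unlike iemNativeRegAllocTmpForGuestReg this can fail, so the
 * UINT8_MAX return value must be checked and a fallback path provided:
 *
 *      uint8_t const idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
 *      if (idxRegEfl != UINT8_MAX)
 *      {
 *          // ... read-only use of the shadow copy ...
 *          iemNativeRegFreeTmp(pReNative, idxRegEfl);
 *      }
 *      else
 *      {
 *          // ... fall back, e.g. load eflags directly from CPUMCTX ...
 *      }
 */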
4149
4150
4151DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
4152
4153
4154/**
4155 * Allocates argument registers for a function call.
4156 *
4157 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4158 * need to check the return value.
4159 * @param pReNative The native recompile state.
4160 * @param off The current code buffer offset.
4161 * @param cArgs The number of arguments the function call takes.
4162 */
4163DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4164{
4165 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4166 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4167 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4168 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4169
4170 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4171 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4172 else if (cArgs == 0)
4173        return off;
4174
4175 /*
4176     * Do we get lucky and all the registers are free and not shadowing anything?
4177 */
4178 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4179 for (uint32_t i = 0; i < cArgs; i++)
4180 {
4181 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4182 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4183 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4184 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4185 }
4186 /*
4187 * Okay, not lucky so we have to free up the registers.
4188 */
4189 else
4190 for (uint32_t i = 0; i < cArgs; i++)
4191 {
4192 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4193 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4194 {
4195 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4196 {
4197 case kIemNativeWhat_Var:
4198 {
4199 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4200 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
4201 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4202 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4203 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4204
4205 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4206 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4207 else
4208 {
4209 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4210 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4211 }
4212 break;
4213 }
4214
4215 case kIemNativeWhat_Tmp:
4216 case kIemNativeWhat_Arg:
4217 case kIemNativeWhat_rc:
4218 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4219 default:
4220 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4221 }
4222
4223 }
4224 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4225 {
4226 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4227 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4228 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4229 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4230 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4231 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4232 }
4233 else
4234 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4235 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4236 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4237 }
4238 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4239    return off;
4240}
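
/*
 * Rough calling-sequence sketch (hedged; the argument loading and the actual
 * call emission are only alluded to):
 *
 *      off = iemNativeRegAllocArgs(pReNative, off, cArgs);
 *      // ... load IEMNATIVE_CALL_ARG0_GREG..ARGn (and any stack slots), then emit the call ...
 */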
4241
4242
4243DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4244
4245
4246#if 0
4247/**
4248 * Frees a register assignment of any type.
4249 *
4250 * @param pReNative The native recompile state.
4251 * @param idxHstReg The register to free.
4252 *
4253 * @note Does not update variables.
4254 */
4255DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4256{
4257 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4258 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4259 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4260 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4261 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4262 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4263 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4264 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4265 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4266 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4267 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4268 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4269 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4270 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4271
4272 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4273 /* no flushing, right:
4274 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4275 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4276 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4277 */
4278}
4279#endif
4280
4281
4282/**
4283 * Frees a temporary register.
4284 *
4285 * Any shadow copies of guest registers assigned to the host register will not
4286 * be flushed by this operation.
4287 */
4288DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4289{
4290 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4291 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4292 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4293 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4294 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4295}
4296
4297
4298/**
4299 * Frees a temporary immediate register.
4300 *
4301 * It is assumed that the caller has not modified the register, so it still holds
4302 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4303 */
4304DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4305{
4306 iemNativeRegFreeTmp(pReNative, idxHstReg);
4307}
4308
4309
4310/**
4311 * Frees a register assigned to a variable.
4312 *
4313 * The register will be disassociated from the variable.
4314 */
4315DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4316{
4317 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4318 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4319 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4320 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4321 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4322
4323 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4324 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4325 if (!fFlushShadows)
4326 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
4327 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4328 else
4329 {
4330 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4331 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4332 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4333 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4334 uint64_t fGstRegShadows = fGstRegShadowsOld;
4335 while (fGstRegShadows)
4336 {
4337 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4338 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4339
4340 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4341 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4342 }
4343 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
4344 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4345 }
4346}
4347
4348
4349/**
4350 * Called right before emitting a call instruction to move anything important
4351 * out of call-volatile registers, free and flush the call-volatile registers,
4352 * optionally freeing argument variables.
4353 *
4354 * @returns New code buffer offset; throws VBox status code on error.
4355 * @param pReNative The native recompile state.
4356 * @param off The code buffer offset.
4357 * @param cArgs The number of arguments the function call takes.
4358 *                  It is presumed that the host register part of these has
4359 * been allocated as such already and won't need moving,
4360 * just freeing.
4361 * @param fKeepVars Mask of variables that should keep their register
4362 * assignments. Caller must take care to handle these.
4363 */
4364DECL_HIDDEN_THROW(uint32_t)
4365iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4366{
4367 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4368
4369 /* fKeepVars will reduce this mask. */
4370 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4371
4372 /*
4373 * Move anything important out of volatile registers.
4374 */
4375 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4376 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4377 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4378#ifdef IEMNATIVE_REG_FIXED_TMP0
4379 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4380#endif
4381 & ~g_afIemNativeCallRegs[cArgs];
4382
4383 fRegsToMove &= pReNative->Core.bmHstRegs;
4384 if (!fRegsToMove)
4385 { /* likely */ }
4386 else
4387 {
4388 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4389 while (fRegsToMove != 0)
4390 {
4391 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4392 fRegsToMove &= ~RT_BIT_32(idxReg);
4393
4394 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4395 {
4396 case kIemNativeWhat_Var:
4397 {
4398 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4399 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4400 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4401 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4402 if (!(RT_BIT_32(idxVar) & fKeepVars))
4403 {
4404 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4405 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4406 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4407 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4408 else
4409 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4410 }
4411 else
4412 fRegsToFree &= ~RT_BIT_32(idxReg);
4413 continue;
4414 }
4415
4416 case kIemNativeWhat_Arg:
4417 AssertMsgFailed(("What?!?: %u\n", idxReg));
4418 continue;
4419
4420 case kIemNativeWhat_rc:
4421 case kIemNativeWhat_Tmp:
4422 AssertMsgFailed(("Missing free: %u\n", idxReg));
4423 continue;
4424
4425 case kIemNativeWhat_FixedTmp:
4426 case kIemNativeWhat_pVCpuFixed:
4427 case kIemNativeWhat_pCtxFixed:
4428 case kIemNativeWhat_FixedReserved:
4429 case kIemNativeWhat_Invalid:
4430 case kIemNativeWhat_End:
4431 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4432 }
4433 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4434 }
4435 }
4436
4437 /*
4438 * Do the actual freeing.
4439 */
4440 if (pReNative->Core.bmHstRegs & fRegsToFree)
4441 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4442 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4443 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4444
4445 /* If there are guest register shadows in any call-volatile register, we
4446       have to clear the corresponding guest register masks for each register. */
4447 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4448 if (fHstRegsWithGstShadow)
4449 {
4450 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4451 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4452 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4453 do
4454 {
4455 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4456 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4457
4458 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4459 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4460 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4461 } while (fHstRegsWithGstShadow != 0);
4462 }
4463
4464 return off;
4465}
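
/* Usage sketch (illustrative; pfnHelper and idxInstr are placeholders): the
   typical pattern around a helper call, as also done by iemNativeEmitCImplCall
   and iemNativeEmitThreadedCall below - flush guest shadows, evacuate the
   call-volatile registers, load the arguments, call, then check the status:
   @code
        iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
        off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 1 /*cArgs*/);
        off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
        off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
   @endcode */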
4466
4467
4468/**
4469 * Flushes a set of guest register shadow copies.
4470 *
4471 * This is usually done after calling a threaded function or a C-implementation
4472 * of an instruction.
4473 *
4474 * @param pReNative The native recompile state.
4475 * @param fGstRegs Set of guest registers to flush.
4476 */
4477DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4478{
4479 /*
4480 * Reduce the mask by what's currently shadowed
4481 */
4482 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4483 fGstRegs &= bmGstRegShadowsOld;
4484 if (fGstRegs)
4485 {
4486 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4487 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4488 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4489 if (bmGstRegShadowsNew)
4490 {
4491 /*
4492 * Partial.
4493 */
4494 do
4495 {
4496 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4497 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4498 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4499 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4500 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4501
4502 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4503 fGstRegs &= ~fInThisHstReg;
4504 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4505 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4506 if (!fGstRegShadowsNew)
4507 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4508 } while (fGstRegs != 0);
4509 }
4510 else
4511 {
4512 /*
4513 * Clear all.
4514 */
4515 do
4516 {
4517 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4518 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4519 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4520 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4521 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4522
4523 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4524 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4525 } while (fGstRegs != 0);
4526 pReNative->Core.bmHstRegsWithGstShadow = 0;
4527 }
4528 }
4529}
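
/* Usage sketch (illustrative): the mask is composed of kIemNativeGstReg_XXX
   bits, so flushing just the PC and EFLAGS shadows would look like:
   @code
        iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
   @endcode
   Passing UINT64_MAX flushes all guest register shadows currently tracked. */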
4530
4531
4532/**
4533 * Flushes guest register shadow copies held by a set of host registers.
4534 *
4535 * This is used with the TLB lookup code for ensuring that we don't carry on
4536 * with any guest shadows in volatile registers, as these will get corrupted by
4537 * a TLB miss.
4538 *
4539 * @param pReNative The native recompile state.
4540 * @param fHstRegs Set of host registers to flush guest shadows for.
4541 */
4542DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4543{
4544 /*
4545 * Reduce the mask by what's currently shadowed.
4546 */
4547 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4548 fHstRegs &= bmHstRegsWithGstShadowOld;
4549 if (fHstRegs)
4550 {
4551 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4552 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4553 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4554 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4555 if (bmHstRegsWithGstShadowNew)
4556 {
4557 /*
4558 * Partial (likely).
4559 */
4560 uint64_t fGstShadows = 0;
4561 do
4562 {
4563 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4564 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4565 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4566 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4567
4568 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4569 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4570 fHstRegs &= ~RT_BIT_32(idxHstReg);
4571 } while (fHstRegs != 0);
4572 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4573 }
4574 else
4575 {
4576 /*
4577 * Clear all.
4578 */
4579 do
4580 {
4581 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4582 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4583 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4584 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4585
4586 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4587 fHstRegs &= ~RT_BIT_32(idxHstReg);
4588 } while (fHstRegs != 0);
4589 pReNative->Core.bmGstRegShadows = 0;
4590 }
4591 }
4592}
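
/* Usage sketch (illustrative): dropping whatever guest shadows currently live
   in the call-volatile registers, e.g. ahead of TLB lookup code:
   @code
        iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
   @endcode */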
4593
4594
4595/**
4596 * Restores guest shadow copies in volatile registers.
4597 *
4598 * This is used after calling a helper function (think TLB miss) to restore the
4599 * register state of volatile registers.
4600 *
4601 * @param pReNative The native recompile state.
4602 * @param off The code buffer offset.
4603 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4604 * be active (allocated) w/o asserting. Hack.
4605 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4606 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4607 */
4608DECL_HIDDEN_THROW(uint32_t)
4609iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4610{
4611 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4612 if (fHstRegs)
4613 {
4614 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4615 do
4616 {
4617 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4618
4619            /* It's not fatal if a register is active holding a variable that
4620               shadows a guest register, ASSUMING all pending guest register
4621               writes were flushed prior to the helper call. However, we'll be
4622               emitting duplicate restores, so it wastes code space. */
4623 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4624 RT_NOREF(fHstRegsActiveShadows);
4625
4626 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4627 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4628 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4629 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4630
4631 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4632 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4633
4634 fHstRegs &= ~RT_BIT_32(idxHstReg);
4635 } while (fHstRegs != 0);
4636 }
4637 return off;
4638}
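
/* Usage sketch (illustrative; pfnHelper is a placeholder, and pending guest
   register writes are assumed to have been flushed beforehand): after a helper
   call that clobbers the call-volatile registers while the shadow bookkeeping
   was kept, this re-emits the loads so those registers hold the guest values
   again:
   @code
        off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
        off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
   @endcode */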
4639
4640
4641/**
4642 * Flushes delayed write of a specific guest register.
4643 *
4644 * This must be called prior to calling CImpl functions and any helpers that use
4645 * the guest state (like raising exceptions) and such.
4646 *
4647 * This optimization has not yet been implemented. The first target would be
4648 * RIP updates, since these are the most common ones.
4649 */
4650DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4651 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
4652{
4653 RT_NOREF(pReNative, enmClass, idxReg);
4654 return off;
4655}
4656
4657
4658/**
4659 * Flushes any delayed guest register writes.
4660 *
4661 * This must be called prior to calling CImpl functions and any helpers that use
4662 * the guest state (like raising exceptions) and such.
4663 *
4664 * This optimization has not yet been implemented. The first target would be
4665 * RIP updates, since these are the most common ones.
4666 */
4667DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4668{
4669 RT_NOREF(pReNative, off);
4670 return off;
4671}
4672
4673
4674#ifdef VBOX_STRICT
4675/**
4676 * Does internal register allocator sanity checks.
4677 */
4678static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
4679{
4680 /*
4681 * Iterate host registers building a guest shadowing set.
4682 */
4683 uint64_t bmGstRegShadows = 0;
4684 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
4685 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
4686 while (bmHstRegsWithGstShadow)
4687 {
4688 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
4689 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4690 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4691
4692 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4693 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
4694 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
4695 bmGstRegShadows |= fThisGstRegShadows;
4696 while (fThisGstRegShadows)
4697 {
4698 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
4699 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
4700 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
4701 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
4702 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
4703 }
4704 }
4705 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
4706 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
4707 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
4708
4709 /*
4710 * Now the other way around, checking the guest to host index array.
4711 */
4712 bmHstRegsWithGstShadow = 0;
4713 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
4714 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4715 while (bmGstRegShadows)
4716 {
4717 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
4718 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4719 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
4720
4721 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4722 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
4723 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
4724 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
4725 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4726 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4727 }
4728 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
4729 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
4730 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
4731}
4732#endif
4733
4734
4735/*********************************************************************************************************************************
4736* Code Emitters (larger snippets) *
4737*********************************************************************************************************************************/
4738
4739/**
4740 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
4741 * extending to 64-bit width.
4742 *
4743 * @returns New code buffer offset; throws VBox status code on error.
4744 * @param pReNative   The native recompile state.
4745 * @param off The current code buffer position.
4746 * @param idxHstReg The host register to load the guest register value into.
4747 * @param enmGstReg The guest register to load.
4748 *
4749 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
4750 * that is something the caller needs to do if applicable.
4751 */
4752DECL_HIDDEN_THROW(uint32_t)
4753iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
4754{
4755 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
4756 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4757
4758 switch (g_aGstShadowInfo[enmGstReg].cb)
4759 {
4760 case sizeof(uint64_t):
4761 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4762 case sizeof(uint32_t):
4763 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4764 case sizeof(uint16_t):
4765 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4766#if 0 /* not present in the table. */
4767 case sizeof(uint8_t):
4768 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4769#endif
4770 default:
4771 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4772 }
4773}
4774
4775
4776#ifdef VBOX_STRICT
4777/**
4778 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
4779 *
4780 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4781 * Trashes EFLAGS on AMD64.
4782 */
4783static uint32_t
4784iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
4785{
4786# ifdef RT_ARCH_AMD64
4787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4788
4789 /* rol reg64, 32 */
4790 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4791 pbCodeBuf[off++] = 0xc1;
4792 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4793 pbCodeBuf[off++] = 32;
4794
4795 /* test reg32, ffffffffh */
4796 if (idxReg >= 8)
4797 pbCodeBuf[off++] = X86_OP_REX_B;
4798 pbCodeBuf[off++] = 0xf7;
4799 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4800 pbCodeBuf[off++] = 0xff;
4801 pbCodeBuf[off++] = 0xff;
4802 pbCodeBuf[off++] = 0xff;
4803 pbCodeBuf[off++] = 0xff;
4804
4805 /* je/jz +1 */
4806 pbCodeBuf[off++] = 0x74;
4807 pbCodeBuf[off++] = 0x01;
4808
4809 /* int3 */
4810 pbCodeBuf[off++] = 0xcc;
4811
4812 /* rol reg64, 32 */
4813 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4814 pbCodeBuf[off++] = 0xc1;
4815 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4816 pbCodeBuf[off++] = 32;
4817
4818# elif defined(RT_ARCH_ARM64)
4819 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4820 /* lsr tmp0, reg64, #32 */
4821 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
4822 /* cbz tmp0, +1 */
4823 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4824 /* brk #0x1100 */
4825 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
4826
4827# else
4828# error "Port me!"
4829# endif
4830 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4831 return off;
4832}
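
/* In C terms the strict-build check emitted above amounts to (sketch; uReg
   stands for the 64-bit register selected by idxReg):
   @code
        if ((uint64_t)uReg >> 32 != 0)
            breakpoint();   // int3 on AMD64, brk #0x1100 on ARM64
   @endcode
   i.e. it traps whenever the upper half of the 64-bit register is not clear. */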
4833#endif /* VBOX_STRICT */
4834
4835
4836#ifdef VBOX_STRICT
4837/**
4838 * Emits code that checks that the content of register @a idxReg is the same
4839 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
4840 * instruction if that's not the case.
4841 *
4842 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4843 * Trashes EFLAGS on AMD64.
4844 */
4845static uint32_t
4846iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
4847{
4848# ifdef RT_ARCH_AMD64
4849 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4850
4851 /* cmp reg, [mem] */
4852 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
4853 {
4854 if (idxReg >= 8)
4855 pbCodeBuf[off++] = X86_OP_REX_R;
4856 pbCodeBuf[off++] = 0x38;
4857 }
4858 else
4859 {
4860 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
4861 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
4862 else
4863 {
4864 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
4865 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4866 else
4867 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
4868 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
4869 if (idxReg >= 8)
4870 pbCodeBuf[off++] = X86_OP_REX_R;
4871 }
4872 pbCodeBuf[off++] = 0x39;
4873 }
4874 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
4875
4876 /* je/jz +1 */
4877 pbCodeBuf[off++] = 0x74;
4878 pbCodeBuf[off++] = 0x01;
4879
4880 /* int3 */
4881 pbCodeBuf[off++] = 0xcc;
4882
4883 /* For values smaller than the register size, we must check that the rest
4884 of the register is all zeros. */
4885 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
4886 {
4887 /* test reg64, imm32 */
4888 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4889 pbCodeBuf[off++] = 0xf7;
4890 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4891 pbCodeBuf[off++] = 0;
4892 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
4893 pbCodeBuf[off++] = 0xff;
4894 pbCodeBuf[off++] = 0xff;
4895
4896 /* je/jz +1 */
4897 pbCodeBuf[off++] = 0x74;
4898 pbCodeBuf[off++] = 0x01;
4899
4900 /* int3 */
4901 pbCodeBuf[off++] = 0xcc;
4902 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4903 }
4904 else
4905 {
4906 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4907 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
4908 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
4909 }
4910
4911# elif defined(RT_ARCH_ARM64)
4912 /* mov TMP0, [gstreg] */
4913 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
4914
4915 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4916 /* sub tmp0, tmp0, idxReg */
4917 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
4918 /* cbz tmp0, +1 */
4919 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4920 /* brk #0x1000+enmGstReg */
4921 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
4922 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4923
4924# else
4925# error "Port me!"
4926# endif
4927 return off;
4928}
4929#endif /* VBOX_STRICT */
4930
4931
4932#ifdef VBOX_STRICT
4933/**
4934 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
4935 * important bits.
4936 *
4937 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4938 * Trashes EFLAGS on AMD64.
4939 */
4940static uint32_t
4941iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
4942{
4943 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4944 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
4945 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
4946 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
4947
4948# ifdef RT_ARCH_AMD64
4949 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4950
4951 /* je/jz +1 */
4952 pbCodeBuf[off++] = 0x74;
4953 pbCodeBuf[off++] = 0x01;
4954
4955 /* int3 */
4956 pbCodeBuf[off++] = 0xcc;
4957
4958# elif defined(RT_ARCH_ARM64)
4959 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4960
4961 /* b.eq +1 */
4962 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
4963 /* brk #0x2000 */
4964 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
4965
4966# else
4967# error "Port me!"
4968# endif
4969 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4970
4971 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4972 return off;
4973}
4974#endif /* VBOX_STRICT */
4975
4976
4977/**
4978 * Emits a code for checking the return code of a call and rcPassUp, returning
4979 * from the code if either are non-zero.
4980 */
4981DECL_HIDDEN_THROW(uint32_t)
4982iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
4983{
4984#ifdef RT_ARCH_AMD64
4985 /*
4986 * AMD64: eax = call status code.
4987 */
4988
4989 /* edx = rcPassUp */
4990 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4991# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4992 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
4993# endif
4994
4995 /* edx = eax | rcPassUp */
4996 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4997 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
4998 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
4999 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5000
5001 /* Jump to non-zero status return path. */
5002 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5003
5004 /* done. */
5005
5006#elif defined(RT_ARCH_ARM64)
5007 /*
5008 * ARM64: w0 = call status code.
5009 */
5010# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5011 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5012# endif
5013 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5014
5015 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5016
5017 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5018
5019 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5020 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5021 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5022
5023#else
5024# error "port me"
5025#endif
5026 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5027 return off;
5028}
5029
5030
5031/**
5032 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5033 * raising a \#GP(0) if it isn't.
5034 *
5035 * @returns New code buffer offset, UINT32_MAX on failure.
5036 * @param pReNative The native recompile state.
5037 * @param off The code buffer offset.
5038 * @param idxAddrReg The host register with the address to check.
5039 * @param idxInstr The current instruction.
5040 */
5041DECL_HIDDEN_THROW(uint32_t)
5042iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5043{
5044 /*
5045 * Make sure we don't have any outstanding guest register writes as we may
5046     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5047 */
5048 off = iemNativeRegFlushPendingWrites(pReNative, off);
5049
5050#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5051 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5052#else
5053 RT_NOREF(idxInstr);
5054#endif
5055
5056#ifdef RT_ARCH_AMD64
5057 /*
5058 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5059 * return raisexcpt();
5060     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5061 */
5062 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5063
5064 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5065 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5066 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5067 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5068 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5069
5070 iemNativeRegFreeTmp(pReNative, iTmpReg);
5071
5072#elif defined(RT_ARCH_ARM64)
5073 /*
5074 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5075 * return raisexcpt();
5076 * ----
5077 * mov x1, 0x800000000000
5078 * add x1, x0, x1
5079 * cmp xzr, x1, lsr 48
5080 * b.ne .Lraisexcpt
5081 */
5082 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5083
5084 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5085 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5086 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5087 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5088
5089 iemNativeRegFreeTmp(pReNative, iTmpReg);
5090
5091#else
5092# error "Port me"
5093#endif
5094 return off;
5095}
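
/* Worked example for the check above: bits 63:47 of a canonical address are
   all equal, so the high dword is either <= 0x00007fff or >= 0xffff8000.
   Adding 0x8000 (with 32-bit wrap-around) maps both ranges into 0x0000..0xffff,
   making the ">> 16" result zero; e.g.
   @code
        0x00007fffffffffff: (0x00007fff + 0x8000) >> 16 == 0  -> canonical
        0x0000800000000000: (0x00008000 + 0x8000) >> 16 == 1  -> raise #GP(0)
   @endcode */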
5096
5097
5098/**
5099 * Emits code to check if the content of @a idxAddrReg is within the limit of
5100 * idxSegReg, raising a \#GP(0) if it isn't.
5101 *
5102 * @returns New code buffer offset; throws VBox status code on error.
5103 * @param pReNative The native recompile state.
5104 * @param off The code buffer offset.
5105 * @param idxAddrReg The host register (32-bit) with the address to
5106 * check.
5107 * @param idxSegReg The segment register (X86_SREG_XXX) to check
5108 * against.
5109 * @param idxInstr The current instruction.
5110 */
5111DECL_HIDDEN_THROW(uint32_t)
5112iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5113 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
5114{
5115 /*
5116 * Make sure we don't have any outstanding guest register writes as we may
5117     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5118 */
5119 off = iemNativeRegFlushPendingWrites(pReNative, off);
5120
5121#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5122 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5123#else
5124 RT_NOREF(idxInstr);
5125#endif
5126
5127 /** @todo implement expand down/whatnot checking */
5128 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
5129
5130 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5131 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
5132 kIemNativeGstRegUse_ForUpdate);
5133
5134 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
5135 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5136
5137 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
5138 return off;
5139}
5140
5141
5142/**
5143 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5144 *
5145 * @returns The flush mask.
5146 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5147 * @param fGstShwFlush The starting flush mask.
5148 */
5149DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5150{
5151 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5152 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5153 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5154 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5155 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5156 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5157 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5158 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5159 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5160 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5161 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5162 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5163 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5164 return fGstShwFlush;
5165}
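
/* Worked example (values taken straight from the function above): a plain far
   branch flushes the CS selector, base and limit shadows:
   @code
        uint64_t const fFlush = iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR, 0);
        // fFlush == RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_CS)
        //         | RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_CS)
        //         | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS)
   @endcode */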
5166
5167
5168/**
5169 * Emits a call to a CImpl function or something similar.
5170 */
5171DECL_HIDDEN_THROW(uint32_t)
5172iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5173 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5174{
5175 /*
5176     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5177 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5178 */
5179 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5180 fGstShwFlush
5181 | RT_BIT_64(kIemNativeGstReg_Pc)
5182 | RT_BIT_64(kIemNativeGstReg_EFlags));
5183 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5184
5185 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5186
5187 /*
5188 * Load the parameters.
5189 */
5190#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
5191    /* Special-case the hidden VBOXSTRICTRC pointer. */
5192 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5193 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5194 if (cAddParams > 0)
5195 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5196 if (cAddParams > 1)
5197 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5198 if (cAddParams > 2)
5199 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5200 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5201
5202#else
5203 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5204 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5205 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5206 if (cAddParams > 0)
5207 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5208 if (cAddParams > 1)
5209 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5210 if (cAddParams > 2)
5211# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5212 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5213# else
5214 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5215# endif
5216#endif
5217
5218 /*
5219 * Make the call.
5220 */
5221 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5222
5223#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5224 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5225#endif
5226
5227 /*
5228 * Check the status code.
5229 */
5230 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5231}
5232
5233
5234/**
5235 * Emits a call to a threaded worker function.
5236 */
5237DECL_HIDDEN_THROW(uint32_t)
5238iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5239{
5240 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5241 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5242
5243#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5244 /* The threaded function may throw / long jmp, so set current instruction
5245 number if we're counting. */
5246 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5247#endif
5248
5249 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
5250
5251#ifdef RT_ARCH_AMD64
5252 /* Load the parameters and emit the call. */
5253# ifdef RT_OS_WINDOWS
5254# ifndef VBOXSTRICTRC_STRICT_ENABLED
5255 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5256 if (cParams > 0)
5257 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5258 if (cParams > 1)
5259 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5260 if (cParams > 2)
5261 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5262# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5263 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5264 if (cParams > 0)
5265 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5266 if (cParams > 1)
5267 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5268 if (cParams > 2)
5269 {
5270 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5271 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5272 }
5273 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5274# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5275# else
5276 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5277 if (cParams > 0)
5278 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5279 if (cParams > 1)
5280 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5281 if (cParams > 2)
5282 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5283# endif
5284
5285 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5286
5287# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5288 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5289# endif
5290
5291#elif defined(RT_ARCH_ARM64)
5292 /*
5293 * ARM64:
5294 */
5295 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5296 if (cParams > 0)
5297 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5298 if (cParams > 1)
5299 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5300 if (cParams > 2)
5301 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5302
5303 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5304
5305#else
5306# error "port me"
5307#endif
5308
5309 /*
5310 * Check the status code.
5311 */
5312 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5313
5314 return off;
5315}
5316
5317
5318/**
5319 * Emits the code at the CheckBranchMiss label.
5320 */
5321static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5322{
5323 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5324 if (idxLabel != UINT32_MAX)
5325 {
5326 iemNativeLabelDefine(pReNative, idxLabel, off);
5327
5328 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5329 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5330 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5331
5332 /* jump back to the return sequence. */
5333 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5334 }
5335 return off;
5336}
5337
5338
5339/**
5340 * Emits the code at the NeedCsLimChecking label.
5341 */
5342static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5343{
5344 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5345 if (idxLabel != UINT32_MAX)
5346 {
5347 iemNativeLabelDefine(pReNative, idxLabel, off);
5348
5349 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5350 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5351 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5352
5353 /* jump back to the return sequence. */
5354 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5355 }
5356 return off;
5357}
5358
5359
5360/**
5361 * Emits the code at the ObsoleteTb label.
5362 */
5363static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5364{
5365 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5366 if (idxLabel != UINT32_MAX)
5367 {
5368 iemNativeLabelDefine(pReNative, idxLabel, off);
5369
5370 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5371 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5372 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5373
5374 /* jump back to the return sequence. */
5375 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5376 }
5377 return off;
5378}
5379
5380
5381/**
5382 * Emits the code at the RaiseGP0 label.
5383 */
5384static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5385{
5386 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5387 if (idxLabel != UINT32_MAX)
5388 {
5389 iemNativeLabelDefine(pReNative, idxLabel, off);
5390
5391 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5392 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5393 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5394
5395 /* jump back to the return sequence. */
5396 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5397 }
5398 return off;
5399}
5400
5401
5402/**
5403 * Emits the code at the ReturnWithFlags label (returns
5404 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5405 */
5406static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5407{
5408 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5409 if (idxLabel != UINT32_MAX)
5410 {
5411 iemNativeLabelDefine(pReNative, idxLabel, off);
5412
5413 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5414
5415 /* jump back to the return sequence. */
5416 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5417 }
5418 return off;
5419}
5420
5421
5422/**
5423 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
5424 */
5425static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5426{
5427 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
5428 if (idxLabel != UINT32_MAX)
5429 {
5430 iemNativeLabelDefine(pReNative, idxLabel, off);
5431
5432 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5433
5434 /* jump back to the return sequence. */
5435 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5436 }
5437 return off;
5438}
5439
5440
5441/**
5442 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5443 */
5444static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5445{
5446 /*
5447 * Generate the rc + rcPassUp fiddling code if needed.
5448 */
5449 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5450 if (idxLabel != UINT32_MAX)
5451 {
5452 iemNativeLabelDefine(pReNative, idxLabel, off);
5453
5454 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
5455#ifdef RT_ARCH_AMD64
5456# ifdef RT_OS_WINDOWS
5457# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5458 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5459# endif
5460 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5461 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5462# else
5463 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5464 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5465# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5466 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5467# endif
5468# endif
5469# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5470 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5471# endif
5472
5473#else
5474 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5475 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5476 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5477#endif
5478
5479 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5480 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5481 }
5482 return off;
5483}
5484
5485
5486/**
5487 * Emits a standard epilog.
5488 */
5489static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5490{
5491 *pidxReturnLabel = UINT32_MAX;
5492
5493 /*
5494 * Successful return, so clear the return register (eax, w0).
5495 */
5496 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
5497
5498 /*
5499 * Define label for common return point.
5500 */
5501 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5502 *pidxReturnLabel = idxReturn;
5503
5504 /*
5505 * Restore registers and return.
5506 */
5507#ifdef RT_ARCH_AMD64
5508 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5509
5510    /* Reposition rsp at the r15 restore point. */
5511 pbCodeBuf[off++] = X86_OP_REX_W;
5512 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5513 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5514 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5515
5516 /* Pop non-volatile registers and return */
5517 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5518 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5519 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5520 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5521 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5522 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5523 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5524 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5525# ifdef RT_OS_WINDOWS
5526 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5527 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5528# endif
5529 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5530 pbCodeBuf[off++] = 0xc9; /* leave */
5531 pbCodeBuf[off++] = 0xc3; /* ret */
5532 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5533
5534#elif defined(RT_ARCH_ARM64)
5535 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5536
5537 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
5538 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5539 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5540 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5541 IEMNATIVE_FRAME_VAR_SIZE / 8);
5542 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5543 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5544 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5545 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5546 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5547 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5548 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5549 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5550 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5551 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5552 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5553 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5554
5555 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5556 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5557 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5558 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5559
5560 /* retab / ret */
5561# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5562 if (1)
5563 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5564 else
5565# endif
5566 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
5567
5568#else
5569# error "port me"
5570#endif
5571 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5572
5573 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
5574}
5575
5576
5577/**
5578 * Emits a standard prolog.
5579 */
5580static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5581{
5582#ifdef RT_ARCH_AMD64
5583 /*
5584 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
5585 * reserving 64 bytes for stack variables plus 4 non-register argument
5586     * slots. Fixed register assignment: xBX = pVCpu.
5587 *
5588 * Since we always do the same register spilling, we can use the same
5589 * unwind description for all the code.
5590 */
5591 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5592 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
5593 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
5594 pbCodeBuf[off++] = 0x8b;
5595 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
5596 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
5597 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
5598# ifdef RT_OS_WINDOWS
5599 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
5600 pbCodeBuf[off++] = 0x8b;
5601 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
5602 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
5603 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
5604# else
5605 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
5606 pbCodeBuf[off++] = 0x8b;
5607 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
5608# endif
5609 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
5610 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
5611 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
5612 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
5613 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
5614 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
5615 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
5616 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
5617
5618 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
5619 X86_GREG_xSP,
5620 IEMNATIVE_FRAME_ALIGN_SIZE
5621 + IEMNATIVE_FRAME_VAR_SIZE
5622 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
5623 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
5624 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
5625 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
5626 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
5627
5628#elif defined(RT_ARCH_ARM64)
5629 /*
5630 * We set up a stack frame exactly like on x86, only we have to push the
5631     * return address ourselves here. We save all non-volatile registers.
5632 */
5633 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5634
5635# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
5636                      * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.  It's
5637                      * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
5638                      * in any way conditional, so we just emit this instruction now and hope for the best... */
5639 /* pacibsp */
5640 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
5641# endif
5642
5643 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
5644 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
5645 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5646 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5647 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
5648 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
5649 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5650 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5651 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5652 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5653 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5654 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5655 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5656 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5657 /* Save the BP and LR (ret address) registers at the top of the frame. */
5658 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5659 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5660 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5661 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
5662 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
5663 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
5664
5665 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
5666 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
5667
5668 /* mov r28, r0 */
5669 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
5670 /* mov r27, r1 */
5671 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
5672
5673#else
5674# error "port me"
5675#endif
5676 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5677 return off;
5678}
5679
5680
5681
5682
5683/*********************************************************************************************************************************
5684* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
5685*********************************************************************************************************************************/
5686
5687#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
5688 { \
5689 Assert(pReNative->Core.bmVars == 0); \
5690 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
5691 Assert(pReNative->Core.bmStack == 0); \
5692 pReNative->fMc = (a_fMcFlags); \
5693 pReNative->fCImpl = (a_fCImplFlags); \
5694 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
5695
5696/** We have to get to the end in recompilation mode, as otherwise we won't
5697 * generate code for all the IEM_MC_IF_XXX branches. */
5698#define IEM_MC_END() \
5699 iemNativeVarFreeAll(pReNative); \
5700 } return off
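
/* Usage sketch (illustrative; the real bodies are generated from the
   instruction specifications): an emitter for a recompiled MC block has the
   shape
   @code
        IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags);
        // ... IEM_MC_XXX statements emitting native code and advancing 'off' ...
        IEM_MC_END();
   @endcode
   IEM_MC_BEGIN opens the scope and records the flag/argument info, while
   IEM_MC_END frees all variables and returns the final code buffer offset. */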
5701
5702
5703
5704/*********************************************************************************************************************************
5705* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
5706*********************************************************************************************************************************/
5707
5708#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
5709 pReNative->fMc = 0; \
5710 pReNative->fCImpl = (a_fFlags); \
5711 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
5712
5713
5714#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
5715 pReNative->fMc = 0; \
5716 pReNative->fCImpl = (a_fFlags); \
5717 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
5718
5719DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5720 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5721 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
5722{
5723 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
5724}
5725
5726
5727#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
5728 pReNative->fMc = 0; \
5729 pReNative->fCImpl = (a_fFlags); \
5730 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5731 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
5732
5733DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5734 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5735 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
5736{
5737 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
5738}
5739
5740
5741#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
5742 pReNative->fMc = 0; \
5743 pReNative->fCImpl = (a_fFlags); \
5744 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5745 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
5746
5747DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5748 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5749 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
5750 uint64_t uArg2)
5751{
5752 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
5753}
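/*
 * Illustrative note: all the IEM_MC_DEFER_TO_CIMPL_n_RET_THREADED wrappers above
 * simply record a_fFlags in pReNative->fCImpl and then funnel into the common
 * iemNativeEmitCImplCall() worker; the 2-argument form, for instance, ends up as
 *      return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr,
 *                                    a_fGstShwFlush, (uintptr_t)a_pfnCImpl,
 *                                    a_cbInstr, 2, a0, a1, 0 (unused));
 * with the unused trailing arguments passed as zero.
 */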
5754
5755
5756
5757/*********************************************************************************************************************************
5758* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
5759*********************************************************************************************************************************/
5760
5761/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
5762 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
5763DECL_INLINE_THROW(uint32_t)
5764iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5765{
5766 /*
5767     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
5768     * return with a special status code and make the execution loop deal with
5769     * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
5770     * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
5771     * could continue w/o interruption, it probably will drop into the
5772     * debugger, so it is not worth the effort of trying to service it here; we
5773     * just lump it in with the handling of the others.
5774     *
5775     * To simplify the code and the register state management even more (wrt
5776     * the immediate in the AND operation), we always update the flags and skip
5777     * the conditional jump associated with the extra check.
5778 */
5779 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
5780 <= UINT32_MAX);
5781 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5782 kIemNativeGstRegUse_ForUpdate);
5783 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
5784 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
5785 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
5786 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
5787 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5788
5789 /* Free but don't flush the EFLAGS register. */
5790 iemNativeRegFreeTmp(pReNative, idxEflReg);
5791
5792 return off;
5793}
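/*
 * Illustrative sketch (host independent pseudo code) of what the check above emits:
 *      if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          goto ReturnWithFlags;                   // leave the TB with a special status
 *      eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *      pVCpu->cpum.GstCtx.eflags = eflags;         // unconditionally stored, see the note above
 */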
5794
5795
5796/** Emits the a_rcNormal handling for the finish macros: nothing for VINF_SUCCESS, a jump to the ReturnBreak label otherwise. */
5797template<int const a_rcNormal>
5798DECL_FORCE_INLINE(uint32_t)
5799iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5800{
5801 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
5802 if (a_rcNormal != VINF_SUCCESS)
5803 {
5804#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5805 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5806#endif
5807 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
5808 }
5809 return off;
5810}
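/*
 * Note: the template above compiles to nothing for a_rcNormal == VINF_SUCCESS,
 * while the VINF_IEM_REEXEC_BREAK instantiation records the instruction index
 * (when IEMNATIVE_WITH_INSTRUCTION_COUNTING is defined) and emits a jump to the
 * ReturnBreak label so the TB is exited after this instruction.
 */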
5811
5812
5813#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
5814 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
5815 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5816
5817#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
5818 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
5819 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
5820 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5821
5822/** Same as iemRegAddToRip64AndFinishingNoFlags. */
5823DECL_INLINE_THROW(uint32_t)
5824iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5825{
5826 /* Allocate a temporary PC register. */
5827 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5828
5829 /* Perform the addition and store the result. */
5830 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
5831 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5832
5833 /* Free but don't flush the PC register. */
5834 iemNativeRegFreeTmp(pReNative, idxPcReg);
5835
5836 return off;
5837}
5838
5839
5840#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
5841 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
5842 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5843
5844#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
5845 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
5846 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
5847 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5848
5849/** Same as iemRegAddToEip32AndFinishingNoFlags. */
5850DECL_INLINE_THROW(uint32_t)
5851iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5852{
5853 /* Allocate a temporary PC register. */
5854 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5855
5856 /* Perform the addition and store the result. */
5857 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5858 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5859
5860 /* Free but don't flush the PC register. */
5861 iemNativeRegFreeTmp(pReNative, idxPcReg);
5862
5863 return off;
5864}
5865
5866
5867#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
5868 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
5869 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5870
5871#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
5872 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
5873 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
5874 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5875
5876/** Same as iemRegAddToIp16AndFinishingNoFlags. */
5877DECL_INLINE_THROW(uint32_t)
5878iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5879{
5880 /* Allocate a temporary PC register. */
5881 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5882
5883 /* Perform the addition and store the result. */
5884 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5885 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5886 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5887
5888 /* Free but don't flush the PC register. */
5889 iemNativeRegFreeTmp(pReNative, idxPcReg);
5890
5891 return off;
5892}
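/*
 * Illustrative sketch: the three PC advancers above all reduce to
 *      pc_reg  = <guest RIP shadow register>;
 *      pc_reg += cbInstr;                  // 64-bit, 32-bit or 16-bit addition
 *      pc_reg &= 0xffff;                   // 16-bit variant only
 *      pVCpu->cpum.GstCtx.rip = pc_reg;
 * with the host register freed afterwards but its guest-PC shadow copy kept
 * (freed, not flushed) for later use.
 */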
5893
5894
5895
5896/*********************************************************************************************************************************
5897* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
5898*********************************************************************************************************************************/
5899
5900#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
5901 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5902 (a_enmEffOpSize), pCallEntry->idxInstr); \
5903 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5904
5905#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
5906 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5907 (a_enmEffOpSize), pCallEntry->idxInstr); \
5908 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
5909 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5910
5911#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
5912 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5913 IEMMODE_16BIT, pCallEntry->idxInstr); \
5914 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5915
5916#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
5917 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5918 IEMMODE_16BIT, pCallEntry->idxInstr); \
5919 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
5920 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5921
5922#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
5923 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5924 IEMMODE_64BIT, pCallEntry->idxInstr); \
5925 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5926
5927#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
5928 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5929 IEMMODE_64BIT, pCallEntry->idxInstr); \
5930 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
5931 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5932
5933/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
5934 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
5935 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
5936DECL_INLINE_THROW(uint32_t)
5937iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5938 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5939{
5940 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
5941
5942 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5943 off = iemNativeRegFlushPendingWrites(pReNative, off);
5944
5945 /* Allocate a temporary PC register. */
5946 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5947
5948 /* Perform the addition. */
5949 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
5950
5951 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
5952 {
5953 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5954 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5955 }
5956 else
5957 {
5958 /* Just truncate the result to 16-bit IP. */
5959 Assert(enmEffOpSize == IEMMODE_16BIT);
5960 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5961 }
5962 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5963
5964 /* Free but don't flush the PC register. */
5965 iemNativeRegFreeTmp(pReNative, idxPcReg);
5966
5967 return off;
5968}
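/*
 * Illustrative sketch of the 64-bit relative jump emitted above:
 *      flush pending guest register writes     // we may raise #GP(0) below
 *      pc_reg  = <guest RIP shadow register>;
 *      pc_reg += cbInstr + offDisp;
 *      if 64-bit op size: raise #GP(0) and exit the TB unless pc_reg is canonical
 *      else (16-bit):     pc_reg &= 0xffff;
 *      pVCpu->cpum.GstCtx.rip = pc_reg;
 */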
5969
5970
5971#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
5972 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5973 (a_enmEffOpSize), pCallEntry->idxInstr); \
5974 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5975
5976#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
5977 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5978 (a_enmEffOpSize), pCallEntry->idxInstr); \
5979 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
5980 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5981
5982#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
5983 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5984 IEMMODE_16BIT, pCallEntry->idxInstr); \
5985 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5986
5987#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
5988 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5989 IEMMODE_16BIT, pCallEntry->idxInstr); \
5990 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
5991 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5992
5993#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
5994 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5995 IEMMODE_32BIT, pCallEntry->idxInstr); \
5996 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
5997
5998#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
5999 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6000 IEMMODE_32BIT, pCallEntry->idxInstr); \
6001 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6002 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6003
6004/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
6005 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
6006 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
6007DECL_INLINE_THROW(uint32_t)
6008iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6009 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6010{
6011 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
6012
6013 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6014 off = iemNativeRegFlushPendingWrites(pReNative, off);
6015
6016 /* Allocate a temporary PC register. */
6017 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6018
6019 /* Perform the addition. */
6020 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6021
6022 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6023 if (enmEffOpSize == IEMMODE_16BIT)
6024 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6025
6026 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
6027 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6028
6029 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6030
6031 /* Free but don't flush the PC register. */
6032 iemNativeRegFreeTmp(pReNative, idxPcReg);
6033
6034 return off;
6035}
6036
6037
6038#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
6039 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6040 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6041
6042#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
6043 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6044 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6045 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6046
6047#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
6048 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6049 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6050
6051#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6052 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6053 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6054 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6055
6056#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
6057 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6058 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6059
6060#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6061 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6062 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6063 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6064
6065/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6066DECL_INLINE_THROW(uint32_t)
6067iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6068 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6069{
6070 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6071 off = iemNativeRegFlushPendingWrites(pReNative, off);
6072
6073 /* Allocate a temporary PC register. */
6074 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6075
6076 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6077 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6078 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6079 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6080 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6081
6082 /* Free but don't flush the PC register. */
6083 iemNativeRegFreeTmp(pReNative, idxPcReg);
6084
6085 return off;
6086}
6087
6088
6089
6090/*********************************************************************************************************************************
6091* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                       *
6092*********************************************************************************************************************************/
6093
6094/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6095#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6096 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6097
6098/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6099#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6100 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6101
6102/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6103#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6104 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6105
6106/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6107 * clears flags. */
6108#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6109 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6110 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6111
6112/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6113 * clears flags. */
6114#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6115 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6116 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6117
6118/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6119 * clears flags. */
6120#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6121 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6122 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6123
6124#undef IEM_MC_SET_RIP_U16_AND_FINISH
6125
6126
6127/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6128#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6129 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6130
6131/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6132#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6133 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6134
6135/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6136 * clears flags. */
6137#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6138 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6139 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6140
6141/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6142 * and clears flags. */
6143#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6144 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6145 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6146
6147#undef IEM_MC_SET_RIP_U32_AND_FINISH
6148
6149
6150/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6151#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6152 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6153
6154/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6155 * and clears flags. */
6156#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6157 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6158 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6159
6160#undef IEM_MC_SET_RIP_U64_AND_FINISH
6161
6162
6163/** Same as iemRegRipJumpU16AndFinishNoFlags,
6164 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6165DECL_INLINE_THROW(uint32_t)
6166iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6167 uint8_t idxInstr, uint8_t cbVar)
6168{
6169 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6170 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
6171
6172 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6173 off = iemNativeRegFlushPendingWrites(pReNative, off);
6174
6175 /* Get a register with the new PC loaded from idxVarPc.
6176       Note! This ASSUMES that the high bits of the GPR are zeroed. */
6177 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6178
6179 /* Check limit (may #GP(0) + exit TB). */
6180 if (!f64Bit)
6181 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6182 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6183 else if (cbVar > sizeof(uint32_t))
6184 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6185
6186 /* Store the result. */
6187 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6188
6189 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6190    /** @todo implicitly free the variable? */
6191
6192 return off;
6193}
6194
6195
6196
6197/*********************************************************************************************************************************
6198* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
6199*********************************************************************************************************************************/
6200
6201/**
6202 * Pushes an IEM_MC_IF_XXX onto the condition stack.
6203 *
6204 * @returns Pointer to the condition stack entry.
6205 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if nested too deeply.
6206 */
6207DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
6208{
6209 uint32_t const idxStack = pReNative->cCondDepth;
6210 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
6211
6212 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
6213 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
6214
6215 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
6216 pEntry->fInElse = false;
6217 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
6218 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
6219
6220 return pEntry;
6221}
6222
6223
6224/**
6225 * Start of the if-block, snapshotting the register and variable state.
6226 */
6227DECL_INLINE_THROW(void)
6228iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
6229{
6230 Assert(offIfBlock != UINT32_MAX);
6231 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6232 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6233 Assert(!pEntry->fInElse);
6234
6235    /* Define the start of the IF block if requested or for disassembly purposes. */
6236 if (idxLabelIf != UINT32_MAX)
6237 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
6238#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6239 else
6240 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
6241#else
6242 RT_NOREF(offIfBlock);
6243#endif
6244
6245 /* Copy the initial state so we can restore it in the 'else' block. */
6246 pEntry->InitialState = pReNative->Core;
6247}
6248
6249
6250#define IEM_MC_ELSE() } while (0); \
6251 off = iemNativeEmitElse(pReNative, off); \
6252 do {
6253
6254/** Emits code related to IEM_MC_ELSE. */
6255DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6256{
6257 /* Check sanity and get the conditional stack entry. */
6258 Assert(off != UINT32_MAX);
6259 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6260 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6261 Assert(!pEntry->fInElse);
6262
6263 /* Jump to the endif */
6264 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
6265
6266 /* Define the else label and enter the else part of the condition. */
6267 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6268 pEntry->fInElse = true;
6269
6270 /* Snapshot the core state so we can do a merge at the endif and restore
6271 the snapshot we took at the start of the if-block. */
6272 pEntry->IfFinalState = pReNative->Core;
6273 pReNative->Core = pEntry->InitialState;
6274
6275 return off;
6276}
6277
6278
6279#define IEM_MC_ENDIF() } while (0); \
6280 off = iemNativeEmitEndIf(pReNative, off)
6281
6282/** Emits code related to IEM_MC_ENDIF. */
6283DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6284{
6285 /* Check sanity and get the conditional stack entry. */
6286 Assert(off != UINT32_MAX);
6287 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6288 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6289
6290 /*
6291     * Now we have to find common ground with the other core state (the final if-block
6292     * state when we're in the else-block, otherwise the initial state).  Use the smallest
6293     * common denominator and just drop anything that isn't the same in both states.
6294 */
6295 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
6296 * which is why we're doing this at the end of the else-block.
6297     * But we'd need more info about the future for that to be worth the effort. */
6298 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
6299 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
6300 {
6301 /* shadow guest stuff first. */
6302 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
6303 if (fGstRegs)
6304 {
6305 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
6306 do
6307 {
6308 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
6309 fGstRegs &= ~RT_BIT_64(idxGstReg);
6310
6311 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6312 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
6313 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
6314 {
6315 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
6316 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
6317 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
6318 }
6319 } while (fGstRegs);
6320 }
6321 else
6322 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
6323
6324 /* Check variables next. For now we must require them to be identical
6325 or stuff we can recreate. */
6326 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
6327 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
6328 if (fVars)
6329 {
6330 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
6331 do
6332 {
6333 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
6334 fVars &= ~RT_BIT_32(idxVar);
6335
6336 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
6337 {
6338 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
6339 continue;
6340 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6341 {
6342 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6343 if (idxHstReg != UINT8_MAX)
6344 {
6345 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6346 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6347 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
6348 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6349 }
6350 continue;
6351 }
6352 }
6353 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
6354 continue;
6355
6356 /* Irreconcilable, so drop it. */
6357 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6358 if (idxHstReg != UINT8_MAX)
6359 {
6360 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6361 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6362 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
6363 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6364 }
6365 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
6366 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6367 } while (fVars);
6368 }
6369
6370    /* Finally, check that the host register allocations match. */
6371 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
6372 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
6373 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
6374 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
6375 }
6376
6377 /*
6378 * Define the endif label and maybe the else one if we're still in the 'if' part.
6379 */
6380 if (!pEntry->fInElse)
6381 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6382 else
6383 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
6384 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
6385
6386    /* Pop the conditional stack. */
6387 pReNative->cCondDepth -= 1;
6388
6389 return off;
6390}
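/*
 * Illustrative sketch (hypothetical condition): an
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...
 *      } IEM_MC_ELSE() {
 *          ...
 *      } IEM_MC_ENDIF();
 * block therefore recompiles roughly into
 *          test-bit + jump-if-clear -> label_else      ; emitted by the IF emitter
 *          ...if-block code...
 *          jmp                      -> label_endif     ; iemNativeEmitElse
 *      label_else:
 *          ...else-block code...
 *      label_endif:
 * with the register/variable state snapshotted at the start of the if-block,
 * restored for the else-block, and reconciled again here at the endif.
 */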
6391
6392
6393#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
6394 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
6395 do {
6396
6397/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
6398DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6399{
6400 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6401
6402 /* Get the eflags. */
6403 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6404 kIemNativeGstRegUse_ReadOnly);
6405
6406 /* Test and jump. */
6407 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6408
6409 /* Free but don't flush the EFlags register. */
6410 iemNativeRegFreeTmp(pReNative, idxEflReg);
6411
6412 /* Make a copy of the core state now as we start the if-block. */
6413 iemNativeCondStartIfBlock(pReNative, off);
6414
6415 return off;
6416}
6417
6418
6419#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
6420 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
6421 do {
6422
6423/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
6424DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6425{
6426 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6427
6428 /* Get the eflags. */
6429 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6430 kIemNativeGstRegUse_ReadOnly);
6431
6432 /* Test and jump. */
6433 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6434
6435 /* Free but don't flush the EFlags register. */
6436 iemNativeRegFreeTmp(pReNative, idxEflReg);
6437
6438 /* Make a copy of the core state now as we start the if-block. */
6439 iemNativeCondStartIfBlock(pReNative, off);
6440
6441 return off;
6442}
6443
6444
6445#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
6446 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
6447 do {
6448
6449/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
6450DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6451{
6452 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6453
6454 /* Get the eflags. */
6455 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6456 kIemNativeGstRegUse_ReadOnly);
6457
6458 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6459 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6460
6461 /* Test and jump. */
6462 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6463
6464 /* Free but don't flush the EFlags register. */
6465 iemNativeRegFreeTmp(pReNative, idxEflReg);
6466
6467 /* Make a copy of the core state now as we start the if-block. */
6468 iemNativeCondStartIfBlock(pReNative, off);
6469
6470 return off;
6471}
6472
6473
6474#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
6475 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
6476 do {
6477
6478/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
6479DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6480{
6481 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6482
6483 /* Get the eflags. */
6484 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6485 kIemNativeGstRegUse_ReadOnly);
6486
6487 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6488 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6489
6490 /* Test and jump. */
6491 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6492
6493 /* Free but don't flush the EFlags register. */
6494 iemNativeRegFreeTmp(pReNative, idxEflReg);
6495
6496 /* Make a copy of the core state now as we start the if-block. */
6497 iemNativeCondStartIfBlock(pReNative, off);
6498
6499 return off;
6500}
6501
6502
6503#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
6504 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
6505 do {
6506
6507#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
6508 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
6509 do {
6510
6511/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
6512DECL_INLINE_THROW(uint32_t)
6513iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6514 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6515{
6516 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6517
6518 /* Get the eflags. */
6519 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6520 kIemNativeGstRegUse_ReadOnly);
6521
6522 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6523 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6524
6525 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6526 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6527 Assert(iBitNo1 != iBitNo2);
6528
6529#ifdef RT_ARCH_AMD64
6530 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
6531
6532 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6533 if (iBitNo1 > iBitNo2)
6534 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6535 else
6536 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6537 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6538
6539#elif defined(RT_ARCH_ARM64)
6540 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6541 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6542
6543 /* and tmpreg, eflreg, #1<<iBitNo1 */
6544 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6545
6546    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6547 if (iBitNo1 > iBitNo2)
6548 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6549 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6550 else
6551 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6552 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6553
6554 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6555
6556#else
6557# error "Port me"
6558#endif
6559
6560 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6561 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6562 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6563
6564 /* Free but don't flush the EFlags and tmp registers. */
6565 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6566 iemNativeRegFreeTmp(pReNative, idxEflReg);
6567
6568 /* Make a copy of the core state now as we start the if-block. */
6569 iemNativeCondStartIfBlock(pReNative, off);
6570
6571 return off;
6572}
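/*
 * Note on the bit trick above: tmpreg = eflags & fBit1 isolates the first flag,
 * the shift lines it up with the second flag's bit position, and the XOR against
 * eflags leaves bit iBitNo2 of tmpreg set exactly when the two flags differ.
 * Testing that single bit thus serves both the EQ variant (jump to the else-block
 * when set) and the NE variant (jump when clear).
 */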
6573
6574
6575#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
6576 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
6577 do {
6578
6579#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
6580 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
6581 do {
6582
6583/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
6584 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
6585DECL_INLINE_THROW(uint32_t)
6586iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
6587 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6588{
6589 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6590
6591    /* We need an if-block label for the inverted variant, as its lone-bit check jumps straight to the if-block. */
6592 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
6593 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
6594
6595 /* Get the eflags. */
6596 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6597 kIemNativeGstRegUse_ReadOnly);
6598
6599 /* Translate the flag masks to bit numbers. */
6600 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6601 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6602
6603 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6604 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6605 Assert(iBitNo1 != iBitNo);
6606
6607 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6608 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6609 Assert(iBitNo2 != iBitNo);
6610 Assert(iBitNo2 != iBitNo1);
6611
6612#ifdef RT_ARCH_AMD64
6613 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
6614#elif defined(RT_ARCH_ARM64)
6615 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6616#endif
6617
6618 /* Check for the lone bit first. */
6619 if (!fInverted)
6620 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6621 else
6622 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
6623
6624 /* Then extract and compare the other two bits. */
6625#ifdef RT_ARCH_AMD64
6626 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6627 if (iBitNo1 > iBitNo2)
6628 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6629 else
6630 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6631 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6632
6633#elif defined(RT_ARCH_ARM64)
6634 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6635
6636 /* and tmpreg, eflreg, #1<<iBitNo1 */
6637 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6638
6639    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6640 if (iBitNo1 > iBitNo2)
6641 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6642 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6643 else
6644 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6645 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6646
6647 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6648
6649#else
6650# error "Port me"
6651#endif
6652
6653 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6654 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6655 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6656
6657 /* Free but don't flush the EFlags and tmp registers. */
6658 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6659 iemNativeRegFreeTmp(pReNative, idxEflReg);
6660
6661 /* Make a copy of the core state now as we start the if-block. */
6662 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
6663
6664 return off;
6665}
6666
6667
6668#define IEM_MC_IF_CX_IS_NZ() \
6669 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
6670 do {
6671
6672/** Emits code for IEM_MC_IF_CX_IS_NZ. */
6673DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6674{
6675 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6676
6677 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6678 kIemNativeGstRegUse_ReadOnly);
6679 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6680 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6681
6682 iemNativeCondStartIfBlock(pReNative, off);
6683 return off;
6684}
6685
6686
6687#define IEM_MC_IF_ECX_IS_NZ() \
6688 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
6689 do {
6690
6691#define IEM_MC_IF_RCX_IS_NZ() \
6692 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
6693 do {
6694
6695/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
6696DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
6697{
6698 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6699
6700 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6701 kIemNativeGstRegUse_ReadOnly);
6702 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6703 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6704
6705 iemNativeCondStartIfBlock(pReNative, off);
6706 return off;
6707}
6708
6709
6710#define IEM_MC_IF_CX_IS_NOT_ONE() \
6711 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
6712 do {
6713
6714/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
6715DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6716{
6717 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6718
6719 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6720 kIemNativeGstRegUse_ReadOnly);
6721#ifdef RT_ARCH_AMD64
6722 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
6723#else
6724 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6725 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
6726 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6727#endif
6728 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6729
6730 iemNativeCondStartIfBlock(pReNative, off);
6731 return off;
6732}
6733
6734
6735#define IEM_MC_IF_ECX_IS_NOT_ONE() \
6736 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
6737 do {
6738
6739#define IEM_MC_IF_RCX_IS_NOT_ONE() \
6740 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
6741 do {
6742
6743/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
6744DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
6745{
6746 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6747
6748 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6749 kIemNativeGstRegUse_ReadOnly);
6750 if (f64Bit)
6751 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
6752 else
6753 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
6754 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6755
6756 iemNativeCondStartIfBlock(pReNative, off);
6757 return off;
6758}
6759
6760
6761#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
6762 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
6763 do {
6764
6765#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
6766 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
6767 do {
6768
6769/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
6770 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
6771DECL_INLINE_THROW(uint32_t)
6772iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
6773{
6774 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6775
6776 /* We have to load both RCX and EFLAGS before we can start branching,
6777 otherwise we'll end up in the else-block with an inconsistent
6778 register allocator state.
6779 Doing EFLAGS first as it's more likely to be loaded, right? */
6780 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6781 kIemNativeGstRegUse_ReadOnly);
6782 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6783 kIemNativeGstRegUse_ReadOnly);
6784
6785 /** @todo we could reduce this to a single branch instruction by spending a
6786 * temporary register and some setnz stuff. Not sure if loops are
6787 * worth it. */
6788 /* Check CX. */
6789#ifdef RT_ARCH_AMD64
6790 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
6791#else
6792 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6793 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
6794 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6795#endif
6796
6797 /* Check the EFlags bit. */
6798 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6799 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6800 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6801 !fCheckIfSet /*fJmpIfSet*/);
6802
6803 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6804 iemNativeRegFreeTmp(pReNative, idxEflReg);
6805
6806 iemNativeCondStartIfBlock(pReNative, off);
6807 return off;
6808}
6809
6810
6811#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
6812 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
6813 do {
6814
6815#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
6816 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
6817 do {
6818
6819#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
6820 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
6821 do {
6822
6823#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
6824 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
6825 do {
6826
6827/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
6828 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
6829 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
6830 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
6831DECL_INLINE_THROW(uint32_t)
6832iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6833 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
6834{
6835 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6836
6837 /* We have to load both RCX and EFLAGS before we can start branching,
6838 otherwise we'll end up in the else-block with an inconsistent
6839 register allocator state.
6840 Doing EFLAGS first as it's more likely to be loaded, right? */
6841 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6842 kIemNativeGstRegUse_ReadOnly);
6843 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6844 kIemNativeGstRegUse_ReadOnly);
6845
6846 /** @todo we could reduce this to a single branch instruction by spending a
6847 * temporary register and some setnz stuff. Not sure if loops are
6848 * worth it. */
6849 /* Check RCX/ECX. */
6850 if (f64Bit)
6851 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
6852 else
6853 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
6854
6855 /* Check the EFlags bit. */
6856 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6857 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6858 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6859 !fCheckIfSet /*fJmpIfSet*/);
6860
6861 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6862 iemNativeRegFreeTmp(pReNative, idxEflReg);
6863
6864 iemNativeCondStartIfBlock(pReNative, off);
6865 return off;
6866}
6867
6868
6869
6870/*********************************************************************************************************************************
6871* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6872*********************************************************************************************************************************/
6873/** Number of hidden arguments for CIMPL calls.
6874 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
6875#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6876# define IEM_CIMPL_HIDDEN_ARGS 3
6877#else
6878# define IEM_CIMPL_HIDDEN_ARGS 2
6879#endif
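/*
 * Note: the hidden arguments are presumably the pVCpu pointer and the instruction
 * length that every C-implementation worker takes; on Windows/AMD64 with
 * VBOXSTRICTRC_STRICT_ENABLED the strict status code is additionally returned via
 * a hidden buffer pointer, hence the third slot.
 */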
6880
6881#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
6882 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
6883
6884#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
6885 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
6886
6887#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
6888 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
6889
6890#define IEM_MC_LOCAL(a_Type, a_Name) \
6891 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
6892
6893#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
6894 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
6895
6896
6897/**
6898 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
6899 */
6900DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
6901{
6902 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
6903 return IEM_CIMPL_HIDDEN_ARGS;
6904 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
6905 return 1;
6906 return 0;
6907}
6908
6909
6910/**
6911 * Internal work that allocates a variable with kind set to
6912 * kIemNativeVarKind_Invalid and no current stack allocation.
6913 *
6914 * The kind will either be set by the caller or later when the variable is first
6915 * assigned a value.
6916 */
6917static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6918{
6919 Assert(cbType > 0 && cbType <= 64);
6920 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6921 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6922 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6923 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6924 pReNative->Core.aVars[idxVar].cbVar = cbType;
6925 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6926 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6927 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6928 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6929 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6930 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6931 pReNative->Core.aVars[idxVar].u.uValue = 0;
6932 return idxVar;
6933}
6934
6935
6936/**
6937 * Internal work that allocates an argument variable w/o setting enmKind.
6938 */
6939static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6940{
6941 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6942 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6943 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6944
6945 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6946 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
6947 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6948 return idxVar;
6949}
6950
6951
6952/**
6953 * Gets the stack slot for a stack variable, allocating one if necessary.
6954 *
6955 * Calling this function implies that the stack slot will contain a valid
6956 * variable value. The caller deals with any register currently assigned to the
6957 * variable, typically by spilling it into the stack slot.
6958 *
6959 * @returns The stack slot number.
6960 * @param pReNative The recompiler state.
6961 * @param idxVar The variable.
6962 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6963 */
6964DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6965{
6966 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6967 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
6968
6969 /* Already got a slot? */
6970 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6971 if (idxStackSlot != UINT8_MAX)
6972 {
6973 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6974 return idxStackSlot;
6975 }
6976
6977 /*
6978 * A single slot is easy to allocate.
6979 * Allocate them from the top end, closest to BP, to reduce the displacement.
6980 */
6981 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
6982 {
6983 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6984 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6985 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6986 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6987        Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
6988 return (uint8_t)iSlot;
6989 }
6990
6991 /*
6992 * We need more than one stack slot.
6993 *
6994 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6995 */
6996 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6997 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
6998 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
6999 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
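    /* E.g. for a 32 byte variable: fBitAlignMask = 3 (the first slot index must be a
       multiple of 4) and fBitAllocMask = 0xf (four 8-byte slots get claimed). */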
7000 uint32_t bmStack = ~pReNative->Core.bmStack;
7001 while (bmStack != UINT32_MAX)
7002 {
7003/** @todo allocate from the top to reduce BP displacement. */
7004 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7005 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7006 if (!(iSlot & fBitAlignMask))
7007 {
7008 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7009 {
7010 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7011 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
7012                Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
7013 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
7014 return (uint8_t)iSlot;
7015 }
7016 }
7017 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7018 }
7019 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7020}
7021
7022
7023/**
7024 * Changes the variable to a stack variable.
7025 *
7026 * Currently this is only possible to do the first time the variable is used;
7027 * switching later can be implemented but hasn't been done.
7028 *
7029 * @param pReNative The recompiler state.
7030 * @param idxVar The variable.
7031 * @throws VERR_IEM_VAR_IPE_2
7032 */
7033static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7034{
7035 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7036 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
7037 {
7038 /* We could in theory transition from immediate to stack as well, but it
7039 would involve the caller doing work storing the value on the stack. So,
7040 till that's required we only allow transition from invalid. */
7041 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7042 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7043 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7044 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
7045
7046 /* Note! We don't allocate a stack slot here, that's only done when a
7047 slot is actually needed to hold a variable value. */
7048 }
7049}
7050
7051
7052/**
7053 * Sets the variable to a constant (immediate) value.
7054 *
7055 * This does not require stack storage as we know the value and can always
7056 * reload it, unless of course it's referenced.
7057 *
7058 * @param pReNative The recompiler state.
7059 * @param idxVar The variable.
7060 * @param uValue The immediate value.
7061 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7062 */
7063static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7064{
7065 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7066 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
7067 {
7068 /* Only simple transitions for now. */
7069 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7070 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7071 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
7072 }
7073 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7074
7075 pReNative->Core.aVars[idxVar].u.uValue = uValue;
7076 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
7077 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
7078 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
7079}
7080
7081
7082/**
7083 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7084 *
7085 * This does not require stack storage as we know the value and can always
7086 * reload it. Loading is postponed till needed.
7087 *
7088 * @param pReNative The recompiler state.
7089 * @param idxVar The variable.
7090 * @param idxOtherVar The variable to take the (stack) address of.
7091 *
7092 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7093 */
7094static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7095{
7096 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7097 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7098
7099 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7100 {
7101 /* Only simple transitions for now. */
7102 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7103 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7104 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7105 }
7106 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7107
7108 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
7109
7110 /* Update the other variable, ensure it's a stack variable. */
7111 /** @todo handle variables with const values... that'll go boom now. */
7112 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7113 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
7114}
7115
7116
7117/**
7118 * Sets the variable to a reference (pointer) to a guest register reference.
7119 *
7120 * This does not require stack storage as we know the value and can always
7121 * reload it. Loading is postponed till needed.
7122 *
7123 * @param pReNative The recompiler state.
7124 * @param idxVar The variable.
7125 * @param   enmRegClass     The class of guest registers to reference.
7126 * @param idxReg The register within @a enmRegClass to reference.
7127 *
7128 * @throws VERR_IEM_VAR_IPE_2
7129 */
7130static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7131 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7132{
7133 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7134
7135 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
7136 {
7137 /* Only simple transitions for now. */
7138 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7139 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7140 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
7141 }
7142 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7143
7144 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
7145 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
7146}
7147
7148
7149DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7150{
7151 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7152}
7153
7154
7155DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7156{
7157 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7158
7159    /* Since we're using a generic uint64_t value type, we must truncate it if
7160       the variable is smaller, otherwise we may end up with too large a value when
7161       scaling up an imm8 w/ sign-extension.
7162
7163       This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7164       in the bios, bx=1) when running on arm, because clang expects 16-bit
7165       register parameters to have bits 16 and up set to zero.  Instead of
7166       setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7167       CF value in the result. */
7168 switch (cbType)
7169 {
7170 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7171 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7172 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7173 }
7174 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7175 return idxVar;
7176}
7177
7178
7179DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7180{
7181 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7182 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7183 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7184 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7185
7186 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7187 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
7188 return idxArgVar;
7189}
7190
7191
7192DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7193{
7194 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7195 /* Don't set to stack now, leave that to the first use as for instance
7196 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7197 return idxVar;
7198}
7199
7200
7201DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7202{
7203 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7204
7205    /* Since we're using a generic uint64_t value type, we must truncate it if
7206       the variable is smaller, otherwise we may end up with too large a value when
7207       scaling up an imm8 w/ sign-extension. */
7208 switch (cbType)
7209 {
7210 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7211 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7212 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7213 }
7214 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7215 return idxVar;
7216}
7217
7218
7219/**
7220 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7221 * fixed till we call iemNativeVarRegisterRelease.
7222 *
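 * Illustrative use in an emitter (a sketch, not lifted from a particular caller):
 * @code
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
 *      ... emit instructions using the host register idxVarReg ...
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 * @endcode
 *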
7223 * @returns The host register number.
7224 * @param pReNative The recompiler state.
7225 * @param idxVar The variable.
7226 * @param poff Pointer to the instruction buffer offset.
7227 * In case a register needs to be freed up or the value
7228 * loaded off the stack.
7229 * @param fInitialized Set if the variable must already have been initialized.
7230 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7231 * the case.
7232 * @param idxRegPref Preferred register number or UINT8_MAX.
7233 */
7234DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7235 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7236{
7237 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7238 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
7239 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7240
7241 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7242 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7243 {
7244 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
7245 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7246 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7247 return idxReg;
7248 }
7249
7250 /*
7251 * If the kind of variable has not yet been set, default to 'stack'.
7252 */
7253 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
7254 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7255 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
7256 iemNativeVarSetKindToStack(pReNative, idxVar);
7257
7258 /*
7259     * We have to allocate a register for the variable, even if it's a stack one,
7260     * as we don't know if there are modifications being made to it before it's
7261     * finalized (todo: analyze and insert hints about that?).
7262     *
7263     * If we can, we try to get the correct register for argument variables.  This
7264     * is assuming that most argument variables are fetched as close as possible
7265     * to the actual call, so that there aren't any interfering hidden calls
7266     * (memory accesses, etc.) in between.
7267     *
7268     * If we cannot, or it's a local variable, we make sure no argument registers
7269     * that will be used by this MC block will be allocated here, and we always
7270     * prefer non-volatile registers to avoid needing to spill stuff for internal
7271     * calls.
7272 */
7273 /** @todo Detect too early argument value fetches and warn about hidden
7274 * calls causing less optimal code to be generated in the python script. */
7275
7276 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7277 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7278 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7279 {
7280 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7281 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7282 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7283 }
7284 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7285 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7286 {
7287 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7288 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7289 & ~pReNative->Core.bmHstRegsWithGstShadow
7290 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7291 & fNotArgsMask;
7292 if (fRegs)
7293 {
7294            /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
7295 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7296 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7297 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7298 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7299 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7300 }
7301 else
7302 {
7303 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7304 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7305 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7306 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7307 }
7308 }
7309 else
7310 {
7311 idxReg = idxRegPref;
7312 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7313 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
7314 }
7315 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7316 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7317
7318 /*
7319 * Load it off the stack if we've got a stack slot.
7320 */
7321 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7322 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7323 {
7324 Assert(fInitialized);
7325 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7326 switch (pReNative->Core.aVars[idxVar].cbVar)
7327 {
7328 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7329 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7330 case 3: AssertFailed(); RT_FALL_THRU();
7331 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7332 default: AssertFailed(); RT_FALL_THRU();
7333 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7334 }
7335 }
7336 else
7337 {
7338 Assert(idxStackSlot == UINT8_MAX);
7339 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7340 }
7341 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7342 return idxReg;
7343}
7344
7345
7346/**
7347 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7348 * guest register.
7349 *
7350 * This function makes sure there is a register for it and sets it to be the
7351 * current shadow copy of @a enmGstReg.
7352 *
7353 * @returns The host register number.
7354 * @param pReNative The recompiler state.
7355 * @param idxVar The variable.
7356 * @param enmGstReg The guest register this variable will be written to
7357 * after this call.
7358 * @param poff Pointer to the instruction buffer offset.
7359 * In case a register needs to be freed up or if the
7360 * variable content needs to be loaded off the stack.
7361 *
7362 * @note We DO NOT expect @a idxVar to be an argument variable,
7363 *       because we can only be in the commit stage of an instruction when this
7364 *       function is used.
7365 */
7366DECL_HIDDEN_THROW(uint8_t)
7367iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7368{
7369 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7370 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7371 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
7372 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
7373 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
7374 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
7375 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7376 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7377
7378 /*
7379 * This shouldn't ever be used for arguments, unless it's in a weird else
7380 * branch that doesn't do any calling and even then it's questionable.
7381 *
7382 * However, in case someone writes crazy wrong MC code and does register
7383 * updates before making calls, just use the regular register allocator to
7384 * ensure we get a register suitable for the intended argument number.
7385 */
7386 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7387
7388 /*
7389 * If there is already a register for the variable, we transfer/set the
7390 * guest shadow copy assignment to it.
7391 */
7392 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7393 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7394 {
7395 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7396 {
7397 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7398 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7399 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7400 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7401 }
7402 else
7403 {
7404 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7405 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7406 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7407 }
7408 /** @todo figure this one out. We need some way of making sure the register isn't
7409 * modified after this point, just in case we start writing crappy MC code. */
7410 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
7411 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7412 return idxReg;
7413 }
7414 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7415
7416 /*
7417     * Because this is supposed to be the commit stage, we just tag along with the
7418 * temporary register allocator and upgrade it to a variable register.
7419 */
7420 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7421 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7422 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7423 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7424 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7425 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7426
7427 /*
7428 * Now we need to load the register value.
7429 */
7430 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
7431 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
7432 else
7433 {
7434 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7435 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7436 switch (pReNative->Core.aVars[idxVar].cbVar)
7437 {
7438 case sizeof(uint64_t):
7439 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7440 break;
7441 case sizeof(uint32_t):
7442 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7443 break;
7444 case sizeof(uint16_t):
7445 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7446 break;
7447 case sizeof(uint8_t):
7448 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7449 break;
7450 default:
7451 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7452 }
7453 }
7454
7455 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7456 return idxReg;
7457}
7458
7459
7460/**
7461 * Sets the host register for @a idxVar to @a idxReg.
7462 *
7463 * The register must not be allocated. Any guest register shadowing will be
7464 * implicitly dropped by this call.
7465 *
7466 * The variable must not have any register associated with it (causes
7467 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
7468 * implied.
7469 *
7470 * @returns idxReg
7471 * @param pReNative The recompiler state.
7472 * @param idxVar The variable.
7473 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
7474 * @param off For recording in debug info.
7475 *
7476 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
7477 */
7478DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
7479{
7480 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7481 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7482 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
7483 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
7484 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
7485
7486 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
7487 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7488
7489 iemNativeVarSetKindToStack(pReNative, idxVar);
7490 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7491
7492 return idxReg;
7493}
7494
7495
7496/**
7497 * A convenient helper function.
7498 * Convenience wrapper that calls iemNativeVarRegisterSet() and marks the register as acquired.
7499DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7500 uint8_t idxReg, uint32_t *poff)
7501{
7502 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
7503 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7504 return idxReg;
7505}
7506
7507
7508/**
7509 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7510 *
7511 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7512 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7513 * requirement of flushing anything in volatile host registers when making a
7514 * call.
7515 *
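 * Illustrative sequence around a helper call (a sketch, not from a specific caller;
 * pfnHelper stands for whatever helper is being invoked):
 * @code
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      ... load the helper arguments ...
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 * @endcode
 *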
7516 * @returns New @a off value.
7517 * @param pReNative The recompiler state.
7518 * @param off The code buffer position.
7519 * @param fHstRegsNotToSave Set of registers not to save & restore.
7520 */
7521DECL_HIDDEN_THROW(uint32_t)
7522iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7523{
7524 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7525 if (fHstRegs)
7526 {
7527 do
7528 {
7529 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7530 fHstRegs &= ~RT_BIT_32(idxHstReg);
7531
7532 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7533 {
7534 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7535 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7536 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7537 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7538 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7539 switch (pReNative->Core.aVars[idxVar].enmKind)
7540 {
7541 case kIemNativeVarKind_Stack:
7542 {
7543 /* Temporarily spill the variable register. */
7544 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7545 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7546 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7547 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7548 continue;
7549 }
7550
7551 case kIemNativeVarKind_Immediate:
7552 case kIemNativeVarKind_VarRef:
7553 case kIemNativeVarKind_GstRegRef:
7554 /* It is weird to have any of these loaded at this point. */
7555 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7556 continue;
7557
7558 case kIemNativeVarKind_End:
7559 case kIemNativeVarKind_Invalid:
7560 break;
7561 }
7562 AssertFailed();
7563 }
7564 else
7565 {
7566 /*
7567 * Allocate a temporary stack slot and spill the register to it.
7568 */
7569 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7570 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7571 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7572 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7573 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7574 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7575 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7576 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7577 }
7578 } while (fHstRegs);
7579 }
7580 return off;
7581}
7582
7583
7584/**
7585 * Emit code to restore volatile registers after a call to a helper.
7586 *
7587 * @returns New @a off value.
7588 * @param pReNative The recompiler state.
7589 * @param off The code buffer position.
7590 * @param fHstRegsNotToSave Set of registers not to save & restore.
7591 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7592 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7593 */
7594DECL_HIDDEN_THROW(uint32_t)
7595iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7596{
7597 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7598 if (fHstRegs)
7599 {
7600 do
7601 {
7602 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7603 fHstRegs &= ~RT_BIT_32(idxHstReg);
7604
7605 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7606 {
7607 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7608 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7609 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7610 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7611 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7612 switch (pReNative->Core.aVars[idxVar].enmKind)
7613 {
7614 case kIemNativeVarKind_Stack:
7615 {
7616 /* Unspill the variable register. */
7617 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7618 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%d/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7619 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7620 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7621 continue;
7622 }
7623
7624 case kIemNativeVarKind_Immediate:
7625 case kIemNativeVarKind_VarRef:
7626 case kIemNativeVarKind_GstRegRef:
7627 /* It is weird to have any of these loaded at this point. */
7628 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7629 continue;
7630
7631 case kIemNativeVarKind_End:
7632 case kIemNativeVarKind_Invalid:
7633 break;
7634 }
7635 AssertFailed();
7636 }
7637 else
7638 {
7639 /*
7640 * Restore from temporary stack slot.
7641 */
7642 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7643 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7644 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7645 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7646
7647 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7648 }
7649 } while (fHstRegs);
7650 }
7651 return off;
7652}
7653
7654
7655/**
7656 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7657 *
7658 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7659 */
7660DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7661{
7662 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7663 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7664 {
7665 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7666 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7667 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7668 Assert(cSlots > 0);
7669 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7670 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
7671 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7672 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7673 }
7674 else
7675 Assert(idxStackSlot == UINT8_MAX);
7676}
7677
7678
7679/**
7680 * Worker that frees a single variable.
7681 *
7682 * ASSUMES that @a idxVar is valid.
7683 */
7684DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7685{
7686 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7687 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7688 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7689
7690 /* Free the host register first if any assigned. */
7691 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7692 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7693 {
7694 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7695 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7696 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7697 }
7698
7699 /* Free argument mapping. */
7700 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7701 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7702 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7703
7704 /* Free the stack slots. */
7705 iemNativeVarFreeStackSlots(pReNative, idxVar);
7706
7707 /* Free the actual variable. */
7708 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7709 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7710}
7711
7712
7713/**
7714 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7715 */
7716DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7717{
7718 while (bmVars != 0)
7719 {
7720 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7721 bmVars &= ~RT_BIT_32(idxVar);
7722
7723#if 1 /** @todo optimize by simplifying this later... */
7724 iemNativeVarFreeOneWorker(pReNative, idxVar);
7725#else
7726 /* Only need to free the host register, the rest is done as bulk updates below. */
7727 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7728 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7729 {
7730 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7731 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7732 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7733 }
7734#endif
7735 }
7736#if 0 /** @todo optimize by simplifying this later... */
7737 pReNative->Core.bmVars = 0;
7738 pReNative->Core.bmStack = 0;
7739 pReNative->Core.u64ArgVars = UINT64_MAX;
7740#endif
7741}
7742
7743
7744/**
7745 * This is called by IEM_MC_END() to clean up all variables.
7746 */
7747DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
7748{
7749 uint32_t const bmVars = pReNative->Core.bmVars;
7750 if (bmVars != 0)
7751 iemNativeVarFreeAllSlow(pReNative, bmVars);
7752 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7753 Assert(pReNative->Core.bmStack == 0);
7754}
7755
7756
7757#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
7758
7759/**
7760 * This is called by IEM_MC_FREE_LOCAL.
7761 */
7762DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7763{
7764 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7765 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7766 iemNativeVarFreeOneWorker(pReNative, idxVar);
7767}
7768
7769
7770#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
7771
7772/**
7773 * This is called by IEM_MC_FREE_ARG.
7774 */
7775DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7776{
7777 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7778 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
7779 iemNativeVarFreeOneWorker(pReNative, idxVar);
7780}
7781
7782
7783#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
7784
7785/**
7786 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
7787 */
7788DECL_INLINE_THROW(uint32_t)
7789iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
7790{
7791 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
7792 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
7793 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7794 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
7795 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
7796
7797 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
7798 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
7799 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
7800 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7801
7802 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
7803
7804 /*
7805 * Special case for immediates.
7806 */
7807 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
7808 {
7809 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7810 {
7811 case sizeof(uint16_t):
7812 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7813 break;
7814 case sizeof(uint32_t):
7815 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7816 break;
7817 default: AssertFailed(); break;
7818 }
7819 }
7820 else
7821 {
7822 /*
7823 * The generic solution for now.
7824 */
7825 /** @todo optimize this by having the python script make sure the source
7826 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
7827 * statement. Then we could just transfer the register assignments. */
7828 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
7829 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
7830 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7831 {
7832 case sizeof(uint16_t):
7833 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
7834 break;
7835 case sizeof(uint32_t):
7836 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
7837 break;
7838 default: AssertFailed(); break;
7839 }
7840 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
7841 iemNativeVarRegisterRelease(pReNative, idxVarDst);
7842 }
7843 return off;
7844}
7845
7846
7847
7848/*********************************************************************************************************************************
7849* Emitters for IEM_MC_CALL_CIMPL_XXX *
7850*********************************************************************************************************************************/
7851
7852/**
7853 * Emits code to load a reference to the given guest register into @a idxGprDst.
7854 */
7855DECL_INLINE_THROW(uint32_t)
7856iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7857 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7858{
7859 /*
7860 * Get the offset relative to the CPUMCTX structure.
7861 */
7862 uint32_t offCpumCtx;
7863 switch (enmClass)
7864 {
7865 case kIemNativeGstRegRef_Gpr:
7866 Assert(idxRegInClass < 16);
7867 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7868 break;
7869
7870 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
7871 Assert(idxRegInClass < 4);
7872 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7873 break;
7874
7875 case kIemNativeGstRegRef_EFlags:
7876 Assert(idxRegInClass == 0);
7877 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7878 break;
7879
7880 case kIemNativeGstRegRef_MxCsr:
7881 Assert(idxRegInClass == 0);
7882 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7883 break;
7884
7885 case kIemNativeGstRegRef_FpuReg:
7886 Assert(idxRegInClass < 8);
7887 AssertFailed(); /** @todo what kind of indexing? */
7888 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7889 break;
7890
7891 case kIemNativeGstRegRef_MReg:
7892 Assert(idxRegInClass < 8);
7893 AssertFailed(); /** @todo what kind of indexing? */
7894 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7895 break;
7896
7897 case kIemNativeGstRegRef_XReg:
7898 Assert(idxRegInClass < 16);
7899 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7900 break;
7901
7902 default:
7903 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7904 }
7905
7906 /*
7907     * Load the address into the destination register.
7908 */
7909#ifdef RT_ARCH_AMD64
7910 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7911
7912#elif defined(RT_ARCH_ARM64)
7913 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7914 Assert(offCpumCtx < 4096);
7915 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7916
7917#else
7918# error "Port me!"
7919#endif
7920
7921 return off;
7922}
7923
7924
7925/**
7926 * Common code for CIMPL and AIMPL calls.
7927 *
7928 * These are calls that use argument variables and such.  They should not be
7929 * confused with internal calls required to implement an MC operation,
7930 * like a TLB load and similar.
7931 *
7932 * Upon return all that is left to do is to load any hidden arguments and
7933 * perform the call. All argument variables are freed.
7934 *
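 * In outline (mirroring the steps in the function body below):
 *      1. Spill any variables that are referenced by other variables.
 *      2. Make sure each argument register is free or already holds its argument.
 *      3. Store any stack-passed arguments (IEMNATIVE_FP_OFF_STACK_ARG0 configurations only).
 *      4. Load the argument variables into their call registers.
 *      5. Free the argument variables and flush volatile registers.
 *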
7935 * @returns New code buffer offset; throws VBox status code on error.
7936 * @param pReNative The native recompile state.
7937 * @param off The code buffer offset.
7938 * @param   cArgs           The total number of arguments (includes hidden
7939 * count).
7940 * @param cHiddenArgs The number of hidden arguments. The hidden
7941 * arguments must not have any variable declared for
7942 * them, whereas all the regular arguments must
7943 * (tstIEMCheckMc ensures this).
7944 */
7945DECL_HIDDEN_THROW(uint32_t)
7946iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7947{
7948#ifdef VBOX_STRICT
7949 /*
7950 * Assert sanity.
7951 */
7952 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7953 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7954 for (unsigned i = 0; i < cHiddenArgs; i++)
7955 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7956 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7957 {
7958 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7959 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7960 }
7961 iemNativeRegAssertSanity(pReNative);
7962#endif
7963
7964 /*
7965 * Before we do anything else, go over variables that are referenced and
7966 * make sure they are not in a register.
7967 */
7968 uint32_t bmVars = pReNative->Core.bmVars;
7969 if (bmVars)
7970 {
7971 do
7972 {
7973 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7974 bmVars &= ~RT_BIT_32(idxVar);
7975
7976 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7977 {
7978 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7979 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7980 {
7981 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7982 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7983 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7984 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7985 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7986
7987 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7988 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7989 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7990 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7991 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7992 }
7993 }
7994 } while (bmVars != 0);
7995#if 0 //def VBOX_STRICT
7996 iemNativeRegAssertSanity(pReNative);
7997#endif
7998 }
7999
8000 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8001
8002 /*
8003 * First, go over the host registers that will be used for arguments and make
8004 * sure they either hold the desired argument or are free.
8005 */
8006 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8007 {
8008 for (uint32_t i = 0; i < cRegArgs; i++)
8009 {
8010 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8011 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8012 {
8013 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8014 {
8015 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8016 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8017 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
8018 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8019 if (uArgNo == i)
8020                    { /* perfect */ }
8021 /* The variable allocator logic should make sure this is impossible,
8022 except for when the return register is used as a parameter (ARM,
8023 but not x86). */
8024#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8025 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8026 {
8027# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8028# error "Implement this"
8029# endif
8030 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8031 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8032 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8033 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8034 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8035 }
8036#endif
8037 else
8038 {
8039 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8040
8041 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
8042 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8043 else
8044 {
8045 /* just free it, can be reloaded if used again */
8046 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8047 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8048 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8049 }
8050 }
8051 }
8052 else
8053 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8054 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8055 }
8056 }
8057#if 0 //def VBOX_STRICT
8058 iemNativeRegAssertSanity(pReNative);
8059#endif
8060 }
8061
8062 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8063
8064#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8065 /*
8066 * If there are any stack arguments, make sure they are in their place as well.
8067 *
8068     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8069     * the caller) will be loading it later and it must be free (see first loop).
8070 */
8071 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8072 {
8073 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8074 {
8075 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8076 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8077 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8078 {
8079 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8080 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
8081 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
8082 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8083 }
8084 else
8085 {
8086 /* Use ARG0 as temp for stuff we need registers for. */
8087 switch (pReNative->Core.aVars[idxVar].enmKind)
8088 {
8089 case kIemNativeVarKind_Stack:
8090 {
8091 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8092 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8093 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8094 iemNativeStackCalcBpDisp(idxStackSlot));
8095 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8096 continue;
8097 }
8098
8099 case kIemNativeVarKind_Immediate:
8100 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
8101 continue;
8102
8103 case kIemNativeVarKind_VarRef:
8104 {
8105 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8106 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8107 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8108 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8109 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8110 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8111 {
8112 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8113 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8114 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8115 }
8116 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8117 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8118 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8119 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8120 continue;
8121 }
8122
8123 case kIemNativeVarKind_GstRegRef:
8124 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8125 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8126 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8127 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8128 continue;
8129
8130 case kIemNativeVarKind_Invalid:
8131 case kIemNativeVarKind_End:
8132 break;
8133 }
8134 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8135 }
8136 }
8137# if 0 //def VBOX_STRICT
8138 iemNativeRegAssertSanity(pReNative);
8139# endif
8140 }
8141#else
8142 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8143#endif
8144
8145 /*
8146 * Make sure the argument variables are loaded into their respective registers.
8147 *
8148 * We can optimize this by ASSUMING that any register allocations are for
8149     * registers that have already been loaded and are ready.  The previous step
8150 * saw to that.
8151 */
8152 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8153 {
8154 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8155 {
8156 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8157 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8158 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
8159 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8160 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8161 else
8162 {
8163 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8164 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8165 {
8166 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
8167 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
8168 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
8169 | RT_BIT_32(idxArgReg);
8170 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
8171 }
8172 else
8173 {
8174 /* Use ARG0 as temp for stuff we need registers for. */
8175 switch (pReNative->Core.aVars[idxVar].enmKind)
8176 {
8177 case kIemNativeVarKind_Stack:
8178 {
8179 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8180 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8181 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8182 continue;
8183 }
8184
8185 case kIemNativeVarKind_Immediate:
8186 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
8187 continue;
8188
8189 case kIemNativeVarKind_VarRef:
8190 {
8191 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8192 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8193 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8194 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8195 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8196 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8197 {
8198 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8199 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8200 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8201 }
8202 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8203 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8204 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8205 continue;
8206 }
8207
8208 case kIemNativeVarKind_GstRegRef:
8209 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8210 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8211 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8212 continue;
8213
8214 case kIemNativeVarKind_Invalid:
8215 case kIemNativeVarKind_End:
8216 break;
8217 }
8218 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8219 }
8220 }
8221 }
8222#if 0 //def VBOX_STRICT
8223 iemNativeRegAssertSanity(pReNative);
8224#endif
8225 }
8226#ifdef VBOX_STRICT
8227 else
8228 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8229 {
8230 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8231 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8232 }
8233#endif
8234
8235 /*
8236 * Free all argument variables (simplified).
8237 * Their lifetime always expires with the call they are for.
8238 */
8239 /** @todo Make the python script check that arguments aren't used after
8240 * IEM_MC_CALL_XXXX. */
8241    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8242     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8243     *        an argument value.  There is also some FPU stuff. */
8244 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8245 {
8246 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8247 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8248
8249 /* no need to free registers: */
8250 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8251 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8252 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8253 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8254 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8255 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8256
8257 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8258 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8259 iemNativeVarFreeStackSlots(pReNative, idxVar);
8260 }
8261 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8262
8263 /*
8264 * Flush volatile registers as we make the call.
8265 */
8266 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8267
8268 return off;
8269}
8270
8271
8272/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
8273DECL_HIDDEN_THROW(uint32_t)
8274iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
8275 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
8276
8277{
8278 /*
8279 * Do all the call setup and cleanup.
8280 */
8281 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
8282
8283 /*
8284 * Load the two or three hidden arguments.
8285 */
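    /* On Windows/AMD64 with strict VBOXSTRICTRC enabled, the VBOXSTRICTRC return value
       is passed via a hidden buffer in the stack shadow area, so the C-impl takes three
       hidden arguments (&rcStrict, pVCpu, cbInstr) and the status is read back after the
       call below; on the other hosts only pVCpu and cbInstr are hidden arguments. */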
8286#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8287 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
8288 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8289 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
8290#else
8291 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8292 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
8293#endif
8294
8295 /*
8296 * Make the call and check the return code.
8297 *
8298 * Shadow PC copies are always flushed here, other stuff depends on flags.
8299     * Segment and general purpose registers are explicitly flushed via the
8300 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
8301 * macros.
8302 */
8303 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
8304#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8305 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
8306#endif
8307 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
8308 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
8309 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
8310 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
8311
8312 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
8313}
8314
8315
8316#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
8317 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
8318
8319/** Emits code for IEM_MC_CALL_CIMPL_1. */
8320DECL_INLINE_THROW(uint32_t)
8321iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8322 uintptr_t pfnCImpl, uint8_t idxArg0)
8323{
8324 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8325 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
8326}
8327
8328
8329#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
8330 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
8331
8332/** Emits code for IEM_MC_CALL_CIMPL_2. */
8333DECL_INLINE_THROW(uint32_t)
8334iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8335 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
8336{
8337 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8338 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8339 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
8340}
8341
8342
8343#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
8344 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8345 (uintptr_t)a_pfnCImpl, a0, a1, a2)
8346
8347/** Emits code for IEM_MC_CALL_CIMPL_3. */
8348DECL_INLINE_THROW(uint32_t)
8349iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8350 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8351{
8352 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8353 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8354 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8355 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
8356}
8357
8358
8359#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
8360 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8361 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
8362
8363/** Emits code for IEM_MC_CALL_CIMPL_4. */
8364DECL_INLINE_THROW(uint32_t)
8365iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8366 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8367{
8368 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8369 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8370 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8371 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8372 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
8373}
8374
8375
8376#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
8377 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8378 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
8379
8380/** Emits code for IEM_MC_CALL_CIMPL_5. */
8381DECL_INLINE_THROW(uint32_t)
8382iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8383 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
8384{
8385 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8386 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8387 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8388 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8389 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
8390 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
8391}
8392
8393
8394/** Recompiler debugging: Flush guest register shadow copies. */
8395#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
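/* The mask is a bit set of IEMNATIVEGSTREG values. A (hypothetical) use would be flushing
   the shadow copy of RAX before something modifies CPUMCTX behind the recompiler's back:
       IEM_MC_HINT_FLUSH_GUEST_SHADOW(RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xAX)));
 */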
8396
8397
8398
8399/*********************************************************************************************************************************
8400* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
8401*********************************************************************************************************************************/
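/* Unlike the CIMPL calls above, the assembly-implementation (AIMPL) helpers get no hidden
   pVCpu/cbInstr arguments (cHiddenArgs is zero) and their return value, when present, is
   simply captured in a variable rather than checked as a strict status code. */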
8402
8403/**
8404 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
8405 */
8406DECL_INLINE_THROW(uint32_t)
8407iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8408 uintptr_t pfnAImpl, uint8_t cArgs)
8409{
8410 if (idxVarRc != UINT8_MAX)
8411 {
8412 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
8413 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
8414 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
8415 }
8416
8417 /*
8418 * Do all the call setup and cleanup.
8419 */
8420 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
8421
8422 /*
8423 * Make the call and update the return code variable if we've got one.
8424 */
8425 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
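    /* The helper leaves its result in IEMNATIVE_CALL_RET_GREG, i.e. the register the host
       ABI returns values in, so the result variable, if any, is simply bound to it. */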
8426 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
8427 {
8428pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
8429 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
8430 }
8431
8432 return off;
8433}
8434
8435
8436
8437#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
8438 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
8439
8440#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
8441 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
8442
8443/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
8444DECL_INLINE_THROW(uint32_t)
8445iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
8446{
8447 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
8448}
8449
8450
8451#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
8452 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
8453
8454#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
8455 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
8456
8457/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
8458DECL_INLINE_THROW(uint32_t)
8459iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
8460{
8461 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8462 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
8463}
8464
8465
8466#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
8467 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
8468
8469#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
8470 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
8471
8472/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
8473DECL_INLINE_THROW(uint32_t)
8474iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8475 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8476{
8477 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8478 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8479 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
8480}
8481
8482
8483#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
8484 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
8485
8486#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
8487 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
8488
8489/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
8490DECL_INLINE_THROW(uint32_t)
8491iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8492 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8493{
8494 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8495 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8496 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8497 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
8498}
8499
8500
8501#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
8502 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8503
8504#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
8505 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8506
8507/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
8508DECL_INLINE_THROW(uint32_t)
8509iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8510 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8511{
8512 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8513 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8514 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8515 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
8516 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
8517}
8518
8519
8520
8521/*********************************************************************************************************************************
8522* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
8523*********************************************************************************************************************************/
8524
8525#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
8526 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
8527
8528#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8529 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
8530
8531#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8532 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
8533
8534#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8535 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
8536
8537
8538/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
8539 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
8540DECL_INLINE_THROW(uint32_t)
8541iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
8542{
8543 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8544 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8545 Assert(iGRegEx < 20);
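    /* iGRegEx values 0..15 address the low byte of the corresponding GPR, while 16..19
       address AH, CH, DH and BH (bits 15:8 of the first four GPRs), hence the &15 mask
       and the Gpr8Hi load below. */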
8546
8547 /* Same discussion as in iemNativeEmitFetchGregU16 */
8548 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8549 kIemNativeGstRegUse_ReadOnly);
8550
8551 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8552 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8553
8554 /* The value is zero-extended to the full 64-bit host register width. */
8555 if (iGRegEx < 16)
8556 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8557 else
8558 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8559
8560 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8561 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8562 return off;
8563}
8564
8565
8566#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8567 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
8568
8569#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8570 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
8571
8572#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8573 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
8574
8575/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
8576DECL_INLINE_THROW(uint32_t)
8577iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
8578{
8579 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8580 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8581 Assert(iGRegEx < 20);
8582
8583 /* Same discussion as in iemNativeEmitFetchGregU16 */
8584 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8585 kIemNativeGstRegUse_ReadOnly);
8586
8587 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8588 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8589
8590 if (iGRegEx < 16)
8591 {
8592 switch (cbSignExtended)
8593 {
8594 case sizeof(uint16_t):
8595 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8596 break;
8597 case sizeof(uint32_t):
8598 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8599 break;
8600 case sizeof(uint64_t):
8601 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8602 break;
8603 default: AssertFailed(); break;
8604 }
8605 }
8606 else
8607 {
8608 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8609 switch (cbSignExtended)
8610 {
8611 case sizeof(uint16_t):
8612 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8613 break;
8614 case sizeof(uint32_t):
8615 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8616 break;
8617 case sizeof(uint64_t):
8618 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8619 break;
8620 default: AssertFailed(); break;
8621 }
8622 }
8623
8624 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8625 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8626 return off;
8627}
8628
8629
8630
8631#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
8632 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
8633
8634#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
8635 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
8636
8637#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
8638 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
8639
8640/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
8641DECL_INLINE_THROW(uint32_t)
8642iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8643{
8644 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8645 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8646 Assert(iGReg < 16);
8647
8648 /*
8649 * We can either just load the low 16-bit of the GPR into a host register
8650 * for the variable, or we can do so via a shadow copy host register. The
8651 * latter will avoid having to reload it if it's being stored later, but
8652 * will waste a host register if it isn't touched again. Since we don't
8653     * know what's going to happen, we choose the latter for now.
8654 */
8655 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8656 kIemNativeGstRegUse_ReadOnly);
8657
8658 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8659 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8660 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8661 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8662
8663 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8664 return off;
8665}
8666
8667
8668#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
8669 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
8670
8671#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
8672 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
8673
8674/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
8675DECL_INLINE_THROW(uint32_t)
8676iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
8677{
8678 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8679 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8680 Assert(iGReg < 16);
8681
8682 /*
8683 * We can either just load the low 16-bit of the GPR into a host register
8684 * for the variable, or we can do so via a shadow copy host register. The
8685 * latter will avoid having to reload it if it's being stored later, but
8686 * will waste a host register if it isn't touched again. Since we don't
8687     * know what's going to happen, we choose the latter for now.
8688 */
8689 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8690 kIemNativeGstRegUse_ReadOnly);
8691
8692 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8693 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8694 if (cbSignExtended == sizeof(uint32_t))
8695 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8696 else
8697 {
8698 Assert(cbSignExtended == sizeof(uint64_t));
8699 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8700 }
8701 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8702
8703 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8704 return off;
8705}
8706
8707
8708#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
8709 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
8710
8711#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
8712 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
8713
8714/** Emits code for IEM_MC_FETCH_GREG_U32. */
8715DECL_INLINE_THROW(uint32_t)
8716iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8717{
8718 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8719 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
8720 Assert(iGReg < 16);
8721
8722 /*
8723     * We can either just load the low 32-bit of the GPR into a host register
8724 * for the variable, or we can do so via a shadow copy host register. The
8725 * latter will avoid having to reload it if it's being stored later, but
8726 * will waste a host register if it isn't touched again. Since we don't
8727     * know what's going to happen, we choose the latter for now.
8728 */
8729 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8730 kIemNativeGstRegUse_ReadOnly);
8731
8732 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8733 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8734 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8735 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8736
8737 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8738 return off;
8739}
8740
8741
8742#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
8743 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
8744
8745/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
8746DECL_INLINE_THROW(uint32_t)
8747iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8748{
8749 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8750 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8751 Assert(iGReg < 16);
8752
8753 /*
8754 * We can either just load the low 32-bit of the GPR into a host register
8755 * for the variable, or we can do so via a shadow copy host register. The
8756 * latter will avoid having to reload it if it's being stored later, but
8757 * will waste a host register if it isn't touched again. Since we don't
8758     * know what's going to happen, we choose the latter for now.
8759 */
8760 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8761 kIemNativeGstRegUse_ReadOnly);
8762
8763 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8764 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8765 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8766 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8767
8768 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8769 return off;
8770}
8771
8772
8773#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
8774 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8775
8776#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
8777 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8778
8779/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
8780 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
8781DECL_INLINE_THROW(uint32_t)
8782iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8783{
8784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8785 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8786 Assert(iGReg < 16);
8787
8788 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8789 kIemNativeGstRegUse_ReadOnly);
8790
8791 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8792 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8793 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
8794 /** @todo name the register a shadow one already? */
8795 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8796
8797 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8798 return off;
8799}
8800
8801
8802
8803/*********************************************************************************************************************************
8804* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
8805*********************************************************************************************************************************/
8806
8807#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
8808 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
8809
8810/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
8811DECL_INLINE_THROW(uint32_t)
8812iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
8813{
8814 Assert(iGRegEx < 20);
8815 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8816 kIemNativeGstRegUse_ForUpdate);
8817#ifdef RT_ARCH_AMD64
8818 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8819
8820 /* To the lowest byte of the register: mov r8, imm8 */
8821 if (iGRegEx < 16)
8822 {
8823 if (idxGstTmpReg >= 8)
8824 pbCodeBuf[off++] = X86_OP_REX_B;
8825 else if (idxGstTmpReg >= 4)
8826 pbCodeBuf[off++] = X86_OP_REX;
8827 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8828 pbCodeBuf[off++] = u8Value;
8829 }
8830 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
8831 else if (idxGstTmpReg < 4)
8832 {
8833 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
8834 pbCodeBuf[off++] = u8Value;
8835 }
8836 else
8837 {
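        /* Bits 15:8 are only directly addressable for the first four registers (AH, CH,
           DH, BH), and never in combination with a REX prefix.  For any other host
           register we rotate the target byte into the low byte, write it there and
           rotate it back. */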
8838 /* ror reg64, 8 */
8839 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8840 pbCodeBuf[off++] = 0xc1;
8841 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8842 pbCodeBuf[off++] = 8;
8843
8844 /* mov reg8, imm8 */
8845 if (idxGstTmpReg >= 8)
8846 pbCodeBuf[off++] = X86_OP_REX_B;
8847 else if (idxGstTmpReg >= 4)
8848 pbCodeBuf[off++] = X86_OP_REX;
8849 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8850 pbCodeBuf[off++] = u8Value;
8851
8852 /* rol reg64, 8 */
8853 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8854 pbCodeBuf[off++] = 0xc1;
8855 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8856 pbCodeBuf[off++] = 8;
8857 }
8858
8859#elif defined(RT_ARCH_ARM64)
8860 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
8861 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8862 if (iGRegEx < 16)
8863 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
8864 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
8865 else
8866 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
8867 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
8868 iemNativeRegFreeTmp(pReNative, idxImmReg);
8869
8870#else
8871# error "Port me!"
8872#endif
8873
8874 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8875
8876 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8877
8878 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8879 return off;
8880}
8881
8882
8883#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
8884 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
8885
8886/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
8887DECL_INLINE_THROW(uint32_t)
8888iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
8889{
8890 Assert(iGRegEx < 20);
8891 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8892
8893 /*
8894     * If it's a constant value (unlikely) we treat this as an
8895 * IEM_MC_STORE_GREG_U8_CONST statement.
8896 */
8897 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8898 { /* likely */ }
8899 else
8900 {
8901 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8902 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8903 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8904 }
8905
8906 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8907 kIemNativeGstRegUse_ForUpdate);
8908 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8909
8910#ifdef RT_ARCH_AMD64
8911 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
8912 if (iGRegEx < 16)
8913 {
8914 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
8915 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8916 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8917 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8918 pbCodeBuf[off++] = X86_OP_REX;
8919 pbCodeBuf[off++] = 0x8a;
8920 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8921 }
8922 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
8923 else if (idxGstTmpReg < 4 && idxVarReg < 4)
8924 {
8925 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
8926 pbCodeBuf[off++] = 0x8a;
8927 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
8928 }
8929 else
8930 {
8931 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
8932
8933 /* ror reg64, 8 */
8934 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8935 pbCodeBuf[off++] = 0xc1;
8936 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8937 pbCodeBuf[off++] = 8;
8938
8939 /* mov reg8, reg8(r/m) */
8940 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8941 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8942 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8943 pbCodeBuf[off++] = X86_OP_REX;
8944 pbCodeBuf[off++] = 0x8a;
8945 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8946
8947 /* rol reg64, 8 */
8948 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8949 pbCodeBuf[off++] = 0xc1;
8950 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8951 pbCodeBuf[off++] = 8;
8952 }
8953
8954#elif defined(RT_ARCH_ARM64)
8955 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
8956 or
8957 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
8958 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8959 if (iGRegEx < 16)
8960 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
8961 else
8962 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
8963
8964#else
8965# error "Port me!"
8966#endif
8967 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8968
8969 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8970
8971 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8972 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8973 return off;
8974}
8975
8976
8977
8978#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
8979 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
8980
8981/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
8982DECL_INLINE_THROW(uint32_t)
8983iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
8984{
8985 Assert(iGReg < 16);
8986 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8987 kIemNativeGstRegUse_ForUpdate);
8988#ifdef RT_ARCH_AMD64
8989 /* mov reg16, imm16 */
8990 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8991 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8992 if (idxGstTmpReg >= 8)
8993 pbCodeBuf[off++] = X86_OP_REX_B;
8994 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
8995 pbCodeBuf[off++] = RT_BYTE1(uValue);
8996 pbCodeBuf[off++] = RT_BYTE2(uValue);
8997
8998#elif defined(RT_ARCH_ARM64)
8999 /* movk xdst, #uValue, lsl #0 */
9000 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9001 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
9002
9003#else
9004# error "Port me!"
9005#endif
9006
9007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9008
9009 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9010 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9011 return off;
9012}
9013
9014
9015#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
9016 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
9017
9018/** Emits code for IEM_MC_STORE_GREG_U16. */
9019DECL_INLINE_THROW(uint32_t)
9020iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9021{
9022 Assert(iGReg < 16);
9023 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9024
9025 /*
9026     * If it's a constant value (unlikely) we treat this as an
9027 * IEM_MC_STORE_GREG_U16_CONST statement.
9028 */
9029 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9030 { /* likely */ }
9031 else
9032 {
9033 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9034 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9035 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9036 }
9037
9038 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9039 kIemNativeGstRegUse_ForUpdate);
9040
9041#ifdef RT_ARCH_AMD64
9042 /* mov reg16, reg16 or [mem16] */
9043 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9044 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9045 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9046 {
9047 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
9048 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
9049 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
9050 pbCodeBuf[off++] = 0x8b;
9051 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
9052 }
9053 else
9054 {
9055 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
9056 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9057 if (idxGstTmpReg >= 8)
9058 pbCodeBuf[off++] = X86_OP_REX_R;
9059 pbCodeBuf[off++] = 0x8b;
9060 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9061 }
9062
9063#elif defined(RT_ARCH_ARM64)
9064 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
9065 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9066 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9067 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
9068 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9069
9070#else
9071# error "Port me!"
9072#endif
9073
9074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9075
9076 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9077 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9078 return off;
9079}
9080
9081
9082#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
9083 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
9084
9085/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
9086DECL_INLINE_THROW(uint32_t)
9087iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
9088{
9089 Assert(iGReg < 16);
9090 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9091 kIemNativeGstRegUse_ForFullWrite);
9092 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9093 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9094 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9095 return off;
9096}
9097
9098
9099#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
9100 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
9101
9102/** Emits code for IEM_MC_STORE_GREG_U32. */
9103DECL_INLINE_THROW(uint32_t)
9104iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9105{
9106 Assert(iGReg < 16);
9107 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9108
9109 /*
9110     * If it's a constant value (unlikely) we treat this as an
9111 * IEM_MC_STORE_GREG_U32_CONST statement.
9112 */
9113 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9114 { /* likely */ }
9115 else
9116 {
9117 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9118 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9119 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9120 }
9121
9122 /*
9123     * For the rest we allocate a guest register for the variable and write
9124 * it to the CPUMCTX structure.
9125 */
9126 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9127 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9128#ifdef VBOX_STRICT
9129 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
9130#endif
9131 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9132 return off;
9133}
9134
9135
9136#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
9137 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
9138
9139/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
9140DECL_INLINE_THROW(uint32_t)
9141iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
9142{
9143 Assert(iGReg < 16);
9144 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9145 kIemNativeGstRegUse_ForFullWrite);
9146 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9147 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9148 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9149 return off;
9150}
9151
9152
9153#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
9154 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
9155
9156/** Emits code for IEM_MC_STORE_GREG_U64. */
9157DECL_INLINE_THROW(uint32_t)
9158iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9159{
9160 Assert(iGReg < 16);
9161 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9162
9163 /*
9164     * If it's a constant value (unlikely) we treat this as an
9165 * IEM_MC_STORE_GREG_U64_CONST statement.
9166 */
9167 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9168 { /* likely */ }
9169 else
9170 {
9171 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9172 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9173 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
9174 }
9175
9176 /*
9177     * For the rest we allocate a guest register for the variable and write
9178 * it to the CPUMCTX structure.
9179 */
9180 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9181 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9182 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9183 return off;
9184}
9185
9186
9187#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
9188 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
9189
9190/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
9191DECL_INLINE_THROW(uint32_t)
9192iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
9193{
9194 Assert(iGReg < 16);
9195 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9196 kIemNativeGstRegUse_ForUpdate);
9197 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
9198 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9199 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9200 return off;
9201}
9202
9203
9204/*********************************************************************************************************************************
9205* General purpose register manipulation (add, sub). *
9206*********************************************************************************************************************************/
9207
9208#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
9209 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
9210
9211/** Emits code for IEM_MC_ADD_GREG_U16. */
9212DECL_INLINE_THROW(uint32_t)
9213iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
9214{
9215 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9216 kIemNativeGstRegUse_ForUpdate);
9217
9218#ifdef RT_ARCH_AMD64
9219 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9220 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9221 if (idxGstTmpReg >= 8)
9222 pbCodeBuf[off++] = X86_OP_REX_B;
9223 if (uAddend == 1)
9224 {
9225 pbCodeBuf[off++] = 0xff; /* inc */
9226 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9227 }
9228 else
9229 {
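        /* add reg16, imm16 - with the operand size prefix, 0x81 /0 takes a 16-bit
           immediate, hence the extra zero byte after uAddend. */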
9230 pbCodeBuf[off++] = 0x81;
9231 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9232 pbCodeBuf[off++] = uAddend;
9233 pbCodeBuf[off++] = 0;
9234 }
9235
9236#else
9237 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9238 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9239
9240    /* add tmp, gstgrp, uAddend */
9241 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
9242
9243 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
9244 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9245
9246 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9247#endif
9248
9249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9250
9251 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9252
9253 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9254 return off;
9255}
9256
9257
9258#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
9259 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9260
9261#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
9262 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9263
9264/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
9265DECL_INLINE_THROW(uint32_t)
9266iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
9267{
9268 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9269 kIemNativeGstRegUse_ForUpdate);
9270
9271#ifdef RT_ARCH_AMD64
9272 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9273 if (f64Bit)
9274 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9275 else if (idxGstTmpReg >= 8)
9276 pbCodeBuf[off++] = X86_OP_REX_B;
9277 if (uAddend == 1)
9278 {
9279 pbCodeBuf[off++] = 0xff; /* inc */
9280 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9281 }
9282 else if (uAddend < 128)
9283 {
9284 pbCodeBuf[off++] = 0x83; /* add */
9285 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9286 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9287 }
9288 else
9289 {
9290 pbCodeBuf[off++] = 0x81; /* add */
9291 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9292 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9293 pbCodeBuf[off++] = 0;
9294 pbCodeBuf[off++] = 0;
9295 pbCodeBuf[off++] = 0;
9296 }
9297
9298#else
9299    /* add gstgrp, gstgrp, uAddend */
9300 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9301 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
9302
9303#endif
9304
9305 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9306
9307 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9308
9309 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9310 return off;
9311}
9312
9313
9314
9315#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
9316 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
9317
9318/** Emits code for IEM_MC_SUB_GREG_U16. */
9319DECL_INLINE_THROW(uint32_t)
9320iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
9321{
9322 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9323 kIemNativeGstRegUse_ForUpdate);
9324
9325#ifdef RT_ARCH_AMD64
9326 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9327 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9328 if (idxGstTmpReg >= 8)
9329 pbCodeBuf[off++] = X86_OP_REX_B;
9330 if (uSubtrahend == 1)
9331 {
9332 pbCodeBuf[off++] = 0xff; /* dec */
9333 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9334 }
9335 else
9336 {
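        /* sub reg16, imm16 - 0x81 /5 with the operand size prefix takes a 16-bit
           immediate, hence the extra zero byte after uSubtrahend. */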
9337 pbCodeBuf[off++] = 0x81;
9338 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9339 pbCodeBuf[off++] = uSubtrahend;
9340 pbCodeBuf[off++] = 0;
9341 }
9342
9343#else
9344 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9345 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9346
9347 /* sub tmp, gstgrp, uSubtrahend */
9348 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
9349
9350 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
9351 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9352
9353 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9354#endif
9355
9356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9357
9358 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9359
9360 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9361 return off;
9362}
9363
9364
9365#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
9366 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9367
9368#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
9369 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9370
9371/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
9372DECL_INLINE_THROW(uint32_t)
9373iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
9374{
9375 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9376 kIemNativeGstRegUse_ForUpdate);
9377
9378#ifdef RT_ARCH_AMD64
9379 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9380 if (f64Bit)
9381 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9382 else if (idxGstTmpReg >= 8)
9383 pbCodeBuf[off++] = X86_OP_REX_B;
9384 if (uSubtrahend == 1)
9385 {
9386 pbCodeBuf[off++] = 0xff; /* dec */
9387 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9388 }
9389 else if (uSubtrahend < 128)
9390 {
9391 pbCodeBuf[off++] = 0x83; /* sub */
9392 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9393 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9394 }
9395 else
9396 {
9397 pbCodeBuf[off++] = 0x81; /* sub */
9398 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9399 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9400 pbCodeBuf[off++] = 0;
9401 pbCodeBuf[off++] = 0;
9402 pbCodeBuf[off++] = 0;
9403 }
9404
9405#else
9406 /* sub tmp, gstgrp, uSubtrahend */
9407 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9408 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
9409
9410#endif
9411
9412 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9413
9414 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9415
9416 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9417 return off;
9418}
9419
9420
9421
9422/*********************************************************************************************************************************
9423* EFLAGS *
9424*********************************************************************************************************************************/
9425
9426#define IEM_MC_FETCH_EFLAGS(a_EFlags) \
9427 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
9428
9429/** Handles IEM_MC_FETCH_EFLAGS. */
9430DECL_INLINE_THROW(uint32_t)
9431iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
9432{
9433 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9434 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9435
9436 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
9437 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
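    /* The variable's host register now doubles as the shadow copy of guest EFLAGS, so
       later EFLAGS reads can reuse it for as long as the register isn't repurposed. */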
9438 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
9439 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
9440 return off;
9441}
9442
9443
9444#define IEM_MC_COMMIT_EFLAGS(a_EFlags) \
9445 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
9446
9447/** Handles IEM_MC_COMMIT_EFLAGS. */
9448DECL_INLINE_THROW(uint32_t)
9449iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
9450{
9451 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9452 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9453
9454 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
9455
9456#ifdef VBOX_STRICT
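    /* Strict builds sanity check the committed value: the reserved-always-one flag
       (bit 1) must be set and the reserved-always-zero bits must be clear, otherwise
       we emit a breakpoint. */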
9457 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
9458 uint32_t offFixup = off;
9459 off = iemNativeEmitJnzToFixed(pReNative, off, off);
9460 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
9461 iemNativeFixupFixedJump(pReNative, offFixup, off);
9462
9463 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
9464 offFixup = off;
9465 off = iemNativeEmitJzToFixed(pReNative, off, off);
9466 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
9467 iemNativeFixupFixedJump(pReNative, offFixup, off);
9468#endif
9469
9470 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
9471 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
9472 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
9473 return off;
9474}
9475
9476
9477
9478/*********************************************************************************************************************************
9479* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
9480*********************************************************************************************************************************/
9481
9482#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
9483 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
9484
9485#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
9486 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
9487
9488#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
9489 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
9490
9491
9492/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
9493 * IEM_MC_FETCH_SREG_ZX_U64. */
9494DECL_INLINE_THROW(uint32_t)
9495iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
9496{
9497 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9498 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
9499 Assert(iSReg < X86_SREG_COUNT);
9500
9501 /*
9502     * For now, we will not create a shadow copy of a selector. The rationale
9503     * is that since we do not recompile the popping and loading of segment
9504     * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
9505     * pushing and moving to registers, there is only a small chance that the
9506     * shadow copy will be accessed again before the register is reloaded. One
9507     * scenario would be nested calls in 16-bit code, but I doubt it's worth
9508     * the extra register pressure atm.
9509     *
9510     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
9511     * and iemNativeVarRegisterAcquire for a load scenario. We've only got the
9512     * store scenario covered at present (r160730).
9513 */
9514 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9515 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9516 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
9517 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9518 return off;
9519}
9520
9521
9522
9523/*********************************************************************************************************************************
9524* Register references. *
9525*********************************************************************************************************************************/
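/* A reference variable only records which guest register it refers to
   (kIemNativeVarKind_GstRegRef); the actual pointer into CPUMCTX is materialized by
   iemNativeEmitCallCommon (see the GstRegRef case above) when the variable is passed
   to a helper. */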
9526
9527#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
9528 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
9529
9530#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
9531 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
9532
9533/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
9534DECL_INLINE_THROW(uint32_t)
9535iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
9536{
9537 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9538 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9539 Assert(iGRegEx < 20);
9540
9541 if (iGRegEx < 16)
9542 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9543 else
9544 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
9545
9546 /* If we've delayed writing back the register value, flush it now. */
9547 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9548
9549 /* If it's not a const reference we need to flush the shadow copy of the register now. */
9550 if (!fConst)
9551 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
9552
9553 return off;
9554}
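
/* Illustration only (not compiled): the extended 8-bit register index convention the
   code above relies on.  Indexes 0..15 name the low byte of GPR0..GPR15, while 16..19
   name the high byte of GPR0..GPR3 (AH, CH, DH, BH) - which is why the reference kind
   switches to kIemNativeGstRegRef_GprHighByte and the GPR number is taken as
   iGRegEx & 15.  The helper name is made up for the sketch. */
#if 0
static bool iemSketchIsHighByteGRegEx(uint8_t iGRegEx)
{
    Assert(iGRegEx < 20);
    return iGRegEx >= 16; /* 16..19 -> AH, CH, DH, BH (bits 8..15 of GPR0..GPR3). */
}
#endif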
9555
9556#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
9557 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
9558
9559#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
9560 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
9561
9562#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
9563 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
9564
9565#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
9566 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
9567
9568#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
9569 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
9570
9571#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
9572 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
9573
9574#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
9575 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
9576
9577#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
9578 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
9579
9580#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
9581 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
9582
9583#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
9584 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
9585
9586/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
9587DECL_INLINE_THROW(uint32_t)
9588iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
9589{
9590 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9591 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9592 Assert(iGReg < 16);
9593
9594 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
9595
9596 /* If we've delayed writing back the register value, flush it now. */
9597 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
9598
9599 /* If it's not a const reference we need to flush the shadow copy of the register now. */
9600 if (!fConst)
9601 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
9602
9603 return off;
9604}
9605
9606
9607#define IEM_MC_REF_EFLAGS(a_pEFlags) \
9608 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
9609
9610/** Handles IEM_MC_REF_EFLAGS. */
9611DECL_INLINE_THROW(uint32_t)
9612iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
9613{
9614 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9615 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9616
9617 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
9618
9619 /* If we've delayed writing back the register value, flush it now. */
9620 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
9621
9622 /* If there is a shadow copy of guest EFLAGS, flush it now. */
9623 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
9624
9625 return off;
9626}
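
/* Illustration only (not compiled): why pending writes and shadow copies are flushed
   before a reference is handed out.  The reference becomes a host pointer straight
   into the guest context, so a value still cached in a host register has to be
   written back and the cached copy dropped first, or the pointer user and the cache
   would go out of sync.  The function below is a made-up, stand-alone sketch of that
   hazard. */
#if 0
static void iemSketchStaleShadowHazard(uint32_t *pfEFlagsInCtx)
{
    uint32_t fShadow = *pfEFlagsInCtx;  /* think: shadow copy living in a host register */
    fShadow |= X86_EFL_CF;              /* think: pending update not yet written back   */
    /* Handing out pfEFlagsInCtx at this point would let the callee read a stale value,
       and the write-back below would clobber whatever the callee stored. */
    *pfEFlagsInCtx = fShadow;
}
#endif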
9627
9628
9629/*********************************************************************************************************************************
9630* Effective Address Calculation *
9631*********************************************************************************************************************************/
9632#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
9633 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
9634
9635/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
9636 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
9637DECL_INLINE_THROW(uint32_t)
9638iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9639 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
9640{
9641 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9642
9643 /*
9644 * Handle the disp16 form with no registers first.
9645 *
9646 * Convert to an immediate value, as that'll delay the register allocation
9647 * and assignment till the memory access / call / whatever and we can use
9648 * a more appropriate register (or none at all).
9649 */
9650 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
9651 {
9652 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
9653 return off;
9654 }
9655
9656    /* Determine the displacement. */
9657 uint16_t u16EffAddr;
9658 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9659 {
9660 case 0: u16EffAddr = 0; break;
9661 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
9662 case 2: u16EffAddr = u16Disp; break;
9663 default: AssertFailedStmt(u16EffAddr = 0);
9664 }
9665
9666 /* Determine the registers involved. */
9667 uint8_t idxGstRegBase;
9668 uint8_t idxGstRegIndex;
9669 switch (bRm & X86_MODRM_RM_MASK)
9670 {
9671 case 0:
9672 idxGstRegBase = X86_GREG_xBX;
9673 idxGstRegIndex = X86_GREG_xSI;
9674 break;
9675 case 1:
9676 idxGstRegBase = X86_GREG_xBX;
9677 idxGstRegIndex = X86_GREG_xDI;
9678 break;
9679 case 2:
9680 idxGstRegBase = X86_GREG_xBP;
9681 idxGstRegIndex = X86_GREG_xSI;
9682 break;
9683 case 3:
9684 idxGstRegBase = X86_GREG_xBP;
9685 idxGstRegIndex = X86_GREG_xDI;
9686 break;
9687 case 4:
9688 idxGstRegBase = X86_GREG_xSI;
9689 idxGstRegIndex = UINT8_MAX;
9690 break;
9691 case 5:
9692 idxGstRegBase = X86_GREG_xDI;
9693 idxGstRegIndex = UINT8_MAX;
9694 break;
9695 case 6:
9696 idxGstRegBase = X86_GREG_xBP;
9697 idxGstRegIndex = UINT8_MAX;
9698 break;
9699#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
9700 default:
9701#endif
9702 case 7:
9703 idxGstRegBase = X86_GREG_xBX;
9704 idxGstRegIndex = UINT8_MAX;
9705 break;
9706 }
9707
9708 /*
9709 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
9710 */
9711 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9712 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9713 kIemNativeGstRegUse_ReadOnly);
9714 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
9715 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9716 kIemNativeGstRegUse_ReadOnly)
9717 : UINT8_MAX;
9718#ifdef RT_ARCH_AMD64
9719 if (idxRegIndex == UINT8_MAX)
9720 {
9721 if (u16EffAddr == 0)
9722 {
9723                /* movzx ret, base */
9724 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
9725 }
9726 else
9727 {
9728 /* lea ret32, [base64 + disp32] */
9729 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9730 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9731 if (idxRegRet >= 8 || idxRegBase >= 8)
9732 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9733 pbCodeBuf[off++] = 0x8d;
9734 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9735 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
9736 else
9737 {
9738 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
9739 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9740 }
9741 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9742 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9743 pbCodeBuf[off++] = 0;
9744 pbCodeBuf[off++] = 0;
9745 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9746
9747 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9748 }
9749 }
9750 else
9751 {
9752 /* lea ret32, [index64 + base64 (+ disp32)] */
9753 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9754 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9755 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9756 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9757 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9758 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9759 pbCodeBuf[off++] = 0x8d;
9760 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
9761 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9762 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
9763 if (bMod == X86_MOD_MEM4)
9764 {
9765 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9766 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9767 pbCodeBuf[off++] = 0;
9768 pbCodeBuf[off++] = 0;
9769 }
9770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9771 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9772 }
9773
9774#elif defined(RT_ARCH_ARM64)
9775 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9776 if (u16EffAddr == 0)
9777 {
9778 if (idxRegIndex == UINT8_MAX)
9779 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
9780 else
9781 {
9782 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
9783 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9784 }
9785 }
9786 else
9787 {
9788 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
9789 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
9790 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
9791 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9792 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
9793 else
9794 {
9795 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
9796 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9797 }
9798 if (idxRegIndex != UINT8_MAX)
9799 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
9800 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9801 }
9802
9803#else
9804# error "port me"
9805#endif
9806
9807 if (idxRegIndex != UINT8_MAX)
9808 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9809 iemNativeRegFreeTmp(pReNative, idxRegBase);
9810 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9811 return off;
9812}
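
/* Illustration only (not compiled): a plain-C reference model of the 16-bit effective
   address the code above computes natively.  Register values are passed in directly
   here; the real emitter reads them from the guest context and does the arithmetic in
   host registers.  The function name is a stand-in. */
#if 0
static uint16_t iemSketchCalcEffAddr16(uint8_t bRm, uint16_t u16Disp,
                                       uint16_t uBx, uint16_t uBp, uint16_t uSi, uint16_t uDi)
{
    if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
        return u16Disp;                                 /* mod=0, r/m=110b: disp16 only. */

    uint16_t uEff;
    switch (bRm & X86_MODRM_RM_MASK)
    {
        case 0:  uEff = uBx + uSi; break;
        case 1:  uEff = uBx + uDi; break;
        case 2:  uEff = uBp + uSi; break;
        case 3:  uEff = uBp + uDi; break;
        case 4:  uEff = uSi;       break;
        case 5:  uEff = uDi;       break;
        case 6:  uEff = uBp;       break;
        default: uEff = uBx;       break;
    }
    switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    {
        case 1: uEff += (uint16_t)(int16_t)(int8_t)u16Disp; break;  /* disp8, sign-extended. */
        case 2: uEff += u16Disp; break;                             /* disp16. */
    }
    return uEff;                                        /* Wraps at 64KiB, like the uxth/movzx above. */
}
#endif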
9813
9814
9815#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
9816 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
9817
9818/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
9819 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
9820DECL_INLINE_THROW(uint32_t)
9821iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9822 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
9823{
9824 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9825
9826 /*
9827 * Handle the disp32 form with no registers first.
9828 *
9829 * Convert to an immediate value, as that'll delay the register allocation
9830 * and assignment till the memory access / call / whatever and we can use
9831 * a more appropriate register (or none at all).
9832 */
9833 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9834 {
9835 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
9836 return off;
9837 }
9838
9839    /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
9840 uint32_t u32EffAddr = 0;
9841 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9842 {
9843 case 0: break;
9844 case 1: u32EffAddr = (int8_t)u32Disp; break;
9845 case 2: u32EffAddr = u32Disp; break;
9846 default: AssertFailed();
9847 }
9848
9849 /* Get the register (or SIB) value. */
9850 uint8_t idxGstRegBase = UINT8_MAX;
9851 uint8_t idxGstRegIndex = UINT8_MAX;
9852 uint8_t cShiftIndex = 0;
9853 switch (bRm & X86_MODRM_RM_MASK)
9854 {
9855 case 0: idxGstRegBase = X86_GREG_xAX; break;
9856 case 1: idxGstRegBase = X86_GREG_xCX; break;
9857 case 2: idxGstRegBase = X86_GREG_xDX; break;
9858 case 3: idxGstRegBase = X86_GREG_xBX; break;
9859 case 4: /* SIB */
9860 {
9861            /* index w/ scaling. */
9862 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9863 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9864 {
9865 case 0: idxGstRegIndex = X86_GREG_xAX; break;
9866 case 1: idxGstRegIndex = X86_GREG_xCX; break;
9867 case 2: idxGstRegIndex = X86_GREG_xDX; break;
9868 case 3: idxGstRegIndex = X86_GREG_xBX; break;
9869 case 4: cShiftIndex = 0; /*no index*/ break;
9870 case 5: idxGstRegIndex = X86_GREG_xBP; break;
9871 case 6: idxGstRegIndex = X86_GREG_xSI; break;
9872 case 7: idxGstRegIndex = X86_GREG_xDI; break;
9873 }
9874
9875 /* base */
9876 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
9877 {
9878 case 0: idxGstRegBase = X86_GREG_xAX; break;
9879 case 1: idxGstRegBase = X86_GREG_xCX; break;
9880 case 2: idxGstRegBase = X86_GREG_xDX; break;
9881 case 3: idxGstRegBase = X86_GREG_xBX; break;
9882 case 4:
9883 idxGstRegBase = X86_GREG_xSP;
9884 u32EffAddr += uSibAndRspOffset >> 8;
9885 break;
9886 case 5:
9887 if ((bRm & X86_MODRM_MOD_MASK) != 0)
9888 idxGstRegBase = X86_GREG_xBP;
9889 else
9890 {
9891 Assert(u32EffAddr == 0);
9892 u32EffAddr = u32Disp;
9893 }
9894 break;
9895 case 6: idxGstRegBase = X86_GREG_xSI; break;
9896 case 7: idxGstRegBase = X86_GREG_xDI; break;
9897 }
9898 break;
9899 }
9900 case 5: idxGstRegBase = X86_GREG_xBP; break;
9901 case 6: idxGstRegBase = X86_GREG_xSI; break;
9902 case 7: idxGstRegBase = X86_GREG_xDI; break;
9903 }
9904
9905 /*
9906 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
9907 * the start of the function.
9908 */
9909 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9910 {
9911 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
9912 return off;
9913 }
9914
9915 /*
9916 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9917 */
9918 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9919 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9920 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9921 kIemNativeGstRegUse_ReadOnly);
9922 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9923 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9924 kIemNativeGstRegUse_ReadOnly);
9925
9926 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9927 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9928 {
9929 idxRegBase = idxRegIndex;
9930 idxRegIndex = UINT8_MAX;
9931 }
9932
9933#ifdef RT_ARCH_AMD64
9934 if (idxRegIndex == UINT8_MAX)
9935 {
9936 if (u32EffAddr == 0)
9937 {
9938 /* mov ret, base */
9939 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9940 }
9941 else
9942 {
9943 /* lea ret32, [base64 + disp32] */
9944 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9945 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9946 if (idxRegRet >= 8 || idxRegBase >= 8)
9947 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9948 pbCodeBuf[off++] = 0x8d;
9949 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9950 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9951 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9952 else
9953 {
9954 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9955 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9956 }
9957 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9958 if (bMod == X86_MOD_MEM4)
9959 {
9960 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9961 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9962 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9963 }
9964 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9965 }
9966 }
9967 else
9968 {
9969 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9970 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9971 if (idxRegBase == UINT8_MAX)
9972 {
9973 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
9974 if (idxRegRet >= 8 || idxRegIndex >= 8)
9975 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9976 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9977 pbCodeBuf[off++] = 0x8d;
9978 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9979 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9980 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9981 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9982 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9983 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9984 }
9985 else
9986 {
9987 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9988 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9989 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9990 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9991 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9992 pbCodeBuf[off++] = 0x8d;
9993 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9994 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9995 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9996 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9997 if (bMod != X86_MOD_MEM0)
9998 {
9999 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10000 if (bMod == X86_MOD_MEM4)
10001 {
10002 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10003 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10004 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10005 }
10006 }
10007 }
10008 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10009 }
10010
10011#elif defined(RT_ARCH_ARM64)
10012 if (u32EffAddr == 0)
10013 {
10014 if (idxRegIndex == UINT8_MAX)
10015 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10016 else if (idxRegBase == UINT8_MAX)
10017 {
10018 if (cShiftIndex == 0)
10019 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
10020 else
10021 {
10022 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10023 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
10024 }
10025 }
10026 else
10027 {
10028 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10029 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10030 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10031 }
10032 }
10033 else
10034 {
10035 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
10036 {
10037 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10038 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
10039 }
10040 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
10041 {
10042 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10043 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10044 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
10045 }
10046 else
10047 {
10048 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
10049 if (idxRegBase != UINT8_MAX)
10050 {
10051 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10052 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10053 }
10054 }
10055 if (idxRegIndex != UINT8_MAX)
10056 {
10057 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10058 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10059 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10060 }
10061 }
10062
10063#else
10064# error "port me"
10065#endif
10066
10067 if (idxRegIndex != UINT8_MAX)
10068 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10069 if (idxRegBase != UINT8_MAX)
10070 iemNativeRegFreeTmp(pReNative, idxRegBase);
10071 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10072 return off;
10073}
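
/* Illustration only (not compiled): a plain-C model of the SIB part of the 32-bit
   calculation above, given already-fetched guest register values (auGRegs is a
   stand-in array indexed by GPR number).  The "pop [esp]" RSP-offset quirk handled
   above via uSibAndRspOffset is deliberately left out to keep the sketch small. */
#if 0
static uint32_t iemSketchCalcEffAddr32Sib(uint8_t bSib, uint8_t bMod, uint32_t u32Disp,
                                          uint32_t const auGRegs[8])
{
    uint32_t uEff = 0;
    uint8_t const iIdx = (bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK;
    if (iIdx != 4)                                      /* index=100b means no index register. */
        uEff += auGRegs[iIdx] << ((bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK);

    uint8_t const iBase = bSib & X86_SIB_BASE_MASK;
    if (iBase != 5 || bMod != 0)                        /* base=101b w/ mod=0: disp32, no base reg. */
        uEff += auGRegs[iBase];

    if (bMod == 1)
        uEff += (uint32_t)(int32_t)(int8_t)u32Disp;     /* disp8, sign-extended. */
    else if (bMod == 2 || (bMod == 0 && iBase == 5))
        uEff += u32Disp;                                /* disp32. */
    return uEff;                                        /* Truncated to 32 bits, as above. */
}
#endif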
10074
10075
10076#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10077 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10078 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10079
10080#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10081 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10082 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10083
10084#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10085 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10086 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
10087
10088/**
10089 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
10090 *
10091 * @returns New off.
10092 * @param pReNative        The native recompiler state.
10093 * @param off              The current code buffer offset.
10094 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
10095 * bit 4 to REX.X. The two bits are part of the
10096 * REG sub-field, which isn't needed in this
10097 * function.
10098 * @param uSibAndRspOffset Two parts:
10099 * - The first 8 bits make up the SIB byte.
10100 * - The next 8 bits are the fixed RSP/ESP offset
10101 * in case of a pop [xSP].
10102 * @param u32Disp The displacement byte/word/dword, if any.
10103 * @param cbInstr The size of the fully decoded instruction. Used
10104 * for RIP relative addressing.
10105 * @param idxVarRet The result variable number.
10106 * @param f64Bit Whether to use a 64-bit or 32-bit address size
10107 * when calculating the address.
10108 *
10109 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
10110 */
10111DECL_INLINE_THROW(uint32_t)
10112iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
10113 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
10114{
10115 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10116
10117 /*
10118 * Special case the rip + disp32 form first.
10119 */
10120 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10121 {
10122 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10123 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
10124 kIemNativeGstRegUse_ReadOnly);
10125#ifdef RT_ARCH_AMD64
10126 if (f64Bit)
10127 {
10128 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
10129 if ((int32_t)offFinalDisp == offFinalDisp)
10130 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
10131 else
10132 {
10133 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
10134 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
10135 }
10136 }
10137 else
10138 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
10139
10140#elif defined(RT_ARCH_ARM64)
10141 if (f64Bit)
10142 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10143 (int64_t)(int32_t)u32Disp + cbInstr);
10144 else
10145 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10146 (int32_t)u32Disp + cbInstr);
10147
10148#else
10149# error "Port me!"
10150#endif
10151 iemNativeRegFreeTmp(pReNative, idxRegPc);
10152 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10153 return off;
10154 }
10155
10156    /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
10157 int64_t i64EffAddr = 0;
10158 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10159 {
10160 case 0: break;
10161 case 1: i64EffAddr = (int8_t)u32Disp; break;
10162 case 2: i64EffAddr = (int32_t)u32Disp; break;
10163 default: AssertFailed();
10164 }
10165
10166 /* Get the register (or SIB) value. */
10167 uint8_t idxGstRegBase = UINT8_MAX;
10168 uint8_t idxGstRegIndex = UINT8_MAX;
10169 uint8_t cShiftIndex = 0;
10170 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
10171 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
10172 else /* SIB: */
10173 {
10174        /* index w/ scaling. */
10175 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10176 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10177 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
10178 if (idxGstRegIndex == 4)
10179 {
10180 /* no index */
10181 cShiftIndex = 0;
10182 idxGstRegIndex = UINT8_MAX;
10183 }
10184
10185 /* base */
10186 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
10187 if (idxGstRegBase == 4)
10188 {
10189 /* pop [rsp] hack */
10190 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
10191 }
10192 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
10193 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
10194 {
10195 /* mod=0 and base=5 -> disp32, no base reg. */
10196 Assert(i64EffAddr == 0);
10197 i64EffAddr = (int32_t)u32Disp;
10198 idxGstRegBase = UINT8_MAX;
10199 }
10200 }
10201
10202 /*
10203 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
10204 * the start of the function.
10205 */
10206 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10207 {
10208 if (f64Bit)
10209 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
10210 else
10211 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
10212 return off;
10213 }
10214
10215 /*
10216 * Now emit code that calculates:
10217 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10218 * or if !f64Bit:
10219 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10220 */
10221 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10222 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10223 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10224 kIemNativeGstRegUse_ReadOnly);
10225 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10226 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10227 kIemNativeGstRegUse_ReadOnly);
10228
10229 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10230 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10231 {
10232 idxRegBase = idxRegIndex;
10233 idxRegIndex = UINT8_MAX;
10234 }
10235
10236#ifdef RT_ARCH_AMD64
10237 uint8_t bFinalAdj;
10238 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
10239 bFinalAdj = 0; /* likely */
10240 else
10241 {
10242 /* pop [rsp] with a problematic disp32 value. Split out the
10243 RSP offset and add it separately afterwards (bFinalAdj). */
10244 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
10245 Assert(idxGstRegBase == X86_GREG_xSP);
10246 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
10247 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
10248 Assert(bFinalAdj != 0);
10249 i64EffAddr -= bFinalAdj;
10250 Assert((int32_t)i64EffAddr == i64EffAddr);
10251 }
10252 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
10253//pReNative->pInstrBuf[off++] = 0xcc;
10254
10255 if (idxRegIndex == UINT8_MAX)
10256 {
10257 if (u32EffAddr == 0)
10258 {
10259 /* mov ret, base */
10260 if (f64Bit)
10261 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
10262 else
10263 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10264 }
10265 else
10266 {
10267 /* lea ret, [base + disp32] */
10268 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10269 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10270 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
10271 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10272 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10273 | (f64Bit ? X86_OP_REX_W : 0);
10274 pbCodeBuf[off++] = 0x8d;
10275 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10276 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10277 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10278 else
10279 {
10280 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10281 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10282 }
10283 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10284 if (bMod == X86_MOD_MEM4)
10285 {
10286 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10287 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10288 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10289 }
10290 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10291 }
10292 }
10293 else
10294 {
10295 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10296 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10297 if (idxRegBase == UINT8_MAX)
10298 {
10299 /* lea ret, [(index64 << cShiftIndex) + disp32] */
10300 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
10301 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10302 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10303 | (f64Bit ? X86_OP_REX_W : 0);
10304 pbCodeBuf[off++] = 0x8d;
10305 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10306 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10307 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10308 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10309 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10310 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10311 }
10312 else
10313 {
10314 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10315 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10316 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10317 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10318 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10319 | (f64Bit ? X86_OP_REX_W : 0);
10320 pbCodeBuf[off++] = 0x8d;
10321 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10322 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10323 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10324 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10325 if (bMod != X86_MOD_MEM0)
10326 {
10327 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10328 if (bMod == X86_MOD_MEM4)
10329 {
10330 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10331 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10332 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10333 }
10334 }
10335 }
10336 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10337 }
10338
10339 if (!bFinalAdj)
10340 { /* likely */ }
10341 else
10342 {
10343 Assert(f64Bit);
10344 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
10345 }
10346
10347#elif defined(RT_ARCH_ARM64)
10348 if (i64EffAddr == 0)
10349 {
10350 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10351 if (idxRegIndex == UINT8_MAX)
10352 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
10353 else if (idxRegBase != UINT8_MAX)
10354 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10355 f64Bit, false /*fSetFlags*/, cShiftIndex);
10356 else
10357 {
10358 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
10359 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
10360 }
10361 }
10362 else
10363 {
10364 if (f64Bit)
10365 { /* likely */ }
10366 else
10367 i64EffAddr = (int32_t)i64EffAddr;
10368
10369 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
10370 {
10371 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10372 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
10373 }
10374 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
10375 {
10376 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10377 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
10378 }
10379 else
10380 {
10381 if (f64Bit)
10382 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
10383 else
10384 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
10385 if (idxRegBase != UINT8_MAX)
10386 {
10387 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10388 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
10389 }
10390 }
10391 if (idxRegIndex != UINT8_MAX)
10392 {
10393 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10394 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10395 f64Bit, false /*fSetFlags*/, cShiftIndex);
10396 }
10397 }
10398
10399#else
10400# error "port me"
10401#endif
10402
10403 if (idxRegIndex != UINT8_MAX)
10404 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10405 if (idxRegBase != UINT8_MAX)
10406 iemNativeRegFreeTmp(pReNative, idxRegBase);
10407 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10408 return off;
10409}
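
/* Illustration only (not compiled): the RIP-relative special case handled first above.
   With mod=0 and r/m=101b the displacement is relative to the RIP of the *next*
   instruction, which is why cbInstr is added to the guest PC value; a 32-bit address
   size truncates the result.  Parameter names are made up for the sketch. */
#if 0
static uint64_t iemSketchCalcRipRelative(uint64_t uRipOfInstr, uint8_t cbInstr, uint32_t u32Disp, bool f64Bit)
{
    uint64_t const uEff = uRipOfInstr + cbInstr + (uint64_t)(int64_t)(int32_t)u32Disp;
    return f64Bit ? uEff : (uint32_t)uEff;
}
#endif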
10410
10411
10412/*********************************************************************************************************************************
10413* TLB Lookup. *
10414*********************************************************************************************************************************/
10415
10416/**
10417 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
10418 */
10419DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
10420{
10421 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
10422 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
10423 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
10424 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
10425
10426 /* Do the lookup manually. */
10427 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
10428 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
10429 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
10430 if (RT_LIKELY(pTlbe->uTag == uTag))
10431 {
10432 /*
10433 * Check TLB page table level access flags.
10434 */
10435 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
10436 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
10437 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
10438 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
10439 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
10440 | IEMTLBE_F_PG_UNASSIGNED
10441 | IEMTLBE_F_PT_NO_ACCESSED
10442 | fNoWriteNoDirty | fNoUser);
10443 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
10444 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
10445 {
10446 /*
10447 * Return the address.
10448 */
10449 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
10450 if ((uintptr_t)pbAddr == uResult)
10451 return;
10452 RT_NOREF(cbMem);
10453 AssertFailed();
10454 }
10455 else
10456 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
10457 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
10458 }
10459 else
10460 AssertFailed();
10461 RT_BREAKPOINT();
10462}
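
/* Illustration only (not compiled): the overall shape of the direct-mapped lookup the
   checker above re-does by hand.  SKETCHTLBENTRY and sketchTlbLookup are stand-ins;
   the real IEMTLB_CALC_TAG / IEMTLB_TAG_TO_ENTRY macros take the TLB structure and
   fold in bookkeeping such as the TLB revision, and a hit additionally has to pass
   the fFlagsAndPhysRev checks seen above before pbMappingR3 may be used. */
#if 0
typedef struct SKETCHTLBENTRY
{
    uint64_t uTag;
    uint64_t fFlagsAndPhysRev;
    uint8_t *pbMappingR3;
} SKETCHTLBENTRY;

static uint8_t *sketchTlbLookup(SKETCHTLBENTRY *paEntries, uint32_t cEntries /* power of two */, uint64_t GCPtrFlat)
{
    uint64_t const  uTag  = GCPtrFlat >> GUEST_PAGE_SHIFT;          /* page number as the tag   */
    SKETCHTLBENTRY *pTlbe = &paEntries[uTag & (cEntries - 1)];      /* direct-mapped entry pick */
    if (pTlbe->uTag != uTag)
        return NULL;                                                /* miss -> TlbMiss path     */
    return &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK]; /* host mapping + page offs */
}
#endif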
10463
10464/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
10465
10466
10467/*********************************************************************************************************************************
10468* Memory fetches and stores common *
10469*********************************************************************************************************************************/
10470
10471typedef enum IEMNATIVEMITMEMOP
10472{
10473 kIemNativeEmitMemOp_Store = 0,
10474 kIemNativeEmitMemOp_Fetch,
10475 kIemNativeEmitMemOp_Fetch_Zx_U16,
10476 kIemNativeEmitMemOp_Fetch_Zx_U32,
10477 kIemNativeEmitMemOp_Fetch_Zx_U64,
10478 kIemNativeEmitMemOp_Fetch_Sx_U16,
10479 kIemNativeEmitMemOp_Fetch_Sx_U32,
10480 kIemNativeEmitMemOp_Fetch_Sx_U64
10481} IEMNATIVEMITMEMOP;
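
/* Illustration only (not compiled): what the _Zx_ and _Sx_ fetch variants mean for the
   destination value, shown for an 8-bit load widened to 64 bits.  The helper name is a
   stand-in. */
#if 0
static uint64_t iemSketchWiden8To64(uint8_t bValue, bool fSignExtend)
{
    return fSignExtend
         ? (uint64_t)(int64_t)(int8_t)bValue    /* Fetch_Sx_U64: 0x80 -> 0xffffffffffffff80 */
         : (uint64_t)bValue;                    /* Fetch_Zx_U64: 0x80 -> 0x0000000000000080 */
}
#endif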
10482
10483/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
10484 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
10485 * (with iSegReg = UINT8_MAX). */
10486DECL_INLINE_THROW(uint32_t)
10487iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
10488 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
10489 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
10490{
10491 /*
10492 * Assert sanity.
10493 */
10494 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
10495 Assert( enmOp != kIemNativeEmitMemOp_Store
10496 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
10497 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
10498 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
10499 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
10500 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
10501 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10502 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
10503 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
10504 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
10505#ifdef VBOX_STRICT
10506 if (iSegReg == UINT8_MAX)
10507 {
10508 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10509 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10510 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10511 switch (cbMem)
10512 {
10513 case 1:
10514 Assert( pfnFunction
10515 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
10516 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10517 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10518 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10519 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10520 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
10521 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
10522 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
10523 : UINT64_C(0xc000b000a0009000) ));
10524 break;
10525 case 2:
10526 Assert( pfnFunction
10527 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
10528 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10529 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10530 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10531 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
10532 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
10533 : UINT64_C(0xc000b000a0009000) ));
10534 break;
10535 case 4:
10536 Assert( pfnFunction
10537 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
10538 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10539 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10540 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
10541 : UINT64_C(0xc000b000a0009000) ));
10542 break;
10543 case 8:
10544 Assert( pfnFunction
10545 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
10546 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
10547 : UINT64_C(0xc000b000a0009000) ));
10548 break;
10549 }
10550 }
10551 else
10552 {
10553 Assert(iSegReg < 6);
10554 switch (cbMem)
10555 {
10556 case 1:
10557 Assert( pfnFunction
10558 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
10559 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
10560 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10561 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10562 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10563 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
10564 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
10565 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
10566 : UINT64_C(0xc000b000a0009000) ));
10567 break;
10568 case 2:
10569 Assert( pfnFunction
10570 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
10571 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
10572 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
10573 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
10574 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
10575 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
10576 : UINT64_C(0xc000b000a0009000) ));
10577 break;
10578 case 4:
10579 Assert( pfnFunction
10580 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
10581 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
10582 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
10583 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
10584 : UINT64_C(0xc000b000a0009000) ));
10585 break;
10586 case 8:
10587 Assert( pfnFunction
10588 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
10589 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
10590 : UINT64_C(0xc000b000a0009000) ));
10591 break;
10592 }
10593 }
10594#endif
10595
10596#ifdef VBOX_STRICT
10597 /*
10598 * Check that the fExec flags we've got make sense.
10599 */
10600 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10601#endif
10602
10603 /*
10604 * To keep things simple we have to commit any pending writes first as we
10605 * may end up making calls.
10606 */
10607 /** @todo we could postpone this till we make the call and reload the
10608 * registers after returning from the call. Not sure if that's sensible or
10609 * not, though. */
10610 off = iemNativeRegFlushPendingWrites(pReNative, off);
10611
10612#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
10613 /*
10614 * Move/spill/flush stuff out of call-volatile registers.
10615 * This is the easy way out. We could contain this to the tlb-miss branch
10616 * by saving and restoring active stuff here.
10617 */
10618 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
10619#endif
10620
10621 /*
10622 * Define labels and allocate the result register (trying for the return
10623 * register if we can).
10624 */
10625 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10626 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
10627 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
10628 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
10629 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
10630 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
10631 uint8_t const idxRegValueStore = !TlbState.fSkip
10632 && enmOp == kIemNativeEmitMemOp_Store
10633 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
10634 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
10635 : UINT8_MAX;
10636 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
10637 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
10638 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
10639 : UINT32_MAX;
10640
10641 /*
10642 * Jump to the TLB lookup code.
10643 */
10644 if (!TlbState.fSkip)
10645 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
10646
10647 /*
10648 * TlbMiss:
10649 *
10650 * Call helper to do the fetching.
10651 * We flush all guest register shadow copies here.
10652 */
10653 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
10654
10655#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10656 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10657#else
10658 RT_NOREF(idxInstr);
10659#endif
10660
10661#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
10662 /* Save variables in volatile registers. */
10663 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
10664 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
10665 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
10666 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
10667#endif
10668
10669 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
10670 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
10671 if (enmOp == kIemNativeEmitMemOp_Store)
10672 {
10673 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
10674 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
10675#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
10676 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
10677#else
10678 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
10679 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
10680#endif
10681 }
10682
10683 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
10684 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
10685#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
10686 fVolGregMask);
10687#else
10688 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
10689#endif
10690
10691 if (iSegReg != UINT8_MAX)
10692 {
10693 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
10694 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
10695 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
10696 }
10697
10698 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10699 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10700
10701 /* Done setting up parameters, make the call. */
10702 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10703
10704 /*
10705 * Put the result in the right register if this is a fetch.
10706 */
10707 if (enmOp != kIemNativeEmitMemOp_Store)
10708 {
10709 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
10710 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
10711 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
10712 }
10713
10714#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
10715 /* Restore variables and guest shadow registers to volatile registers. */
10716 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
10717 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
10718#endif
10719
10720#ifdef IEMNATIVE_WITH_TLB_LOOKUP
10721 if (!TlbState.fSkip)
10722 {
10723 /* end of TlbMiss - Jump to the done label. */
10724 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10725 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
10726
10727 /*
10728 * TlbLookup:
10729 */
10730 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
10731 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
10732 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
10733
10734 /*
10735 * Emit code to do the actual storing / fetching.
10736 */
10737 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
10738# ifdef VBOX_WITH_STATISTICS
10739 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
10740                                                  enmOp == kIemNativeEmitMemOp_Store
10741                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
10742                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
10743# endif
10744 switch (enmOp)
10745 {
10746 case kIemNativeEmitMemOp_Store:
10747 if (pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate)
10748 {
10749 switch (cbMem)
10750 {
10751 case 1:
10752 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
10753 break;
10754 case 2:
10755 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
10756 break;
10757 case 4:
10758 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
10759 break;
10760 case 8:
10761 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
10762 break;
10763 default:
10764 AssertFailed();
10765 }
10766 }
10767 else
10768 {
10769 switch (cbMem)
10770 {
10771 case 1:
10772 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off,
10773 (uint8_t)pReNative->Core.aVars[idxVarValue].u.uValue,
10774 idxRegMemResult, TlbState.idxReg1);
10775 break;
10776 case 2:
10777 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
10778 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
10779 idxRegMemResult, TlbState.idxReg1);
10780 break;
10781 case 4:
10782 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
10783 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
10784 idxRegMemResult, TlbState.idxReg1);
10785 break;
10786 case 8:
10787 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
10788 idxRegMemResult, TlbState.idxReg1);
10789 break;
10790 default:
10791 AssertFailed();
10792 }
10793 }
10794 break;
10795
10796 case kIemNativeEmitMemOp_Fetch:
10797 case kIemNativeEmitMemOp_Fetch_Zx_U16:
10798 case kIemNativeEmitMemOp_Fetch_Zx_U32:
10799 case kIemNativeEmitMemOp_Fetch_Zx_U64:
10800 switch (cbMem)
10801 {
10802 case 1:
10803 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
10804 break;
10805 case 2:
10806 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
10807 break;
10808 case 4:
10809 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
10810 break;
10811 case 8:
10812 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
10813 break;
10814 default:
10815 AssertFailed();
10816 }
10817 break;
10818
10819 case kIemNativeEmitMemOp_Fetch_Sx_U16:
10820 Assert(cbMem == 1);
10821 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
10822 break;
10823
10824 case kIemNativeEmitMemOp_Fetch_Sx_U32:
10825 Assert(cbMem == 1 || cbMem == 2);
10826 if (cbMem == 1)
10827 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
10828 else
10829 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
10830 break;
10831
10832 case kIemNativeEmitMemOp_Fetch_Sx_U64:
10833 switch (cbMem)
10834 {
10835 case 1:
10836 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
10837 break;
10838 case 2:
10839 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
10840 break;
10841 case 4:
10842 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
10843 break;
10844 default:
10845 AssertFailed();
10846 }
10847 break;
10848
10849 default:
10850 AssertFailed();
10851 }
10852
10853 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
10854
10855 /*
10856 * TlbDone:
10857 */
10858 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10859
10860 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
10861
10862# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
10863 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
10864 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
10865# endif
10866 }
10867#else
10868 RT_NOREF(fAlignMask, idxLabelTlbMiss);
10869#endif
10870
10871 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
10872 iemNativeVarRegisterRelease(pReNative, idxVarValue);
10873 return off;
10874}
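
/* Illustration only (not compiled): how the fAlignMask parameter relates to natural
   alignment.  The fetch/store macros below pass sizeof(type) - 1 (and 0 for bytes),
   so an access is naturally aligned exactly when the address ANDed with the mask is
   zero; the TLB lookup code uses the mask to spot accesses that need more careful
   handling.  The helper name is a stand-in. */
#if 0
static bool iemSketchIsNaturallyAligned(uint64_t GCPtrMem, uint8_t cbMem)
{
    uint8_t const fAlignMask = cbMem - 1;       /* e.g. 4-byte access -> mask 3 */
    return (GCPtrMem & fAlignMask) == 0;
}
#endif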
10875
10876
10877
10878/*********************************************************************************************************************************
10879* Memory fetches (IEM_MEM_FETCH_XXX). *
10880*********************************************************************************************************************************/
10881
10882/* 8-bit segmented: */
10883#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
10884 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
10885 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
10886 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10887
10888#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10889 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10890 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
10891 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10892
10893#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10894 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10895 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10896 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10897
10898#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10899 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10900 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10901 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10902
10903#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10904 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10905 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
10906 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
10907
10908#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10909 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10910 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10911 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
10912
10913#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10914 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10915 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10916 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
10917
10918/* 16-bit segmented: */
10919#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10920 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10921 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10922 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10923
10924#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
10925 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10926 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10927 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
10928
10929#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10930 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10931 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10932 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10933
10934#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10935 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10936 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10937 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10938
10939#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10940 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10941 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10942 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
10943
10944#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10945 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10946 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10947 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
10948
10949
10950/* 32-bit segmented: */
10951#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10952 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10953 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10954 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
10955
10956#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
10957 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10958 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10959 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
10960
10961#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10962 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10963 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10964 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
10965
10966#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10967 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10968 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10969 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
10970
10971
10972/* 64-bit segmented: */
10973#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10974 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10975 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
10976 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
10977
10978
10979
10980/* 8-bit flat: */
10981#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
10982 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
10983 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
10984 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10985
10986#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
10987 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10988 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
10989 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10990
10991#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
10992 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10993 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10994 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10995
10996#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
10997 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10998 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10999 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11000
11001#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
11002 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11003 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11004 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11005
11006#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
11007 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11008 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11009 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11010
11011#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
11012 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11013 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11014 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11015
11016
11017/* 16-bit flat: */
11018#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
11019 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11020 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11021 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11022
11023#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
11024 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11025 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11026 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11027
11028#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
11029 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11030 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11031 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11032
11033#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
11034 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11035 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11036 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11037
11038#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
11039 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11040 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11041 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11042
11043#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
11044 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11045 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11046 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11047
11048/* 32-bit flat: */
11049#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
11050 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11051 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11052 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11053
11054#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
11055 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11056 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11057 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11058
11059#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
11060 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11061 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11062 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11063
11064#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
11065 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11066 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11067 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11068
11069/* 64-bit flat: */
11070#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
11071 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11072 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11073 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
11074
11075
11076
11077/*********************************************************************************************************************************
11078* Memory stores (IEM_MEM_STORE_XXX). *
11079*********************************************************************************************************************************/
11080
11081#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
11082 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
11083 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11084 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11085
11086#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
11087 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
11088 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11089 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11090
11091#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
11092 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
11093 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11094 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11095
11096#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
11097 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
11098 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11099 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11100
11101
11102#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
11103 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
11104 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11105 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11106
11107#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
11108 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
11109 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11110 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11111
11112#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
11113 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
11114 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11115 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11116
11117#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
11118 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
11119 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11120 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11121
11122
11123#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
11124 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11125 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11126
11127#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
11128 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11129 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11130
11131#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
11132 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11133 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11134
11135#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
11136 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11137 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11138
11139
11140#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
11141 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11142 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11143
11144#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
11145 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11146 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11147
11148#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
11149 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11150 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11151
11152#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
11153 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11154 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11155
11156/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
11157 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
11158DECL_INLINE_THROW(uint32_t)
11159iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
11160 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
11161{
11162 /*
11163 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
11164 * to do the grunt work.
11165 */
11166 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
11167 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
11168 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
11169 pfnFunction, idxInstr);
11170 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
11171 return off;
11172}
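/* Illustrative expansion (hypothetical operands, purely for the sake of example):
   IEM_MC_STORE_MEM_U16_CONST(X86_SREG_DS, GCPtrEffDst, 0xfeed) lands here with
   cbMem = 2, so the temporary const variable is stored through
   iemNativeEmitMemFetchStoreDataCommon using alignment mask cbMem - 1 = 1 and
   iemNativeHlpMemStoreDataU16 as the TLB-miss helper. */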
11173
11174
11175
11176/*********************************************************************************************************************************
11177* Stack Accesses. *
11178*********************************************************************************************************************************/
11179/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
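/* Decoding the packed parameter (illustration): byte 0 is the width in bits of
   the value being pushed, byte 1 the flat stack-pointer width (0 = segmented
   SS-based stack) and byte 2 a flag marking segment-register pushes.
   IEM_MC_FLAT32_PUSH_U32_SREG thus passes RT_MAKE_U32_FROM_U8(32, 32, 1, 0):
   a 32-bit value, a flat 32-bit ESP, and the SReg store helper. */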
11180#define IEM_MC_PUSH_U16(a_u16Value) \
11181 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11182 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
11183#define IEM_MC_PUSH_U32(a_u32Value) \
11184 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11185 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
11186#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
11187 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
11188 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
11189#define IEM_MC_PUSH_U64(a_u64Value) \
11190 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
11191 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
11192
11193#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
11194 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
11195 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11196#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
11197 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
11198 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
11199#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
11200 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
11201 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
11202
11203#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
11204 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
11205 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11206#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
11207 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
11208 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
11209
11210
11211DECL_FORCE_INLINE_THROW(uint32_t)
11212iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11213{
11214 /* Use16BitSp: */
11215#ifdef RT_ARCH_AMD64
11216 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
11217 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11218#else
11219 /* sub regeff, regrsp, #cbMem */
11220 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
11221 /* and regeff, regeff, #0xffff */
11222 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
11223 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
11224     /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
11225 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
11226#endif
11227 return off;
11228}
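/* Worked example (illustration): with RSP = 0x00007ffe0002 and cbMem = 4 the
   16-bit subtraction wraps SP from 0x0002 to 0xfffe, so idxRegEffSp ends up
   holding 0xfffe while bits 63:16 of idxRegRsp keep their old value; both the
   AMD64 and the ARM64 sequences above preserve the upper RSP bits this way. */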
11229
11230
11231DECL_FORCE_INLINE(uint32_t)
11232iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11233{
11234 /* Use32BitSp: */
11235 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
11236 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11237 return off;
11238}
11239
11240
11241/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
11242DECL_INLINE_THROW(uint32_t)
11243iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
11244 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
11245{
11246 /*
11247 * Assert sanity.
11248 */
11249 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11250#ifdef VBOX_STRICT
11251 if (RT_BYTE2(cBitsVarAndFlat) != 0)
11252 {
11253 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11254 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11255 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11256 Assert( pfnFunction
11257 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11258 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
11259 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
11260 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11261 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
11262 : UINT64_C(0xc000b000a0009000) ));
11263 }
11264 else
11265 Assert( pfnFunction
11266 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
11267 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
11268 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
11269 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
11270 : UINT64_C(0xc000b000a0009000) ));
11271#endif
11272
11273#ifdef VBOX_STRICT
11274 /*
11275 * Check that the fExec flags we've got make sense.
11276 */
11277 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11278#endif
11279
11280 /*
11281 * To keep things simple we have to commit any pending writes first as we
11282 * may end up making calls.
11283 */
11284 /** @todo we could postpone this till we make the call and reload the
11285 * registers after returning from the call. Not sure if that's sensible or
11286 * not, though. */
11287 off = iemNativeRegFlushPendingWrites(pReNative, off);
11288
11289 /*
11290 * First we calculate the new RSP and the effective stack pointer value.
11291 * For 64-bit mode and flat 32-bit these two are the same.
11292 * (Code structure is very similar to that of PUSH)
11293 */
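    /* Rough shape of the code emitted below (overview): an inline RSP/SP
       adjustment (flat: a single sub; segmented: an SS.ATTR.D test selecting the
       16-bit or 32-bit stack-pointer update), then a jump to either TlbLookup
       (inline store on a TLB hit) or TlbMiss (save volatiles, call pfnFunction,
       restore), with both paths meeting at TlbDone where the updated RSP is
       written back to cpum.GstCtx.rsp. */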
11294 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
11295 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
11296 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
11297 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
11298 ? cbMem : sizeof(uint16_t);
11299 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
11300 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
11301 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
11302 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
11303 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
11304 if (cBitsFlat != 0)
11305 {
11306 Assert(idxRegEffSp == idxRegRsp);
11307 Assert(cBitsFlat == 32 || cBitsFlat == 64);
11308 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
11309 if (cBitsFlat == 64)
11310 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
11311 else
11312 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
11313 }
11314 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
11315 {
11316 Assert(idxRegEffSp != idxRegRsp);
11317 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
11318 kIemNativeGstRegUse_ReadOnly);
11319#ifdef RT_ARCH_AMD64
11320 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11321#else
11322 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11323#endif
11324 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
11325 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
11326 offFixupJumpToUseOtherBitSp = off;
11327 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11328 {
11329 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
11330 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11331 }
11332 else
11333 {
11334 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
11335 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11336 }
11337 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11338 }
11339 /* SpUpdateEnd: */
11340 uint32_t const offLabelSpUpdateEnd = off;
11341
11342 /*
11343 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
11344 * we're skipping lookup).
11345 */
11346 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
11347 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
11348 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11349 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
11350 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11351 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11352 : UINT32_MAX;
11353 uint8_t const idxRegValue = !TlbState.fSkip
11354 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
11355 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
11356 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
11357 : UINT8_MAX;
11358 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11359
11360
11361 if (!TlbState.fSkip)
11362 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11363 else
11364 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
11365
11366 /*
11367      * Use16BitSp / Use32BitSp (whichever wasn't emitted inline above):
11368 */
11369 if (cBitsFlat == 0)
11370 {
11371#ifdef RT_ARCH_AMD64
11372 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11373#else
11374 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11375#endif
11376 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
11377 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11378 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11379 else
11380 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11381 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
11382 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11383 }
11384
11385 /*
11386 * TlbMiss:
11387 *
11388 * Call helper to do the pushing.
11389 */
11390 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
11391
11392#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11393 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11394#else
11395 RT_NOREF(idxInstr);
11396#endif
11397
11398 /* Save variables in volatile registers. */
11399 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11400 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
11401 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
11402 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
11403 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11404
11405 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
11406 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
11407 {
11408 /* Swap them using ARG0 as temp register: */
11409 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
11410 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
11411 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
11412 }
11413 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
11414 {
11415 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
11416 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
11417 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11418
11419 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
11420 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
11421 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11422 }
11423 else
11424 {
11425 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
11426 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11427
11428 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
11429 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
11430 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
11431 }
11432
11433 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11434 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11435
11436 /* Done setting up parameters, make the call. */
11437 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11438
11439 /* Restore variables and guest shadow registers to volatile registers. */
11440 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11441 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11442
11443#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11444 if (!TlbState.fSkip)
11445 {
11446 /* end of TlbMiss - Jump to the done label. */
11447 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11448 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11449
11450 /*
11451 * TlbLookup:
11452 */
11453 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
11454 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
11455
11456 /*
11457 * Emit code to do the actual storing / fetching.
11458 */
11459 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
11460# ifdef VBOX_WITH_STATISTICS
11461 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11462 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
11463# endif
11464 if (idxRegValue != UINT8_MAX)
11465 {
11466 switch (cbMemAccess)
11467 {
11468 case 2:
11469 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11470 break;
11471 case 4:
11472 if (!fIsIntelSeg)
11473 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11474 else
11475 {
11476                 /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
11477                    PUSH FS in real mode, so we have to try to emulate that here.
11478 We borrow the now unused idxReg1 from the TLB lookup code here. */
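                /* In other words (illustrative summary): the dword stored is
                   (EFLAGS & 0xffff0000 & ~X86_EFL_RAZ_MASK) | SegSel, relying on
                   the upper half of idxRegValue already being zero. */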
11479 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
11480 kIemNativeGstReg_EFlags);
11481 if (idxRegEfl != UINT8_MAX)
11482 {
11483#ifdef RT_ARCH_AMD64
11484 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
11485 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
11486 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11487#else
11488 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
11489 off, TlbState.idxReg1, idxRegEfl,
11490 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11491#endif
11492 iemNativeRegFreeTmp(pReNative, idxRegEfl);
11493 }
11494 else
11495 {
11496 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
11497 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
11498 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
11499 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11500 }
11501 /* ASSUMES the upper half of idxRegValue is ZERO. */
11502 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
11503 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
11504 }
11505 break;
11506 case 8:
11507 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11508 break;
11509 default:
11510 AssertFailed();
11511 }
11512 }
11513 else
11514 {
11515 switch (cbMemAccess)
11516 {
11517 case 2:
11518 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
11519 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11520 idxRegMemResult, TlbState.idxReg1);
11521 break;
11522 case 4:
11523 Assert(!fIsSegReg);
11524 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
11525 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11526 idxRegMemResult, TlbState.idxReg1);
11527 break;
11528 case 8:
11529 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
11530 idxRegMemResult, TlbState.idxReg1);
11531 break;
11532 default:
11533 AssertFailed();
11534 }
11535 }
11536
11537 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11538 TlbState.freeRegsAndReleaseVars(pReNative);
11539
11540 /*
11541 * TlbDone:
11542 *
11543 * Commit the new RSP value.
11544 */
11545 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11546 }
11547#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
11548
11549 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
11550 iemNativeRegFreeTmp(pReNative, idxRegRsp);
11551 if (idxRegEffSp != idxRegRsp)
11552 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
11553
11554     /* The value variable is implicitly flushed. */
11555 if (idxRegValue != UINT8_MAX)
11556 iemNativeVarRegisterRelease(pReNative, idxVarValue);
11557 iemNativeVarFreeLocal(pReNative, idxVarValue);
11558
11559 return off;
11560}
11561
11562
11563
11564/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
11565#define IEM_MC_POP_GREG_U16(a_iGReg) \
11566 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11567 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
11568#define IEM_MC_POP_GREG_U32(a_iGReg) \
11569 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11570 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
11571#define IEM_MC_POP_GREG_U64(a_iGReg) \
11572 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
11573 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
11574
11575#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
11576 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
11577 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
11578#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
11579 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
11580 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
11581
11582#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
11583 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
11584 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
11585#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
11586 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
11587 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
11588
11589
11590DECL_FORCE_INLINE_THROW(uint32_t)
11591iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
11592 uint8_t idxRegTmp)
11593{
11594 /* Use16BitSp: */
11595#ifdef RT_ARCH_AMD64
11596 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11597 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
11598 RT_NOREF(idxRegTmp);
11599#else
11600 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
11601 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
11602 /* add tmp, regrsp, #cbMem */
11603 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
11604 /* and tmp, tmp, #0xffff */
11605 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
11606 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
11607     /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from idxRegTmp to RSP bits 15:0, keeping the other RSP bits as is. */
11608 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
11609#endif
11610 return off;
11611}
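/* Worked example (illustration): with SP = 0xfffe and cbMem = 2 the value is
   read at effective address 0x000000000000fffe and the SP written back into
   bits 15:0 of idxRegRsp wraps to 0x0000, leaving bits 63:16 of RSP untouched
   in both the AMD64 and the ARM64 sequences above. */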
11612
11613
11614DECL_FORCE_INLINE(uint32_t)
11615iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11616{
11617 /* Use32BitSp: */
11618 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11619 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
11620 return off;
11621}
11622
11623
11624/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
11625DECL_INLINE_THROW(uint32_t)
11626iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
11627 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
11628{
11629 /*
11630 * Assert sanity.
11631 */
11632 Assert(idxGReg < 16);
11633#ifdef VBOX_STRICT
11634 if (RT_BYTE2(cBitsVarAndFlat) != 0)
11635 {
11636 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11637 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11638 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11639 Assert( pfnFunction
11640 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
11641 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
11642 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
11643 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
11644 : UINT64_C(0xc000b000a0009000) ));
11645 }
11646 else
11647 Assert( pfnFunction
11648 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
11649 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
11650 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
11651 : UINT64_C(0xc000b000a0009000) ));
11652#endif
11653
11654#ifdef VBOX_STRICT
11655 /*
11656 * Check that the fExec flags we've got make sense.
11657 */
11658 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11659#endif
11660
11661 /*
11662 * To keep things simple we have to commit any pending writes first as we
11663 * may end up making calls.
11664 */
11665 off = iemNativeRegFlushPendingWrites(pReNative, off);
11666
11667 /*
11668 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
11669 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
11670 * directly as the effective stack pointer.
11671 * (Code structure is very similar to that of PUSH)
11672 */
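    /* Rough shape of the code emitted below (overview): compute the effective
       stack pointer (segmented stacks also advance SP/ESP here), load the value
       into idxRegMemResult either inline on a TLB hit or via the helper call on
       a miss, then after TlbDone commit it to the destination GPR; flat modes
       advance RSP/ESP only at that point, and popping into xSP itself is
       special-cased so the loaded value takes precedence. */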
11673 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
11674 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
11675 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
11676 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
11677 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
11678 /** @todo can do a better job picking the register here. For cbMem >= 4 this
11679 * will be the resulting register value. */
11680 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
11681
11682 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
11683 if (cBitsFlat != 0)
11684 {
11685 Assert(idxRegEffSp == idxRegRsp);
11686 Assert(cBitsFlat == 32 || cBitsFlat == 64);
11687 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
11688 }
11689 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
11690 {
11691 Assert(idxRegEffSp != idxRegRsp);
11692 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
11693 kIemNativeGstRegUse_ReadOnly);
11694#ifdef RT_ARCH_AMD64
11695 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11696#else
11697 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11698#endif
11699 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
11700 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
11701 offFixupJumpToUseOtherBitSp = off;
11702 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11703 {
11704/** @todo can skip idxRegRsp updating when popping ESP. */
11705 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
11706 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11707 }
11708 else
11709 {
11710 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
11711 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
11712 }
11713 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11714 }
11715 /* SpUpdateEnd: */
11716 uint32_t const offLabelSpUpdateEnd = off;
11717
11718 /*
11719 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
11720 * we're skipping lookup).
11721 */
11722 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
11723 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
11724 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11725 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
11726 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11727 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11728 : UINT32_MAX;
11729
11730 if (!TlbState.fSkip)
11731 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11732 else
11733 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
11734
11735 /*
11736      * Use16BitSp / Use32BitSp (whichever wasn't emitted inline above):
11737 */
11738 if (cBitsFlat == 0)
11739 {
11740#ifdef RT_ARCH_AMD64
11741 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11742#else
11743 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11744#endif
11745 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
11746 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11747 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
11748 else
11749 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11750 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
11751 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11752 }
11753
11754 /*
11755 * TlbMiss:
11756 *
11757      * Call helper to do the popping.
11758 */
11759 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
11760
11761#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11762 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11763#else
11764 RT_NOREF(idxInstr);
11765#endif
11766
11767 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11768 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
11769 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
11770 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11771
11772
11773 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
11774 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
11775 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11776
11777 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11778 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11779
11780 /* Done setting up parameters, make the call. */
11781 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11782
11783 /* Move the return register content to idxRegMemResult. */
11784 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
11785 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
11786
11787 /* Restore variables and guest shadow registers to volatile registers. */
11788 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11789 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11790
11791#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11792 if (!TlbState.fSkip)
11793 {
11794 /* end of TlbMiss - Jump to the done label. */
11795 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11796 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11797
11798 /*
11799 * TlbLookup:
11800 */
11801 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
11802 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
11803
11804 /*
11805          * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
11806 */
11807 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11808# ifdef VBOX_WITH_STATISTICS
11809 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11810 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
11811# endif
11812 switch (cbMem)
11813 {
11814 case 2:
11815 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
11816 break;
11817 case 4:
11818 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
11819 break;
11820 case 8:
11821 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
11822 break;
11823 default:
11824 AssertFailed();
11825 }
11826
11827 TlbState.freeRegsAndReleaseVars(pReNative);
11828
11829 /*
11830 * TlbDone:
11831 *
11832          * Set the new RSP value (FLAT accesses need to calculate it first) and
11833 * commit the popped register value.
11834 */
11835 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11836 }
11837#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
11838
11839 if (idxGReg != X86_GREG_xSP)
11840 {
11841 /* Set the register. */
11842 if (cbMem >= sizeof(uint32_t))
11843 {
11844 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
11845 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
11846 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
11847 }
11848 else
11849 {
11850 Assert(cbMem == sizeof(uint16_t));
11851 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
11852 kIemNativeGstRegUse_ForUpdate);
11853 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
11854 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
11855 iemNativeRegFreeTmp(pReNative, idxRegDst);
11856 }
11857
11858 /* Complete RSP calculation for FLAT mode. */
11859 if (idxRegEffSp == idxRegRsp)
11860 {
11861 if (cBitsFlat == 64)
11862 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
11863 else
11864 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
11865 }
11866 }
11867 else
11868 {
11869         /* We're popping RSP, ESP or SP. Only the last (SP) needs a bit of extra work, of course. */
11870 if (cbMem == sizeof(uint64_t))
11871 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
11872 else if (cbMem == sizeof(uint32_t))
11873 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
11874 else
11875 {
11876 if (idxRegEffSp == idxRegRsp)
11877 {
11878 if (cBitsFlat == 64)
11879 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
11880 else
11881 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
11882 }
11883 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
11884 }
11885 }
11886 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
11887
11888 iemNativeRegFreeTmp(pReNative, idxRegRsp);
11889 if (idxRegEffSp != idxRegRsp)
11890 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
11891 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11892
11893 return off;
11894}
11895
11896
11897
11898/*********************************************************************************************************************************
11899* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
11900*********************************************************************************************************************************/
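/* Overview (for orientation): each IEM_MC_MEM_MAP_* below forwards to
   iemNativeEmitMemMapCommon with the element size, the requested access type
   (IEM_ACCESS_TYPE_READ and/or IEM_ACCESS_TYPE_WRITE), an alignment mask of
   size - 1 (0 for byte accesses) and the matching map helper, producing the
   host pointer (a_pXxxMem) together with a_bUnmapInfo for the later
   commit/unmap statement.  The _FLAT_ variants again pass UINT8_MAX as the
   segment index. */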
11901
11902#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11903 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11904 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
11905 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
11906
11907#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11908 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11909 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
11910 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
11911
11912#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11913 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11914 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
11915 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
11916
11917
11918#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11919 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11920 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11921 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
11922
11923#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11924 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11925 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11926 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
11927
11928#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11929 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11930 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11931 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
11932
11933#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11934 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
11935 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11936 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
11937
11938
11939#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11940 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11941 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11942 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
11943
11944#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11945 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11946 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11947 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
11948
11949#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11950 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11951 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11952 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
11953
11954#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11955 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
11956 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11957 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
11958
11959
11960#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11961 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11962 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11963 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
11964
11965#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11966 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11967 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11968 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
11969
11970#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11971 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11972 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11973 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
11974
11975#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11976 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
11977 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11978 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
11979
11980
11981#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11982 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
11983 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11984 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
11985
11986#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11987 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
11988 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
11989 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
11990
11991
11992#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11993 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
11994 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11995 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
11996
11997#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11998 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
11999 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12000 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
12001
12002#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12003 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12004 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12005 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
12006
12007
12008
12009#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12010 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12011 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
12012 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
12013
12014#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12015 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12016 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
12017 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
12018
12019#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12020 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12021 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
12022 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
12023
12024
12025#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12026 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12027 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12028 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
12029
12030#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12031 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12032 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12033 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12034
12035#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12036 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12037 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12038 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
12039
12040#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
12041 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
12042 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12043 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12044
12045
12046#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12047 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12048 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12049 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
12050
12051#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12052 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12053 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12054 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12055
12056#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12057 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12058 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12059 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
12060
12061#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
12062 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
12063 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12064 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12065
12066
12067#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12068 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12069 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12070 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
12071
12072#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12073 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12074 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12075 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12076
12077#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12078 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12079 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12080 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
12081
12082#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
12083 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
12084 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12085 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12086
12087
12088#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
12089 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12090 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12091 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
12092
12093#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
12094 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12095 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12096 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
12097
12098
12099#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12100 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12101 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12102 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
12103
12104#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12105 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12106 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12107 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
12108
12109#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12110 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12111 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12112 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
12113
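/*
 * Illustrative sketch (not taken from this file) of how the mapping MCs above
 * pair with the commit+unmap MCs further down inside an MC block; the variable
 * names are made up for the example and the IEM_MC_LOCAL/addressing statements
 * are omitted:
 *
 *      IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, X86_SREG_DS, GCPtrEffDst);
 *      ... update *pu32Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */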
12114
12115DECL_INLINE_THROW(uint32_t)
12116iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
12117 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
12118 uintptr_t pfnFunction, uint8_t idxInstr)
12119{
12120 /*
12121 * Assert sanity.
12122 */
12123 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
12124 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
12125 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
12126 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12127
12128 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12129 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
12130 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
12131 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12132
12133 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12134 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
12135 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
12136 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12137
12138 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12139
12140 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12141
12142#ifdef VBOX_STRICT
12143# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
12144 ( ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
12145 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
12146 : ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_READ \
12147 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
12148
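    /* For instance, IEM_MAP_HLP_FN(IEM_ACCESS_TYPE_READ, iemNativeHlpMemFlatMapDataU32)
       evaluates to (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, matching the helper
       that the IEM_MC_MEM_FLAT_MAP_U32_RO macro passes in. */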
12149 if (iSegReg == UINT8_MAX)
12150 {
12151 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12152 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12153 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12154 switch (cbMem)
12155 {
12156 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
12157 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
12158 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
12159 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
12160 case 10:
12161 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
12162 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
12163 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12164 break;
12165 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
12166# if 0
12167 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
12168 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
12169# endif
12170 default: AssertFailed(); break;
12171 }
12172 }
12173 else
12174 {
12175 Assert(iSegReg < 6);
12176 switch (cbMem)
12177 {
12178 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
12179 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
12180 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
12181 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
12182 case 10:
12183 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
12184 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
12185 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12186 break;
12187 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
12188# if 0
12189 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU256)); break;
12190 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU512)); break;
12191# endif
12192 default: AssertFailed(); break;
12193 }
12194 }
12195# undef IEM_MAP_HLP_FN
12196#endif
12197
12198#ifdef VBOX_STRICT
12199 /*
12200 * Check that the fExec flags we've got make sense.
12201 */
12202 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12203#endif
12204
12205 /*
12206 * To keep things simple we have to commit any pending writes first as we
12207 * may end up making calls.
12208 */
12209 off = iemNativeRegFlushPendingWrites(pReNative, off);
12210
12211#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12212 /*
12213 * Move/spill/flush stuff out of call-volatile registers.
12214 * This is the easy way out. We could contain this to the tlb-miss branch
12215 * by saving and restoring active stuff here.
12216 */
12217 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
12218 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
12219#endif
12220
12221 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
12222       while the tlb-miss code path will temporarily put it on the stack.
12223       Set the type to stack here so we don't need to do it twice below. */
12224 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
12225 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
12226 /** @todo use a tmp register from TlbState, since they'll be free after tlb
12227 * lookup is done. */
12228
12229 /*
12230 * Define labels and allocate the result register (trying for the return
12231 * register if we can).
12232 */
12233 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12234 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
12235 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
12236 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
12237 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
12238 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12239 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12240 : UINT32_MAX;
12241//off=iemNativeEmitBrk(pReNative, off, 0);
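    /* Code layout note: unless the lookup is skipped, the inline stream jumps
       forward to the TlbLookup code (emitted near the end of this function); the
       TlbMiss helper-call code that follows here is only reached via a jump from
       the lookup code on a miss, and both paths converge at the TlbDone label. */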
12242 /*
12243 * Jump to the TLB lookup code.
12244 */
12245 if (!TlbState.fSkip)
12246 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12247
12248 /*
12249 * TlbMiss:
12250 *
12251 * Call helper to do the fetching.
12252 * We flush all guest register shadow copies here.
12253 */
12254 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
12255
12256#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12257 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12258#else
12259 RT_NOREF(idxInstr);
12260#endif
12261
12262#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12263 /* Save variables in volatile registers. */
12264 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
12265 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12266#endif
12267
12268 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
12269 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
12270#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12271 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
12272#else
12273 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12274#endif
12275
12276 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
12277 if (iSegReg != UINT8_MAX)
12278 {
12279 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
12280 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
12281 }
12282
12283 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
12284 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
12285 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
12286
12287 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12288 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12289
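    /* At this point the register arguments describe a call of roughly the shape
       (inferred from the loads above; see the helper definitions for the exact
       prototypes):
           pvMem = pfnFunction(pVCpu, &bUnmapInfo, GCPtrMem[, iSegReg]);
       with the mapped pointer coming back in IEMNATIVE_CALL_RET_GREG. */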
12290 /* Done setting up parameters, make the call. */
12291 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12292
12293 /*
12294 * Put the output in the right registers.
12295 */
12296 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
12297 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12298 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12299
12300#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12301 /* Restore variables and guest shadow registers to volatile registers. */
12302 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12303 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12304#endif
12305
12306 Assert(pReNative->Core.aVars[idxVarUnmapInfo].idxReg == idxRegUnmapInfo);
12307 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
12308
12309#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12310 if (!TlbState.fSkip)
12311 {
12312         /* End of TlbMiss - jump to the done label. */
12313 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12314 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12315
12316 /*
12317 * TlbLookup:
12318 */
12319 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
12320 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12321# ifdef VBOX_WITH_STATISTICS
12322 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
12323 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
12324# endif
12325
12326 /* [idxVarUnmapInfo] = 0; */
12327 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
12328
12329 /*
12330 * TlbDone:
12331 */
12332 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12333
12334 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
12335
12336# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12337 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
12338 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12339# endif
12340 }
12341#else
12342 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
12343#endif
12344
12345 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
12346 iemNativeVarRegisterRelease(pReNative, idxVarMem);
12347
12348 return off;
12349}
12350
12351
12352#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
12353 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, \
12354 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
12355
12356#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
12357 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_WRITE, \
12358 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
12359
12360#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
12361 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ, \
12362 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
12363
12364DECL_INLINE_THROW(uint32_t)
12365iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
12366 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
12367{
12368 /*
12369 * Assert sanity.
12370 */
12371 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12372 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
12373 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
12374 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
12375#ifdef VBOX_STRICT
12376 switch (fAccess & IEM_ACCESS_TYPE_MASK)
12377 {
12378 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
12379 case IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
12380 case IEM_ACCESS_TYPE_READ: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
12381 default: AssertFailed();
12382 }
12383#else
12384 RT_NOREF(fAccess);
12385#endif
12386
12387 /*
12388 * To keep things simple we have to commit any pending writes first as we
12389 * may end up making calls (there shouldn't be any at this point, so this
12390 * is just for consistency).
12391 */
12392 /** @todo we could postpone this till we make the call and reload the
12393 * registers after returning from the call. Not sure if that's sensible or
12394 * not, though. */
12395 off = iemNativeRegFlushPendingWrites(pReNative, off);
12396
12397 /*
12398 * Move/spill/flush stuff out of call-volatile registers.
12399 *
12400 * We exclude any register holding the bUnmapInfo variable, as we'll be
12401 * checking it after returning from the call and will free it afterwards.
12402 */
12403 /** @todo save+restore active registers and maybe guest shadows in miss
12404 * scenario. */
12405 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
12406
12407 /*
12408 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
12409 * to call the unmap helper function.
12410 *
12411     * The likelihood of it being zero is higher than for the TLB hit when doing
12412     * the mapping, as a TLB miss for a well aligned and unproblematic memory
12413 * access should also end up with a mapping that won't need special unmapping.
12414 */
12415 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
12416 * should speed up things for the pure interpreter as well when TLBs
12417 * are enabled. */
12418#ifdef RT_ARCH_AMD64
12419 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
12420 {
12421 /* test byte [rbp - xxx], 0ffh */
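        /* Encoding note: 0xf6 /0 ib is TEST r/m8, imm8; iemNativeEmitGprByBpDisp
           supplies the ModR/M byte (reg field 0 = /0) and the RBP-relative
           displacement, and the trailing 0xff byte is the immediate mask. */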
12422 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
12423 pbCodeBuf[off++] = 0xf6;
12424 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
12425 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
12426 pbCodeBuf[off++] = 0xff;
12427 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12428 }
12429 else
12430#endif
12431 {
12432 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
12433 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
12434 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
12435 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
12436 }
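    /* A zero bUnmapInfo (as produced by the TLB-hit path of the map emitters
       above) means no unmapping is needed, so the jz emitted next skips the
       helper call; its target is fixed up at the end of this function. */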
12437 uint32_t const offJmpFixup = off;
12438    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
12439
12440 /*
12441 * Call the unmap helper function.
12442 */
12443#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
12444 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12445#else
12446 RT_NOREF(idxInstr);
12447#endif
12448
12449 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
12450 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
12451 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12452
12453 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12454 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12455
12456 /* Done setting up parameters, make the call. */
12457 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12458
12459    /* The bUnmapInfo variable is implicitly freed by these MCs. */
12460 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
12461
12462 /*
12463 * Done, just fixup the jump for the non-call case.
12464 */
12465 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
12466
12467 return off;
12468}
12469
12470
12471
12472/*********************************************************************************************************************************
12473* State and Exceptions *
12474*********************************************************************************************************************************/
12475
12476#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12477#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12478
12479#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12480#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12481#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12482
12483#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12484#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12485#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12486
12487
12488DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
12489{
12490 /** @todo this needs a lot more work later. */
12491 RT_NOREF(pReNative, fForChange);
12492 return off;
12493}
12494
12495
12496/*********************************************************************************************************************************
12497* The native code generator functions for each MC block. *
12498*********************************************************************************************************************************/
12499
12500
12501/*
12502 * Include g_apfnIemNativeRecompileFunctions and associated functions.
12503 *
12504 * This should probably live in its own file later, but let's see what the
12505 * compile times turn out to be first.
12506 */
12507#include "IEMNativeFunctions.cpp.h"
12508
12509
12510
12511/*********************************************************************************************************************************
12512* Recompiler Core. *
12513*********************************************************************************************************************************/
12514
12515
12516/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
12517static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
12518{
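    /* Zero the requested bytes and fail with VERR_NO_DATA so the disassembler
       never formats anything beyond what the caller has prefetched. */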
12519 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
12520 pDis->cbCachedInstr += cbMaxRead;
12521 RT_NOREF(cbMinRead);
12522 return VERR_NO_DATA;
12523}
12524
12525
12526/**
12527 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
12528 * @returns pszBuf.
12529 * @param fFlags The flags.
12530 * @param pszBuf The output buffer.
12531 * @param cbBuf The output buffer size. At least 32 bytes.
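 *
 * For example, a TB for 64-bit mode at CPL 0 of native type, with no other
 * flags set, would come out as "64BIT CPL0 TYPE_NATIVE" (illustrative).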
12532 */
12533DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
12534{
12535 Assert(cbBuf >= 32);
12536 static RTSTRTUPLE const s_aModes[] =
12537 {
12538 /* [00] = */ { RT_STR_TUPLE("16BIT") },
12539 /* [01] = */ { RT_STR_TUPLE("32BIT") },
12540 /* [02] = */ { RT_STR_TUPLE("!2!") },
12541 /* [03] = */ { RT_STR_TUPLE("!3!") },
12542 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
12543 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
12544 /* [06] = */ { RT_STR_TUPLE("!6!") },
12545 /* [07] = */ { RT_STR_TUPLE("!7!") },
12546 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
12547 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
12548 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
12549 /* [0b] = */ { RT_STR_TUPLE("!b!") },
12550 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
12551 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
12552 /* [0e] = */ { RT_STR_TUPLE("!e!") },
12553 /* [0f] = */ { RT_STR_TUPLE("!f!") },
12554 /* [10] = */ { RT_STR_TUPLE("!10!") },
12555 /* [11] = */ { RT_STR_TUPLE("!11!") },
12556 /* [12] = */ { RT_STR_TUPLE("!12!") },
12557 /* [13] = */ { RT_STR_TUPLE("!13!") },
12558 /* [14] = */ { RT_STR_TUPLE("!14!") },
12559 /* [15] = */ { RT_STR_TUPLE("!15!") },
12560 /* [16] = */ { RT_STR_TUPLE("!16!") },
12561 /* [17] = */ { RT_STR_TUPLE("!17!") },
12562 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
12563 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
12564 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
12565 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
12566 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
12567 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
12568 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
12569 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
12570 };
12571 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
12572 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
12573 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
12574
12575 pszBuf[off++] = ' ';
12576 pszBuf[off++] = 'C';
12577 pszBuf[off++] = 'P';
12578 pszBuf[off++] = 'L';
12579 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
12580 Assert(off < 32);
12581
12582 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
12583
12584 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
12585 {
12586 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
12587 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
12588 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
12589 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
12590 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
12591 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
12592 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
12593 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
12594 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
12595 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
12596 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
12597 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
12598 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
12599 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
12600 };
12601 if (fFlags)
12602 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
12603 if (s_aFlags[i].fFlag & fFlags)
12604 {
12605 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
12606 pszBuf[off++] = ' ';
12607 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
12608 off += s_aFlags[i].cchName;
12609 fFlags &= ~s_aFlags[i].fFlag;
12610 if (!fFlags)
12611 break;
12612 }
12613 pszBuf[off] = '\0';
12614
12615 return pszBuf;
12616}
12617
12618
12619DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
12620{
12621 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
12622#if defined(RT_ARCH_AMD64)
12623 static const char * const a_apszMarkers[] =
12624 {
12625 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
12626 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
12627 };
12628#endif
12629
12630 char szDisBuf[512];
12631 DISSTATE Dis;
12632 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
12633 uint32_t const cNative = pTb->Native.cInstructions;
12634 uint32_t offNative = 0;
12635#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12636 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
12637#endif
12638 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
12639 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
12640 : DISCPUMODE_64BIT;
12641#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12642 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
12643#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12644 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
12645#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
12646# error "Port me"
12647#else
12648 csh hDisasm = ~(size_t)0;
12649# if defined(RT_ARCH_AMD64)
12650 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
12651# elif defined(RT_ARCH_ARM64)
12652 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
12653# else
12654# error "Port me"
12655# endif
12656 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
12657#endif
12658
12659 /*
12660 * Print TB info.
12661 */
12662 pHlp->pfnPrintf(pHlp,
12663 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
12664 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
12665 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
12666 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
12667#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12668 if (pDbgInfo && pDbgInfo->cEntries > 1)
12669 {
12670 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
12671
12672 /*
12673 * This disassembly is driven by the debug info which follows the native
12674 * code and indicates when it starts with the next guest instructions,
12675 * where labels are and such things.
12676 */
12677 uint32_t idxThreadedCall = 0;
12678 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
12679 uint8_t idxRange = UINT8_MAX;
12680 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
12681 uint32_t offRange = 0;
12682 uint32_t offOpcodes = 0;
12683 uint32_t const cbOpcodes = pTb->cbOpcodes;
12684 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
12685 uint32_t const cDbgEntries = pDbgInfo->cEntries;
12686 uint32_t iDbgEntry = 1;
12687 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
12688
12689 while (offNative < cNative)
12690 {
12691 /* If we're at or have passed the point where the next chunk of debug
12692 info starts, process it. */
12693 if (offDbgNativeNext <= offNative)
12694 {
12695 offDbgNativeNext = UINT32_MAX;
12696 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
12697 {
12698 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
12699 {
12700 case kIemTbDbgEntryType_GuestInstruction:
12701 {
12702 /* Did the exec flag change? */
12703 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
12704 {
12705 pHlp->pfnPrintf(pHlp,
12706 " fExec change %#08x -> %#08x %s\n",
12707 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
12708 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
12709 szDisBuf, sizeof(szDisBuf)));
12710 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
12711 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
12712 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
12713 : DISCPUMODE_64BIT;
12714 }
12715
12716                         /* New opcode range? We need to cope with a spurious debug info entry here for cases
12717 where the compilation was aborted before the opcode was recorded and the actual
12718 instruction was translated to a threaded call. This may happen when we run out
12719 of ranges, or when some complicated interrupts/FFs are found to be pending or
12720 similar. So, we just deal with it here rather than in the compiler code as it
12721 is a lot simpler to do here. */
12722 if ( idxRange == UINT8_MAX
12723 || idxRange >= cRanges
12724 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
12725 {
12726 idxRange += 1;
12727 if (idxRange < cRanges)
12728 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
12729 else
12730 continue;
12731 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
12732 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
12733 + (pTb->aRanges[idxRange].idxPhysPage == 0
12734 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
12735 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
12736 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
12737 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
12738 pTb->aRanges[idxRange].idxPhysPage);
12739 GCPhysPc += offRange;
12740 }
12741
12742 /* Disassemble the instruction. */
12743 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
12744 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
12745 uint32_t cbInstr = 1;
12746 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
12747 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
12748 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
12749 if (RT_SUCCESS(rc))
12750 {
12751 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12752 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12753 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12754 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12755
12756 static unsigned const s_offMarker = 55;
12757 static char const s_szMarker[] = " ; <--- guest";
12758 if (cch < s_offMarker)
12759 {
12760 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
12761 cch = s_offMarker;
12762 }
12763 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
12764 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
12765
12766 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
12767 }
12768 else
12769 {
12770 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
12771 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
12772 cbInstr = 1;
12773 }
12774 GCPhysPc += cbInstr;
12775 offOpcodes += cbInstr;
12776 offRange += cbInstr;
12777 continue;
12778 }
12779
12780 case kIemTbDbgEntryType_ThreadedCall:
12781 pHlp->pfnPrintf(pHlp,
12782 " Call #%u to %s (%u args) - %s\n",
12783 idxThreadedCall,
12784 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
12785 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
12786 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
12787 idxThreadedCall++;
12788 continue;
12789
12790 case kIemTbDbgEntryType_GuestRegShadowing:
12791 {
12792 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
12793 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
12794 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
12795 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
12796 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
12797 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
12798 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
12799 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
12800 else
12801 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
12802 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
12803 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
12804 continue;
12805 }
12806
12807 case kIemTbDbgEntryType_Label:
12808 {
12809 const char *pszName = "what_the_fudge";
12810 const char *pszComment = "";
12811 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
12812 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
12813 {
12814 case kIemNativeLabelType_Return:
12815 pszName = "Return";
12816 break;
12817 case kIemNativeLabelType_ReturnBreak:
12818 pszName = "ReturnBreak";
12819 break;
12820 case kIemNativeLabelType_ReturnWithFlags:
12821 pszName = "ReturnWithFlags";
12822 break;
12823 case kIemNativeLabelType_NonZeroRetOrPassUp:
12824 pszName = "NonZeroRetOrPassUp";
12825 break;
12826 case kIemNativeLabelType_RaiseGp0:
12827 pszName = "RaiseGp0";
12828 break;
12829 case kIemNativeLabelType_ObsoleteTb:
12830 pszName = "ObsoleteTb";
12831 break;
12832 case kIemNativeLabelType_NeedCsLimChecking:
12833 pszName = "NeedCsLimChecking";
12834 break;
12835 case kIemNativeLabelType_CheckBranchMiss:
12836 pszName = "CheckBranchMiss";
12837 break;
12838 case kIemNativeLabelType_If:
12839 pszName = "If";
12840 fNumbered = true;
12841 break;
12842 case kIemNativeLabelType_Else:
12843 pszName = "Else";
12844 fNumbered = true;
12845 pszComment = " ; regs state restored pre-if-block";
12846 break;
12847 case kIemNativeLabelType_Endif:
12848 pszName = "Endif";
12849 fNumbered = true;
12850 break;
12851 case kIemNativeLabelType_CheckIrq:
12852 pszName = "CheckIrq_CheckVM";
12853 fNumbered = true;
12854 break;
12855 case kIemNativeLabelType_TlbLookup:
12856 pszName = "TlbLookup";
12857 fNumbered = true;
12858 break;
12859 case kIemNativeLabelType_TlbMiss:
12860 pszName = "TlbMiss";
12861 fNumbered = true;
12862 break;
12863 case kIemNativeLabelType_TlbDone:
12864 pszName = "TlbDone";
12865 fNumbered = true;
12866 break;
12867 case kIemNativeLabelType_Invalid:
12868 case kIemNativeLabelType_End:
12869 break;
12870 }
12871 if (fNumbered)
12872 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
12873 else
12874 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
12875 continue;
12876 }
12877
12878 case kIemTbDbgEntryType_NativeOffset:
12879 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
12880 Assert(offDbgNativeNext > offNative);
12881 break;
12882
12883 default:
12884 AssertFailed();
12885 }
12886 iDbgEntry++;
12887 break;
12888 }
12889 }
12890
12891 /*
12892 * Disassemble the next native instruction.
12893 */
12894 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
12895# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
12896 uint32_t cbInstr = sizeof(paNative[0]);
12897 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
12898 if (RT_SUCCESS(rc))
12899 {
12900# if defined(RT_ARCH_AMD64)
12901 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
12902 {
12903 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
12904 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
12905 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
12906 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
12907 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
12908 uInfo & 0x8000 ? "recompiled" : "todo");
12909 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
12910 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
12911 else
12912 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
12913 }
12914 else
12915# endif
12916 {
12917# ifdef RT_ARCH_AMD64
12918 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12919 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12920 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12921 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12922# elif defined(RT_ARCH_ARM64)
12923 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
12924 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12925 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12926# else
12927# error "Port me"
12928# endif
12929 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
12930 }
12931 }
12932 else
12933 {
12934# if defined(RT_ARCH_AMD64)
12935 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
12936 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
12937# elif defined(RT_ARCH_ARM64)
12938 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
12939# else
12940# error "Port me"
12941# endif
12942 cbInstr = sizeof(paNative[0]);
12943 }
12944 offNative += cbInstr / sizeof(paNative[0]);
12945
12946# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12947 cs_insn *pInstr;
12948 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
12949 (uintptr_t)pNativeCur, 1, &pInstr);
12950 if (cInstrs > 0)
12951 {
12952 Assert(cInstrs == 1);
12953# if defined(RT_ARCH_AMD64)
12954 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
12955 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
12956# else
12957 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
12958 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
12959# endif
12960 offNative += pInstr->size / sizeof(*pNativeCur);
12961 cs_free(pInstr, cInstrs);
12962 }
12963 else
12964 {
12965# if defined(RT_ARCH_AMD64)
12966 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
12967                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
12968# else
12969 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
12970# endif
12971 offNative++;
12972 }
12973# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12974 }
12975 }
12976 else
12977#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
12978 {
12979 /*
12980 * No debug info, just disassemble the x86 code and then the native code.
12981 *
12982 * First the guest code:
12983 */
12984 for (unsigned i = 0; i < pTb->cRanges; i++)
12985 {
12986 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
12987 + (pTb->aRanges[i].idxPhysPage == 0
12988 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
12989 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
12990 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
12991 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
12992 unsigned off = pTb->aRanges[i].offOpcodes;
12993 /** @todo this ain't working when crossing pages! */
12994 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
12995 while (off < cbOpcodes)
12996 {
12997 uint32_t cbInstr = 1;
12998 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
12999 &pTb->pabOpcodes[off], cbOpcodes - off,
13000 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13001 if (RT_SUCCESS(rc))
13002 {
13003 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13004 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13005 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13006 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13007 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
13008 GCPhysPc += cbInstr;
13009 off += cbInstr;
13010 }
13011 else
13012 {
13013 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
13014 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
13015 break;
13016 }
13017 }
13018 }
13019
13020 /*
13021 * Then the native code:
13022 */
13023 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
13024 while (offNative < cNative)
13025 {
13026 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13027# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13028 uint32_t cbInstr = sizeof(paNative[0]);
13029 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13030 if (RT_SUCCESS(rc))
13031 {
13032# if defined(RT_ARCH_AMD64)
13033 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13034 {
13035 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13036 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13037 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13038 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13039 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13040 uInfo & 0x8000 ? "recompiled" : "todo");
13041 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13042 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13043 else
13044 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13045 }
13046 else
13047# endif
13048 {
13049# ifdef RT_ARCH_AMD64
13050 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13051 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13052 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13053 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13054# elif defined(RT_ARCH_ARM64)
13055 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13056 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13057 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13058# else
13059# error "Port me"
13060# endif
13061 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
13062 }
13063 }
13064 else
13065 {
13066# if defined(RT_ARCH_AMD64)
13067 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
13068 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
13069# else
13070 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
13071# endif
13072 cbInstr = sizeof(paNative[0]);
13073 }
13074 offNative += cbInstr / sizeof(paNative[0]);
13075
13076# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13077 cs_insn *pInstr;
13078 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
13079 (uintptr_t)pNativeCur, 1, &pInstr);
13080 if (cInstrs > 0)
13081 {
13082 Assert(cInstrs == 1);
13083# if defined(RT_ARCH_AMD64)
13084 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
13085 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
13086# else
13087 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
13088 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
13089# endif
13090 offNative += pInstr->size / sizeof(*pNativeCur);
13091 cs_free(pInstr, cInstrs);
13092 }
13093 else
13094 {
13095# if defined(RT_ARCH_AMD64)
13096 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
13097                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
13098# else
13099 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
13100# endif
13101 offNative++;
13102 }
13103# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13104 }
13105 }
13106
13107#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13108 /* Cleanup. */
13109 cs_close(&hDisasm);
13110#endif
13111}
13112
13113
13114/**
13115 * Recompiles the given threaded TB into a native one.
13116 *
13117 * In case of failure the translation block will be returned as-is.
13118 *
13119 * @returns pTb.
13120 * @param pVCpu The cross context virtual CPU structure of the calling
13121 * thread.
13122 * @param pTb The threaded translation block to recompile to native.
13123 */
13124DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
13125{
13126 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
13127
13128 /*
13129     * The first time through, we allocate the recompiler state; the other times
13130 * we just need to reset it before using it again.
13131 */
13132 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
13133 if (RT_LIKELY(pReNative))
13134 iemNativeReInit(pReNative, pTb);
13135 else
13136 {
13137 pReNative = iemNativeInit(pVCpu, pTb);
13138 AssertReturn(pReNative, pTb);
13139 }
13140
13141 /*
13142 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
13143 * for aborting if an error happens.
13144 */
13145 uint32_t cCallsLeft = pTb->Thrd.cCalls;
13146#ifdef LOG_ENABLED
13147 uint32_t const cCallsOrg = cCallsLeft;
13148#endif
13149 uint32_t off = 0;
13150 int rc = VINF_SUCCESS;
13151 IEMNATIVE_TRY_SETJMP(pReNative, rc)
13152 {
13153 /*
13154 * Emit prolog code (fixed).
13155 */
13156 off = iemNativeEmitProlog(pReNative, off);
13157
13158 /*
13159 * Convert the calls to native code.
13160 */
13161#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13162 int32_t iGstInstr = -1;
13163#endif
13164#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
13165 uint32_t cThreadedCalls = 0;
13166 uint32_t cRecompiledCalls = 0;
13167#endif
13168 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
13169 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
13170 while (cCallsLeft-- > 0)
13171 {
13172 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
13173
13174 /*
13175 * Debug info and assembly markup.
13176 */
13177 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
13178 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
13179#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13180 iemNativeDbgInfoAddNativeOffset(pReNative, off);
13181 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
13182 {
13183 if (iGstInstr < (int32_t)pTb->cInstructions)
13184 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
13185 else
13186 Assert(iGstInstr == pTb->cInstructions);
13187 iGstInstr = pCallEntry->idxInstr;
13188 }
13189 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
13190#endif
13191#if defined(VBOX_STRICT)
13192 off = iemNativeEmitMarker(pReNative, off,
13193 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
13194 pCallEntry->enmFunction));
13195#endif
13196#if defined(VBOX_STRICT)
13197 iemNativeRegAssertSanity(pReNative);
13198#endif
13199
13200 /*
13201 * Actual work.
13202 */
13203 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
13204 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)"));
13205 if (pfnRecom) /** @todo stats on this. */
13206 {
13207 off = pfnRecom(pReNative, off, pCallEntry);
13208 STAM_REL_STATS({cRecompiledCalls++;});
13209 }
13210 else
13211 {
13212 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
13213 STAM_REL_STATS({cThreadedCalls++;});
13214 }
13215 Assert(off <= pReNative->cInstrBufAlloc);
13216 Assert(pReNative->cCondDepth == 0);
13217
13218 /*
13219 * Advance.
13220 */
13221 pCallEntry++;
13222 }
13223
13224 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
13225 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
13226 if (!cThreadedCalls)
13227 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
13228
13229 /*
13230 * Emit the epilog code.
13231 */
13232 uint32_t idxReturnLabel;
13233 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
13234
13235 /*
13236 * Generate special jump labels.
13237 */
13238 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
13239 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
13240 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
13241 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
13242 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
13243 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
13244 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
13245 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
13246 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
13247 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
13248 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
13249 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
13250 }
13251 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
13252 {
13253 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
13254 return pTb;
13255 }
13256 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
13257 Assert(off <= pReNative->cInstrBufAlloc);
13258
13259 /*
13260     * Make sure all labels have been defined.
13261 */
13262 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
13263#ifdef VBOX_STRICT
13264 uint32_t const cLabels = pReNative->cLabels;
13265 for (uint32_t i = 0; i < cLabels; i++)
13266 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
13267#endif
13268
13269 /*
13270 * Allocate executable memory, copy over the code we've generated.
13271 */
13272 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
13273 if (pTbAllocator->pDelayedFreeHead)
13274 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
13275
13276 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
13277 AssertReturn(paFinalInstrBuf, pTb);
13278 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
13279
13280 /*
13281 * Apply fixups.
13282 */
13283 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
13284 uint32_t const cFixups = pReNative->cFixups;
13285 for (uint32_t i = 0; i < cFixups; i++)
13286 {
13287 Assert(paFixups[i].off < off);
13288 Assert(paFixups[i].idxLabel < cLabels);
13289 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
13290 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
13291 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
13292 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
13293 switch (paFixups[i].enmType)
13294 {
13295#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
13296 case kIemNativeFixupType_Rel32:
13297 Assert(paFixups[i].off + 4 <= off);
13298 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13299 continue;
13300
13301#elif defined(RT_ARCH_ARM64)
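            /* These patch A64 branch immediates, counted in 32-bit instruction
               units: imm26 for B/BL, imm19 for B.cond/CBZ/CBNZ and imm14 for
               TBZ/TBNZ. */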
13302 case kIemNativeFixupType_RelImm26At0:
13303 {
13304 Assert(paFixups[i].off < off);
13305 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13306 Assert(offDisp >= -262144 && offDisp < 262144);
13307 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
13308 continue;
13309 }
13310
13311 case kIemNativeFixupType_RelImm19At5:
13312 {
13313 Assert(paFixups[i].off < off);
13314 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13315 Assert(offDisp >= -262144 && offDisp < 262144);
13316 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
13317 continue;
13318 }
13319
13320 case kIemNativeFixupType_RelImm14At5:
13321 {
13322 Assert(paFixups[i].off < off);
13323 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13324 Assert(offDisp >= -8192 && offDisp < 8192);
13325 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
13326 continue;
13327 }
13328
13329#endif
13330 case kIemNativeFixupType_Invalid:
13331 case kIemNativeFixupType_End:
13332 break;
13333 }
13334 AssertFailed();
13335 }
13336
13337 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
13338 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
13339
13340 /*
13341 * Convert the translation block.
13342 */
13343 RTMemFree(pTb->Thrd.paCalls);
13344 pTb->Native.paInstructions = paFinalInstrBuf;
13345 pTb->Native.cInstructions = off;
13346 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
13347#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13348    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
13349 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
13350#endif
13351
13352 Assert(pTbAllocator->cThreadedTbs > 0);
13353 pTbAllocator->cThreadedTbs -= 1;
13354 pTbAllocator->cNativeTbs += 1;
13355 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
13356
13357#ifdef LOG_ENABLED
13358 /*
13359 * Disassemble to the log if enabled.
13360 */
13361 if (LogIs3Enabled())
13362 {
13363 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
13364 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
13365# ifdef DEBUG_bird
13366 RTLogFlush(NULL);
13367# endif
13368 }
13369#endif
13370 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
13371
13372 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
13373 return pTb;
13374}
13375