VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 102719

Last change on this file since 102719 was 102717, checked in by vboxsync, 16 months ago

VBox/VMM: Outlined native TLB lookup code for IEM_MC_MEM_MAP_XXXX on x86 hosts. Untested+disabled. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 550.5 KB
1/* $Id: IEMAllN8veRecompiler.cpp 102717 2023-12-27 19:45:49Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128/*********************************************************************************************************************************
129* Internal Functions *
130*********************************************************************************************************************************/
131#ifdef VBOX_STRICT
132static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
133 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
134static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
135#endif
136#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
137static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
138static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
139#endif
140DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
142 IEMNATIVEGSTREG enmGstReg, uint32_t off);
143
144
145/*********************************************************************************************************************************
146* Executable Memory Allocator *
147*********************************************************************************************************************************/
148/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
149 * Use an alternative chunk sub-allocator that does not store internal data
150 * in the chunk.
151 *
152 * Using the RTHeapSimple is not practical on newer darwin systems where
153 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
154 * memory. We would have to change the protection of the whole chunk for
155 * every call to RTHeapSimple, which would be rather expensive.
156 *
157 * This alternative implementation restricts page protection modifications
158 * to the pages backing the executable memory we just allocated.
159 */
160#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
161/** The chunk sub-allocation unit size in bytes. */
162#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
163/** The chunk sub-allocation unit size as a shift factor. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
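/* Illustrative sketch (added for clarity, not part of the checked-in file; the function
 * name is made up): how a request size is rounded up to whole sub-allocation units with
 * the constants above, mirroring the computation in iemExecMemAllocatorAllocInChunk
 * further down.  E.g. a 300 byte request becomes 3 units, i.e. 384 bytes. */
#if 0
DECLINLINE(uint32_t) iemExecMemExampleReqToUnits(uint32_t cbReq)
{
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif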
165
166#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
167# ifdef IEMNATIVE_USE_GDB_JIT
168# define IEMNATIVE_USE_GDB_JIT_ET_DYN
169
170/** GDB JIT: Code entry. */
171typedef struct GDBJITCODEENTRY
172{
173 struct GDBJITCODEENTRY *pNext;
174 struct GDBJITCODEENTRY *pPrev;
175 uint8_t *pbSymFile;
176 uint64_t cbSymFile;
177} GDBJITCODEENTRY;
178
179/** GDB JIT: Actions. */
180typedef enum GDBJITACTIONS : uint32_t
181{
182 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
183} GDBJITACTIONS;
184
185/** GDB JIT: Descriptor. */
186typedef struct GDBJITDESCRIPTOR
187{
188 uint32_t uVersion;
189 GDBJITACTIONS enmAction;
190 GDBJITCODEENTRY *pRelevant;
191 GDBJITCODEENTRY *pHead;
192 /** Our addition: */
193 GDBJITCODEENTRY *pTail;
194} GDBJITDESCRIPTOR;
195
196/** GDB JIT: Our simple symbol file data. */
197typedef struct GDBJITSYMFILE
198{
199 Elf64_Ehdr EHdr;
200# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
201 Elf64_Shdr aShdrs[5];
202# else
203 Elf64_Shdr aShdrs[7];
204 Elf64_Phdr aPhdrs[2];
205# endif
206 /** The dwarf ehframe data for the chunk. */
207 uint8_t abEhFrame[512];
208 char szzStrTab[128];
209 Elf64_Sym aSymbols[3];
210# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
211 Elf64_Sym aDynSyms[2];
212 Elf64_Dyn aDyn[6];
213# endif
214} GDBJITSYMFILE;
215
216extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
217extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
218
219/** Init once for g_IemNativeGdbJitLock. */
220static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
221/** Init once for the critical section. */
222static RTCRITSECT g_IemNativeGdbJitLock;
223
224/** GDB reads the info here. */
225GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
226
227/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
228DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
229{
230 ASMNopPause();
231}
232
233/** @callback_method_impl{FNRTONCE} */
234static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
235{
236 RT_NOREF(pvUser);
237 return RTCritSectInit(&g_IemNativeGdbJitLock);
238}
239
240
241# endif /* IEMNATIVE_USE_GDB_JIT */
242
243/**
244 * Per-chunk unwind info for non-windows hosts.
245 */
246typedef struct IEMEXECMEMCHUNKEHFRAME
247{
248# ifdef IEMNATIVE_USE_LIBUNWIND
249 /** The offset of the FDA into abEhFrame. */
250 uintptr_t offFda;
251# else
252 /** 'struct object' storage area. */
253 uint8_t abObject[1024];
254# endif
255# ifdef IEMNATIVE_USE_GDB_JIT
256# if 0
257 /** The GDB JIT 'symbol file' data. */
258 GDBJITSYMFILE GdbJitSymFile;
259# endif
260 /** The GDB JIT list entry. */
261 GDBJITCODEENTRY GdbJitEntry;
262# endif
263 /** The dwarf ehframe data for the chunk. */
264 uint8_t abEhFrame[512];
265} IEMEXECMEMCHUNKEHFRAME;
266/** Pointer to per-chunk unwind info for non-windows hosts. */
267typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
268#endif
269
270
271/**
272 * A chunk of executable memory.
273 */
274typedef struct IEMEXECMEMCHUNK
275{
276#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
277 /** Number of free items in this chunk. */
278 uint32_t cFreeUnits;
280    /** Hint where to start searching for free space in the allocation bitmap. */
280 uint32_t idxFreeHint;
281#else
282 /** The heap handle. */
283 RTHEAPSIMPLE hHeap;
284#endif
285 /** Pointer to the chunk. */
286 void *pvChunk;
287#ifdef IN_RING3
288 /**
289 * Pointer to the unwind information.
290 *
291 * This is used during C++ throw and longjmp (windows and probably most other
292     * platforms). Some debuggers (windbg) make use of it as well.
293 *
294 * Windows: This is allocated from hHeap on windows because (at least for
295 * AMD64) the UNWIND_INFO structure address in the
296 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
297 *
298 * Others: Allocated from the regular heap to avoid unnecessary executable data
299 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
300 void *pvUnwindInfo;
301#elif defined(IN_RING0)
302 /** Allocation handle. */
303 RTR0MEMOBJ hMemObj;
304#endif
305} IEMEXECMEMCHUNK;
306/** Pointer to a memory chunk. */
307typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
308
309
310/**
311 * Executable memory allocator for the native recompiler.
312 */
313typedef struct IEMEXECMEMALLOCATOR
314{
315 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
316 uint32_t uMagic;
317
318 /** The chunk size. */
319 uint32_t cbChunk;
320 /** The maximum number of chunks. */
321 uint32_t cMaxChunks;
322 /** The current number of chunks. */
323 uint32_t cChunks;
324 /** Hint where to start looking for available memory. */
325 uint32_t idxChunkHint;
326 /** Statistics: Current number of allocations. */
327 uint32_t cAllocations;
328
329 /** The total amount of memory available. */
330 uint64_t cbTotal;
331 /** Total amount of free memory. */
332 uint64_t cbFree;
333 /** Total amount of memory allocated. */
334 uint64_t cbAllocated;
335
336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
337 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
338 *
339 * Since the chunk size is a power of two and the minimum chunk size is a lot
340 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
341 * require a whole number of uint64_t elements in the allocation bitmap. So,
342     * for the sake of simplicity (read: laziness), they are all allocated as one
343     * contiguous chunk. */
344 uint64_t *pbmAlloc;
345 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
346 uint32_t cUnitsPerChunk;
347 /** Number of bitmap elements per chunk (for quickly locating the bitmap
348     * portion corresponding to a chunk). */
349 uint32_t cBitmapElementsPerChunk;
350#else
351    /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
352 * @{ */
353 /** The size of the heap internal block header. This is used to adjust the
354     * request memory size to make sure there is exactly enough room for a header at
355 * the end of the blocks we allocate before the next 64 byte alignment line. */
356 uint32_t cbHeapBlockHdr;
357    /** The size of the initial heap allocation required to make sure the first
358 * allocation is correctly aligned. */
359 uint32_t cbHeapAlignTweak;
360 /** The alignment tweak allocation address. */
361 void *pvAlignTweak;
362 /** @} */
363#endif
364
365#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
366 /** Pointer to the array of unwind info running parallel to aChunks (same
367 * allocation as this structure, located after the bitmaps).
368 * (For Windows, the structures must reside in 32-bit RVA distance to the
369 * actual chunk, so they are allocated off the chunk.) */
370 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
371#endif
372
373 /** The allocation chunks. */
374 RT_FLEXIBLE_ARRAY_EXTENSION
375 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
376} IEMEXECMEMALLOCATOR;
377/** Pointer to an executable memory allocator. */
378typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
379
380/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
381#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
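/* Worked sizing example (added for clarity; the figures assume the default 64 MiB chunk
 * size picked by iemExecMemAllocatorInit below for large cbMax values):
 *      cUnitsPerChunk          = 64 MiB / 128 bytes = 524288 units
 *      cBitmapElementsPerChunk = 524288 / 64        =   8192 uint64_t
 * i.e. roughly 64 KiB of allocation bitmap per 64 MiB chunk. */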
382
383
384static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
385
386
387/**
388 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
389 * the heap statistics.
390 */
391static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
392 uint32_t cbReq, uint32_t idxChunk)
393{
394 pExecMemAllocator->cAllocations += 1;
395 pExecMemAllocator->cbAllocated += cbReq;
396#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
397 pExecMemAllocator->cbFree -= cbReq;
398#else
399 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
400#endif
401 pExecMemAllocator->idxChunkHint = idxChunk;
402
403#ifdef RT_OS_DARWIN
404 /*
405 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
406 * on darwin. So, we mark the pages returned as read+write after alloc and
407 * expect the caller to call iemExecMemAllocatorReadyForUse when done
408 * writing to the allocation.
409 *
410 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
411 * for details.
412 */
413 /** @todo detect if this is necessary... it wasn't required on 10.15 or
414 * whatever older version it was. */
415 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
416 AssertRC(rc);
417#endif
418
419 return pvRet;
420}
421
422
423#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
424static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
425 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
426{
427 /*
428 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
429 */
430 Assert(!(cToScan & 63));
431 Assert(!(idxFirst & 63));
432 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
433 pbmAlloc += idxFirst / 64;
434
435 /*
436     * Scan the bitmap for cReqUnits consecutive clear bits.
437 */
438 /** @todo This can probably be done more efficiently for non-x86 systems. */
439 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
440 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
441 {
442 uint32_t idxAddBit = 1;
443 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
444 idxAddBit++;
445 if (idxAddBit >= cReqUnits)
446 {
447 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
448
449 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
450 pChunk->cFreeUnits -= cReqUnits;
451 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
452
453 void * const pvRet = (uint8_t *)pChunk->pvChunk
454 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
455
456 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
457 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
458 }
459
460 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
461 }
462 return NULL;
463}
464#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
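/* Worked example of the bitmap scan above (added for clarity; the bit pattern is made up):
 * with cReqUnits = 3 and the low bits of the (shifted) bitmap being 1 1 0 1 0 0 0 1 ...,
 * ASMBitFirstClear returns bit 2, the inner loop stops at the set bit 3, ASMBitNextClear
 * then resumes the search and returns bit 4, and bits 4..6 are clear, so units 4..6 get
 * marked allocated and the returned address is
 * pvChunk + ((idxFirst + 4) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT). */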
465
466
467static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
468{
469#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
470 /*
471 * Figure out how much to allocate.
472 */
473 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
474 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
475 {
476 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
477 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
478 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
479 {
480 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
481 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
482 if (pvRet)
483 return pvRet;
484 }
485 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
486 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
487 cReqUnits, idxChunk);
488 }
489#else
490 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
491 if (pvRet)
492 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
493#endif
494 return NULL;
495
496}
497
498
499/**
500 * Allocates @a cbReq bytes of executable memory.
501 *
502 * @returns Pointer to the memory, NULL if out of memory or other problem
503 * encountered.
504 * @param pVCpu The cross context virtual CPU structure of the calling
505 * thread.
506 * @param cbReq How many bytes are required.
507 */
508static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
509{
510 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
511 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
512 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
513
514
515 for (unsigned iIteration = 0;; iIteration++)
516 {
517 /*
518 * Adjust the request size so it'll fit the allocator alignment/whatnot.
519 *
520 * For the RTHeapSimple allocator this means to follow the logic described
521 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
522 * existing chunks if we think we've got sufficient free memory around.
523 *
524 * While for the alternative one we just align it up to a whole unit size.
525 */
526#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
527 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
528#else
529 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
530#endif
531 if (cbReq <= pExecMemAllocator->cbFree)
532 {
533 uint32_t const cChunks = pExecMemAllocator->cChunks;
534 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
535 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
536 {
537 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
538 if (pvRet)
539 return pvRet;
540 }
541 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 }
548
549 /*
550 * Can we grow it with another chunk?
551 */
552 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
553 {
554 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
555 AssertLogRelRCReturn(rc, NULL);
556
557 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
558 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
559 if (pvRet)
560 return pvRet;
561 AssertFailed();
562 }
563
564 /*
565         * Try pruning native TBs once.
566 */
567 if (iIteration == 0)
568 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
569 else
570 {
571 /** @todo stats... */
572 return NULL;
573 }
574 }
575
576}
577
578
579/** This is a hook that we may need later for changing memory protection back
580 * to readonly+exec */
581static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
582{
583#ifdef RT_OS_DARWIN
584 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
585 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
586 AssertRC(rc); RT_NOREF(pVCpu);
587
588 /*
589 * Flush the instruction cache:
590 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
591 */
592 /* sys_dcache_flush(pv, cb); - not necessary */
593 sys_icache_invalidate(pv, cb);
594#else
595 RT_NOREF(pVCpu, pv, cb);
596#endif
597}
598
599
600/**
601 * Frees executable memory.
602 */
603void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
604{
605 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
606 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
607 Assert(pv);
608#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
609 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
610#else
611 Assert(!((uintptr_t)pv & 63));
612#endif
613
614 /* Align the size as we did when allocating the block. */
615#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
616 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
617#else
618 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
619#endif
620
621 /* Free it / assert sanity. */
622#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
623 uint32_t const cChunks = pExecMemAllocator->cChunks;
624 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
625 bool fFound = false;
626 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
627 {
628 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
629 fFound = offChunk < cbChunk;
630 if (fFound)
631 {
632#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
633 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
634 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
635
636 /* Check that it's valid and free it. */
637 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
638 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
639 for (uint32_t i = 1; i < cReqUnits; i++)
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
641 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
642
643 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
644 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
645
646 /* Update the stats. */
647 pExecMemAllocator->cbAllocated -= cb;
648 pExecMemAllocator->cbFree += cb;
649 pExecMemAllocator->cAllocations -= 1;
650 return;
651#else
652 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
653 break;
654#endif
655 }
656 }
657# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
658 AssertFailed();
659# else
660 Assert(fFound);
661# endif
662#endif
663
664#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
665    /* Update stats while cb is freshly calculated. */
666 pExecMemAllocator->cbAllocated -= cb;
667 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
668 pExecMemAllocator->cAllocations -= 1;
669
670 /* Free it. */
671 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
672#endif
673}
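/* Minimal usage sketch (added for clarity, not from the original file; pbCode, cbCode and
 * pabEmitted are made-up names): the allocator hands back writable memory, the caller
 * emits/copies the recompiled instructions into it, and iemExecMemAllocatorReadyForUse
 * flips the pages to read+exec (flushing the icache on darwin) before execution. */
#if 0
    uint8_t *pbCode = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pbCode)
    {
        memcpy(pbCode, pabEmitted, cbCode);                    /* emit/copy the native code */
        iemExecMemAllocatorReadyForUse(pVCpu, pbCode, cbCode); /* RW -> RX + icache flush where needed */
        /* ... execute the translation block ...; later: */
        iemExecMemAllocatorFree(pVCpu, pbCode, cbCode);        /* return the memory when the TB dies */
    }
#endif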
674
675
676
677#ifdef IN_RING3
678# ifdef RT_OS_WINDOWS
679
680/**
681 * Initializes the unwind info structures for windows hosts.
682 */
683static int
684iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
685 void *pvChunk, uint32_t idxChunk)
686{
687 RT_NOREF(pVCpu);
688
689 /*
690 * The AMD64 unwind opcodes.
691 *
692 * This is a program that starts with RSP after a RET instruction that
693 * ends up in recompiled code, and the operations we describe here will
694 * restore all non-volatile registers and bring RSP back to where our
695 * RET address is. This means it's reverse order from what happens in
696 * the prologue.
697 *
698 * Note! Using a frame register approach here both because we have one
699     *       and mainly because the UWOP_ALLOC_LARGE argument values
700     *       would be a pain to write initializers for.  On the positive
701     *       side, we're impervious to changes in the stack variable
702     *       area and can deal with dynamic stack allocations if necessary.
703 */
704 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
705 {
706 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
707 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
708 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
709 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
710 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
711 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
712 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
713 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
714 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
715 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
716 };
717 union
718 {
719 IMAGE_UNWIND_INFO Info;
720 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
721 } s_UnwindInfo =
722 {
723 {
724 /* .Version = */ 1,
725 /* .Flags = */ 0,
726 /* .SizeOfProlog = */ 16, /* whatever */
727 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
728 /* .FrameRegister = */ X86_GREG_xBP,
729 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
730 }
731 };
732 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
733 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
734
735 /*
736 * Calc how much space we need and allocate it off the exec heap.
737 */
738 unsigned const cFunctionEntries = 1;
739 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
740 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
741# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
743 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
744 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
745# else
746 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
747 - pExecMemAllocator->cbHeapBlockHdr;
748 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
749 32 /*cbAlignment*/);
750# endif
751 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
752 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
753
754 /*
755 * Initialize the structures.
756 */
757 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
758
759 paFunctions[0].BeginAddress = 0;
760 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
761 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
762
763 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
764 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
765
766 /*
767 * Register it.
768 */
769 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
770 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
771
772 return VINF_SUCCESS;
773}
774
775
776# else /* !RT_OS_WINDOWS */
777
778/**
779 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
780 */
781DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
782{
783 if (iValue >= 64)
784 {
785 Assert(iValue < 0x2000);
786 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
787 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
788 }
789 else if (iValue >= 0)
790 *Ptr.pb++ = (uint8_t)iValue;
791 else if (iValue > -64)
792 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
793 else
794 {
795 Assert(iValue > -0x2000);
796 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
797 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
798 }
799 return Ptr;
800}
801
802
803/**
804 * Emits an ULEB128 encoded value (up to 64-bit wide).
805 */
806DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
807{
808 while (uValue >= 0x80)
809 {
810 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
811 uValue >>= 7;
812 }
813 *Ptr.pb++ = (uint8_t)uValue;
814 return Ptr;
815}
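/* Worked encoding examples for the two helpers above (added for clarity):
 *      iemDwarfPutUleb128(Ptr, 0x90)  ->  0x90 0x01   (low 7 bits + continuation, then 0x01)
 *      iemDwarfPutLeb128(Ptr, 300)    ->  0xac 0x02
 *      iemDwarfPutLeb128(Ptr, -8)     ->  0x78        (single byte with the 0x40 sign bit set)
 */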
816
817
818/**
819 * Emits a CFA rule as register @a uReg + offset @a off.
820 */
821DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
822{
823 *Ptr.pb++ = DW_CFA_def_cfa;
824 Ptr = iemDwarfPutUleb128(Ptr, uReg);
825 Ptr = iemDwarfPutUleb128(Ptr, off);
826 return Ptr;
827}
828
829
830/**
831 * Emits a register (@a uReg) save location:
832 * CFA + @a off * data_alignment_factor
833 */
834DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
835{
836 if (uReg < 0x40)
837 *Ptr.pb++ = DW_CFA_offset | uReg;
838 else
839 {
840 *Ptr.pb++ = DW_CFA_offset_extended;
841 Ptr = iemDwarfPutUleb128(Ptr, uReg);
842 }
843 Ptr = iemDwarfPutUleb128(Ptr, off);
844 return Ptr;
845}
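/* Worked example (added for clarity): on AMD64 the DWARF register number for RBP is 6
 * (which DWREG_AMD64_RBP presumably maps to), so iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2)
 * emits the two bytes 0x86 0x02, i.e. DW_CFA_offset | 6 followed by ULEB128(2), meaning
 * "RBP is saved at CFA + 2 * -8 = CFA - 16" given the data alignment factor of -8 written
 * into the CIE below. */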
846
847
848# if 0 /* unused */
849/**
850 * Emits a register (@a uReg) save location, using signed offset:
851 * CFA + @a offSigned * data_alignment_factor
852 */
853DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
854{
855 *Ptr.pb++ = DW_CFA_offset_extended_sf;
856 Ptr = iemDwarfPutUleb128(Ptr, uReg);
857 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
858 return Ptr;
859}
860# endif
861
862
863/**
864 * Initializes the unwind info section for non-windows hosts.
865 */
866static int
867iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
868 void *pvChunk, uint32_t idxChunk)
869{
870 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
871 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
872
873 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
874
875 /*
876 * Generate the CIE first.
877 */
878# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
879 uint8_t const iDwarfVer = 3;
880# else
881 uint8_t const iDwarfVer = 4;
882# endif
883 RTPTRUNION const PtrCie = Ptr;
884 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
885 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
886    *Ptr.pb++   = iDwarfVer;                    /* DWARF version */
887 *Ptr.pb++ = 0; /* Augmentation. */
888 if (iDwarfVer >= 4)
889 {
890 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
891 *Ptr.pb++ = 0; /* Segment selector size. */
892 }
893# ifdef RT_ARCH_AMD64
894 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
895# else
896 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
897# endif
898 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
901# elif defined(RT_ARCH_ARM64)
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
903# else
904# error "port me"
905# endif
906 /* Initial instructions: */
907# ifdef RT_ARCH_AMD64
908 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
916# elif defined(RT_ARCH_ARM64)
917# if 1
918 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
919# else
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
921# endif
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
934 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
935    /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
936# else
937# error "port me"
938# endif
939 while ((Ptr.u - PtrCie.u) & 3)
940 *Ptr.pb++ = DW_CFA_nop;
941 /* Finalize the CIE size. */
942 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
943
944 /*
945 * Generate an FDE for the whole chunk area.
946 */
947# ifdef IEMNATIVE_USE_LIBUNWIND
948 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
949# endif
950 RTPTRUNION const PtrFde = Ptr;
951    *Ptr.pu32++ = 123;                                       /* The FDE length will be determined later. */
952 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
953 Ptr.pu32++;
954 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
955    *Ptr.pu64++ = pExecMemAllocator->cbChunk;                /* PC range length for this FDE. */
956# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
957 *Ptr.pb++ = DW_CFA_nop;
958# endif
959 while ((Ptr.u - PtrFde.u) & 3)
960 *Ptr.pb++ = DW_CFA_nop;
961 /* Finalize the FDE size. */
962 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
963
964 /* Terminator entry. */
965 *Ptr.pu32++ = 0;
966 *Ptr.pu32++ = 0; /* just to be sure... */
967 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
968
969 /*
970 * Register it.
971 */
972# ifdef IEMNATIVE_USE_LIBUNWIND
973 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
974# else
975 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
976 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
977# endif
978
979# ifdef IEMNATIVE_USE_GDB_JIT
980 /*
981 * Now for telling GDB about this (experimental).
982 *
983 * This seems to work best with ET_DYN.
984 */
985 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
986# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
987 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
989# else
990 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
991 - pExecMemAllocator->cbHeapBlockHdr;
992    GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
993# endif
994 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
995 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
996
997 RT_ZERO(*pSymFile);
998
999 /*
1000 * The ELF header:
1001 */
1002 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1003 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1004 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1005 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1006 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1007 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1008 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1009 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1010# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1011 pSymFile->EHdr.e_type = ET_DYN;
1012# else
1013 pSymFile->EHdr.e_type = ET_REL;
1014# endif
1015# ifdef RT_ARCH_AMD64
1016 pSymFile->EHdr.e_machine = EM_AMD64;
1017# elif defined(RT_ARCH_ARM64)
1018 pSymFile->EHdr.e_machine = EM_AARCH64;
1019# else
1020# error "port me"
1021# endif
1022 pSymFile->EHdr.e_version = 1; /*?*/
1023 pSymFile->EHdr.e_entry = 0;
1024# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1025 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1026# else
1027 pSymFile->EHdr.e_phoff = 0;
1028# endif
1029 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1030 pSymFile->EHdr.e_flags = 0;
1031 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1032# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1033 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1034 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1035# else
1036 pSymFile->EHdr.e_phentsize = 0;
1037 pSymFile->EHdr.e_phnum = 0;
1038# endif
1039 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1040 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1041 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1042
1043 uint32_t offStrTab = 0;
1044#define APPEND_STR(a_szStr) do { \
1045 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1046 offStrTab += sizeof(a_szStr); \
1047 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1048 } while (0)
1049#define APPEND_STR_FMT(a_szStr, ...) do { \
1050 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1051 offStrTab++; \
1052 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1053 } while (0)
1054
1055 /*
1056 * Section headers.
1057 */
1058 /* Section header #0: NULL */
1059 unsigned i = 0;
1060 APPEND_STR("");
1061 RT_ZERO(pSymFile->aShdrs[i]);
1062 i++;
1063
1064 /* Section header: .eh_frame */
1065 pSymFile->aShdrs[i].sh_name = offStrTab;
1066 APPEND_STR(".eh_frame");
1067 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1068 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1069# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1070 pSymFile->aShdrs[i].sh_offset
1071 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1072# else
1073 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1074 pSymFile->aShdrs[i].sh_offset = 0;
1075# endif
1076
1077 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1078 pSymFile->aShdrs[i].sh_link = 0;
1079 pSymFile->aShdrs[i].sh_info = 0;
1080 pSymFile->aShdrs[i].sh_addralign = 1;
1081 pSymFile->aShdrs[i].sh_entsize = 0;
1082 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1083 i++;
1084
1085 /* Section header: .shstrtab */
1086 unsigned const iShStrTab = i;
1087 pSymFile->EHdr.e_shstrndx = iShStrTab;
1088 pSymFile->aShdrs[i].sh_name = offStrTab;
1089 APPEND_STR(".shstrtab");
1090 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1091 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1092# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1093 pSymFile->aShdrs[i].sh_offset
1094 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1095# else
1096 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1097 pSymFile->aShdrs[i].sh_offset = 0;
1098# endif
1099 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1100 pSymFile->aShdrs[i].sh_link = 0;
1101 pSymFile->aShdrs[i].sh_info = 0;
1102 pSymFile->aShdrs[i].sh_addralign = 1;
1103 pSymFile->aShdrs[i].sh_entsize = 0;
1104 i++;
1105
1106    /* Section header: .symtab */
1107 pSymFile->aShdrs[i].sh_name = offStrTab;
1108 APPEND_STR(".symtab");
1109 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1110 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1111 pSymFile->aShdrs[i].sh_offset
1112 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1113 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1114 pSymFile->aShdrs[i].sh_link = iShStrTab;
1115 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1117 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1118 i++;
1119
1120# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1121    /* Section header: .dynsym */
1122 pSymFile->aShdrs[i].sh_name = offStrTab;
1123 APPEND_STR(".dynsym");
1124 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1125 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1126 pSymFile->aShdrs[i].sh_offset
1127 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1128 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1129 pSymFile->aShdrs[i].sh_link = iShStrTab;
1130 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1132 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1133 i++;
1134# endif
1135
1136# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1137 /* Section header: .dynamic */
1138 pSymFile->aShdrs[i].sh_name = offStrTab;
1139 APPEND_STR(".dynamic");
1140 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1141 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1142 pSymFile->aShdrs[i].sh_offset
1143 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1144 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1145 pSymFile->aShdrs[i].sh_link = iShStrTab;
1146 pSymFile->aShdrs[i].sh_info = 0;
1147 pSymFile->aShdrs[i].sh_addralign = 1;
1148 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1149 i++;
1150# endif
1151
1152 /* Section header: .text */
1153 unsigned const iShText = i;
1154 pSymFile->aShdrs[i].sh_name = offStrTab;
1155 APPEND_STR(".text");
1156 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1157 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1158# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1159 pSymFile->aShdrs[i].sh_offset
1160 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1161# else
1162 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1163 pSymFile->aShdrs[i].sh_offset = 0;
1164# endif
1165 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1166 pSymFile->aShdrs[i].sh_link = 0;
1167 pSymFile->aShdrs[i].sh_info = 0;
1168 pSymFile->aShdrs[i].sh_addralign = 1;
1169 pSymFile->aShdrs[i].sh_entsize = 0;
1170 i++;
1171
1172 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1173
1174# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1175 /*
1176 * The program headers:
1177 */
1178 /* Everything in a single LOAD segment: */
1179 i = 0;
1180 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1181 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1182 pSymFile->aPhdrs[i].p_offset
1183 = pSymFile->aPhdrs[i].p_vaddr
1184 = pSymFile->aPhdrs[i].p_paddr = 0;
1185 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1186 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1187 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1188 i++;
1189 /* The .dynamic segment. */
1190 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1191 pSymFile->aPhdrs[i].p_flags = PF_R;
1192 pSymFile->aPhdrs[i].p_offset
1193 = pSymFile->aPhdrs[i].p_vaddr
1194 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1195 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1196 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1197 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1198 i++;
1199
1200 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1201
1202 /*
1203 * The dynamic section:
1204 */
1205 i = 0;
1206 pSymFile->aDyn[i].d_tag = DT_SONAME;
1207 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1208 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1211 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1212 i++;
1213 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1214 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_NULL;
1223 i++;
1224 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1225# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1226
1227 /*
1228 * Symbol tables:
1229 */
1230 /** @todo gdb doesn't seem to really like this ... */
1231 i = 0;
1232 pSymFile->aSymbols[i].st_name = 0;
1233 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1234 pSymFile->aSymbols[i].st_value = 0;
1235 pSymFile->aSymbols[i].st_size = 0;
1236 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1237 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1238# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1239 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1240# endif
1241 i++;
1242
1243 pSymFile->aSymbols[i].st_name = 0;
1244 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1245 pSymFile->aSymbols[i].st_value = 0;
1246 pSymFile->aSymbols[i].st_size = 0;
1247 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1248 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1249 i++;
1250
1251 pSymFile->aSymbols[i].st_name = offStrTab;
1252 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1253# if 0
1254 pSymFile->aSymbols[i].st_shndx = iShText;
1255 pSymFile->aSymbols[i].st_value = 0;
1256# else
1257 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1258 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1259# endif
1260 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1261 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1262 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1263# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1264 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1265 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1266# endif
1267 i++;
1268
1269 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1270 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1271
1272 /*
1273 * The GDB JIT entry and informing GDB.
1274 */
1275 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1276# if 1
1277 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1278# else
1279 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1280# endif
1281
1282 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1283 RTCritSectEnter(&g_IemNativeGdbJitLock);
1284 pEhFrame->GdbJitEntry.pNext = NULL;
1285 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1286 if (__jit_debug_descriptor.pTail)
1287 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1288 else
1289 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1290 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1291 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1292
1293 /* Notify GDB: */
1294 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1295 __jit_debug_register_code();
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1297 RTCritSectLeave(&g_IemNativeGdbJitLock);
1298
1299# else /* !IEMNATIVE_USE_GDB_JIT */
1300 RT_NOREF(pVCpu);
1301# endif /* !IEMNATIVE_USE_GDB_JIT */
1302
1303 return VINF_SUCCESS;
1304}
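/* Rough layout of the abEhFrame blob produced above (summary added for clarity):
 *      [length][CIE id = 0][version][aug][align factors][RA column][initial CFI][pad]   <- CIE
 *      [length][back-offset to CIE][chunk start PC][chunk PC range][pad]                <- FDE
 *      [0][0]                                                                           <- terminator
 * On darwin/libunwind the FDE (at offFda) is handed to __register_frame(), elsewhere the
 * whole blob goes to __register_frame_info() together with the abObject scratch area. */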
1305
1306# endif /* !RT_OS_WINDOWS */
1307#endif /* IN_RING3 */
1308
1309
1310/**
1311 * Adds another chunk to the executable memory allocator.
1312 *
1313 * This is used by the init code for the initial allocation and later by the
1314 * regular allocator function when it's out of memory.
1315 */
1316static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1317{
1318 /* Check that we've room for growth. */
1319 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1320 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1321
1322 /* Allocate a chunk. */
1323#ifdef RT_OS_DARWIN
1324 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1325#else
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1327#endif
1328 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1329
1330#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1331 int rc = VINF_SUCCESS;
1332#else
1333 /* Initialize the heap for the chunk. */
1334 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1335 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1336 AssertRC(rc);
1337 if (RT_SUCCESS(rc))
1338 {
1339 /*
1340         * We want the memory to be aligned on a 64 byte boundary, so the first time thru
1341 * here we do some exploratory allocations to see how we can achieve this.
1342 * On subsequent runs we only make an initial adjustment allocation, if
1343 * necessary.
1344 *
1345 * Since we own the heap implementation, we know that the internal block
1346 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1347         * so all we need to do wrt allocation size adjustments is to add 32 bytes
1348 * to the size, align up by 64 bytes, and subtract 32 bytes.
1349 *
1350 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1351         * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1352 * allocation to force subsequent allocations to return 64 byte aligned
1353 * user areas.
1354 */
1355 if (!pExecMemAllocator->cbHeapBlockHdr)
1356 {
1357 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1358 pExecMemAllocator->cbHeapAlignTweak = 64;
1359 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1360 32 /*cbAlignment*/);
1361 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1362
1363 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1364 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1365 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1366 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1367 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1368
1369 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 RTHeapSimpleFree(hHeap, pvTest2);
1376 RTHeapSimpleFree(hHeap, pvTest1);
1377 }
1378 else
1379 {
1380 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1381 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1382 }
1383 if (RT_SUCCESS(rc))
1384#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1385 {
1386 /*
1387 * Add the chunk.
1388 *
1389 * This must be done before the unwind init so windows can allocate
1390 * memory from the chunk when using the alternative sub-allocator.
1391 */
1392 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1393#ifdef IN_RING3
1394 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1395#endif
1396#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1397 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1398#else
1399 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1400 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1401 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1402 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1403#endif
1404
1405 pExecMemAllocator->cChunks = idxChunk + 1;
1406 pExecMemAllocator->idxChunkHint = idxChunk;
1407
1408#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1409 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1410 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1411#else
1412 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1413 pExecMemAllocator->cbTotal += cbFree;
1414 pExecMemAllocator->cbFree += cbFree;
1415#endif
1416
1417#ifdef IN_RING3
1418 /*
1419 * Initialize the unwind information (this cannot really fail atm).
1420 * (This sets pvUnwindInfo.)
1421 */
1422 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1423 if (RT_SUCCESS(rc))
1424#endif
1425 {
1426 return VINF_SUCCESS;
1427 }
1428
1429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1430            /* Just in case the impossible happens, undo the above: */
1431 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1432 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1433 pExecMemAllocator->cChunks = idxChunk;
1434 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1435 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1436 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1437 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1438#endif
1439 }
1440#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1441 }
1442#endif
1443 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1444 RT_NOREF(pVCpu);
1445 return rc;
1446}
1447
1448
1449/**
1450 * Initializes the executable memory allocator for native recompilation on the
1451 * calling EMT.
1452 *
1453 * @returns VBox status code.
1454 * @param pVCpu The cross context virtual CPU structure of the calling
1455 * thread.
1456 * @param cbMax The max size of the allocator.
1457 * @param cbInitial The initial allocator size.
1458 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1459 * dependent).
1460 */
1461int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1462{
1463 /*
1464 * Validate input.
1465 */
1466 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1467 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1468 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1469 || cbChunk == 0
1470 || ( RT_IS_POWER_OF_TWO(cbChunk)
1471 && cbChunk >= _1M
1472 && cbChunk <= _256M
1473 && cbChunk <= cbMax),
1474 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1475 VERR_OUT_OF_RANGE);
1476
1477 /*
1478 * Adjust/figure out the chunk size.
1479 */
1480 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1481 {
1482 if (cbMax >= _256M)
1483 cbChunk = _64M;
1484 else
1485 {
1486 if (cbMax < _16M)
1487 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1488 else
1489 cbChunk = (uint32_t)cbMax / 4;
1490 if (!RT_IS_POWER_OF_TWO(cbChunk))
1491 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1492 }
1493 }
1494
1495 if (cbChunk > cbMax)
1496 cbMax = cbChunk;
1497 else
1498 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1499 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1500 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1501
1502 /*
1503 * Allocate and initialize the allocator instance.
1504 */
1505 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1506#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1507 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1508 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1509 cbNeeded += cbBitmap * cMaxChunks;
1510 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1511 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1512#endif
1513#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1514 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1515 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1516#endif
1517 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1518 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1519 VERR_NO_MEMORY);
1520 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1521 pExecMemAllocator->cbChunk = cbChunk;
1522 pExecMemAllocator->cMaxChunks = cMaxChunks;
1523 pExecMemAllocator->cChunks = 0;
1524 pExecMemAllocator->idxChunkHint = 0;
1525 pExecMemAllocator->cAllocations = 0;
1526 pExecMemAllocator->cbTotal = 0;
1527 pExecMemAllocator->cbFree = 0;
1528 pExecMemAllocator->cbAllocated = 0;
1529#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1530 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1531 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1532 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1533 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
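    /* Sizing illustration (assuming, purely for the example, a 64 MB chunk and an
       allocation unit shift of 8, i.e. 256 byte units): that yields 256K units per
       chunk, 4096 uint64_t bitmap words per chunk and a 32 KB per-chunk bitmap,
       one bit per unit.  The actual IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT value is
       defined elsewhere. */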
1534#endif
1535#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1536 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1537#endif
1538 for (uint32_t i = 0; i < cMaxChunks; i++)
1539 {
1540#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1541 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1542 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1543#else
1544 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1545#endif
1546 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1547#ifdef IN_RING0
1548 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1549#else
1550 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1551#endif
1552 }
1553 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1554
1555 /*
1556 * Do the initial allocations.
1557 */
1558 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1559 {
1560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1561 AssertLogRelRCReturn(rc, rc);
1562 }
1563
1564 pExecMemAllocator->idxChunkHint = 0;
1565
1566 return VINF_SUCCESS;
1567}
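
/*
 * Illustrative only: a caller could size the allocator along these lines; the
 * 64 MB / 16 MB figures below are made up for the example and are not the
 * defaults IEM actually uses.
 *
 * @code
 *      int rc = iemExecMemAllocatorInit(pVCpu, 64 * _1M, 16 * _1M, 0);
 *      AssertLogRelRCReturn(rc, rc);
 * @endcode
 * (The zero requests the default chunk size.)
 */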
1568
1569
1570/*********************************************************************************************************************************
1571* Native Recompilation *
1572*********************************************************************************************************************************/
1573
1574
1575/**
1576 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1577 */
1578IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1579{
1580 pVCpu->iem.s.cInstructions += idxInstr;
1581 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1582}
1583
1584
1585/**
1586 * Used by TB code when it wants to raise a \#GP(0).
1587 */
1588IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1589{
1590 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1591#ifndef _MSC_VER
1592 return VINF_IEM_RAISED_XCPT; /* not reached */
1593#endif
1594}
1595
1596
1597/**
1598 * Used by TB code when detecting opcode changes.
1599 * @see iemThreadeFuncWorkerObsoleteTb
1600 */
1601IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1602{
1603 /* We set fSafeToFree to false because we're being called in the context
1604 of a TB callback function, which for native TBs means we cannot release
1605 the executable memory until we've returned all the way back to iemTbExec,
1606 as that return path goes via the native code generated for the TB. */
1607 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1608 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1609 return VINF_IEM_REEXEC_BREAK;
1610}
1611
1612
1613/**
1614 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1615 */
1616IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1617{
1618 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1619 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1620 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1621 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1622 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1623 return VINF_IEM_REEXEC_BREAK;
1624}
1625
1626
1627/**
1628 * Used by TB code when we missed a PC check after a branch.
1629 */
1630IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1631{
1632 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1633 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1634 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1635 pVCpu->iem.s.pbInstrBuf));
1636 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1637 return VINF_IEM_REEXEC_BREAK;
1638}
1639
1640
1641
1642/*********************************************************************************************************************************
1643* Helpers: Segmented memory fetches and stores. *
1644*********************************************************************************************************************************/
1645
1646/**
1647 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1648 */
1649IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1650{
1651 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1652}
1653
1654
1655/**
1656 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1657 * to 16 bits.
1658 */
1659IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1660{
1661 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1662}
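
/* Note: the cast chain above first sign-extends within the narrower type and then
   zero-extends into the 64-bit return register; e.g. a fetched byte of 0x80 becomes
   0xff80 as uint16_t and 0x000000000000ff80 in the returned uint64_t. */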
1663
1664
1665/**
1666 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1667 * to 32 bits.
1668 */
1669IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1670{
1671 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1672}
1673
1674/**
1675 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1676 * to 64 bits.
1677 */
1678IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1679{
1680 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1681}
1682
1683
1684/**
1685 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1686 */
1687IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1688{
1689 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1690}
1691
1692
1693/**
1694 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1695 * to 32 bits.
1696 */
1697IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1698{
1699 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1700}
1701
1702
1703/**
1704 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1705 * to 64 bits.
1706 */
1707IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1708{
1709 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1710}
1711
1712
1713/**
1714 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1715 */
1716IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1717{
1718 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1719}
1720
1721
1722/**
1723 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1724 * to 64 bits.
1725 */
1726IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1727{
1728 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1729}
1730
1731
1732/**
1733 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1734 */
1735IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1736{
1737 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU64SafeJmp */
1738}
1739
1740
1741/**
1742 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1745{
1746 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1747}
1748
1749
1750/**
1751 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1752 */
1753IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1754{
1755 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1756}
1757
1758
1759/**
1760 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1763{
1764 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1765}
1766
1767
1768/**
1769 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1772{
1773 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1774}
1775
1776
1777
1778/**
1779 * Used by TB code to push unsigned 16-bit value onto a generic stack.
1780 */
1781IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
1782{
1783 iemMemStackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemStackPushU16SafeJmp */
1784}
1785
1786
1787/**
1788 * Used by TB code to push unsigned 32-bit value onto a generic stack.
1789 */
1790IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
1791{
1792 iemMemStackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SafeJmp */
1793}
1794
1795
1796/**
1797 * Used by TB code to push 32-bit selector value onto a generic stack.
1798 *
1799 * Intel CPUs don't write a whole dword, thus the special function.
1800 */
1801IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
1802{
1803 iemMemStackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SRegSafeJmp */
1804}
1805
1806
1807/**
1808 * Used by TB code to push unsigned 64-bit value onto a generic stack.
1809 */
1810IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
1811{
1812 iemMemStackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemStackPushU64SafeJmp */
1813}
1814
1815
1816/**
1817 * Used by TB code to pop a 16-bit general purpose register off a generic stack.
1818 */
1819IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
1820{
1821 iemMemStackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU16SafeJmp */
1822}
1823
1824
1825/**
1826 * Used by TB code to pop a 32-bit general purpose register off a generic stack.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
1829{
1830 iemMemStackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU32SafeJmp */
1831}
1832
1833
1834/**
1835 * Used by TB code to pop a 64-bit general purpose register off a generic stack.
1836 */
1837IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
1838{
1839 iemMemStackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU64SafeJmp */
1840}
1841
1842
1843
1844/*********************************************************************************************************************************
1845* Helpers: Flat memory fetches and stores. *
1846*********************************************************************************************************************************/
1847
1848/**
1849 * Used by TB code to load unsigned 8-bit data w/ flat address.
1850 * @note Zero extending the value to 64-bit to simplify assembly.
1851 */
1852IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1853{
1854 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1855}
1856
1857
1858/**
1859 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1860 * to 16 bits.
1861 * @note Zero extending the value to 64-bit to simplify assembly.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1864{
1865 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1866}
1867
1868
1869/**
1870 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1871 * to 32 bits.
1872 * @note Zero extending the value to 64-bit to simplify assembly.
1873 */
1874IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1875{
1876 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1877}
1878
1879
1880/**
1881 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1882 * to 64 bits.
1883 */
1884IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1885{
1886 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1887}
1888
1889
1890/**
1891 * Used by TB code to load unsigned 16-bit data w/ flat address.
1892 * @note Zero extending the value to 64-bit to simplify assembly.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1895{
1896 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1897}
1898
1899
1900/**
1901 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1902 * to 32 bits.
1903 * @note Zero extending the value to 64-bit to simplify assembly.
1904 */
1905IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1906{
1907 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1908}
1909
1910
1911/**
1912 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1913 * to 64 bits.
1914 * @note Zero extending the value to 64-bit to simplify assembly.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1917{
1918 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1919}
1920
1921
1922/**
1923 * Used by TB code to load unsigned 32-bit data w/ flat address.
1924 * @note Zero extending the value to 64-bit to simplify assembly.
1925 */
1926IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1927{
1928 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1929}
1930
1931
1932/**
1933 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1934 * to 64 bits.
1935 * @note Zero extending the value to 64-bit to simplify assembly.
1936 */
1937IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1938{
1939 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1940}
1941
1942
1943/**
1944 * Used by TB code to load unsigned 64-bit data w/ flat address.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1947{
1948 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU64SafeJmp */
1949}
1950
1951
1952/**
1953 * Used by TB code to store unsigned 8-bit data w/ flat address.
1954 */
1955IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1956{
1957 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1958}
1959
1960
1961/**
1962 * Used by TB code to store unsigned 16-bit data w/ flat address.
1963 */
1964IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1965{
1966 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1967}
1968
1969
1970/**
1971 * Used by TB code to store unsigned 32-bit data w/ flat address.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1974{
1975 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1976}
1977
1978
1979/**
1980 * Used by TB code to store unsigned 64-bit data w/ flat address.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1983{
1984 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1985}
1986
1987
1988
1989/**
1990 * Used by TB code to push unsigned 16-bit value onto a flat 32-bit stack.
1991 */
1992IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
1993{
1994 iemMemFlat32StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat32StackPushU16SafeJmp */
1995}
1996
1997
1998/**
1999 * Used by TB code to push unsigned 32-bit value onto a flat 32-bit stack.
2000 */
2001IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
2002{
2003 iemMemFlat32StackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SafeJmp */
2004}
2005
2006
2007/**
2008 * Used by TB code to push segment selector value onto a flat 32-bit stack.
2009 *
2010 * Intel CPUs don't write a whole dword, thus the special function.
2011 */
2012IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
2013{
2014 iemMemFlat32StackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SRegSafeJmp */
2015}
2016
2017
2018/**
2019 * Used by TB code to pop a 16-bit general purpose register off a flat 32-bit stack.
2020 */
2021IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
2022{
2023 iemMemFlat32StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU16SafeJmp */
2024}
2025
2026
2027/**
2028 * Used by TB code to pop a 32-bit general purpose register off a flat 32-bit stack.
2029 */
2030IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
2031{
2032 iemMemFlat32StackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU32SafeJmp */
2033}
2034
2035
2036
2037/**
2038 * Used by TB code to push unsigned 16-bit value onto a flat 64-bit stack.
2039 */
2040IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
2041{
2042 iemMemFlat64StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat64StackPushU16SafeJmp */
2043}
2044
2045
2046/**
2047 * Used by TB code to push unsigned 64-bit value onto a flat 64-bit stack.
2048 */
2049IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
2050{
2051 iemMemFlat64StackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemFlat64StackPushU64SafeJmp */
2052}
2053
2054
2055/**
2056 * Used by TB code to pop a 16-bit general purpose register off a flat 64-bit stack.
2057 */
2058IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
2059{
2060 iemMemFlat64StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU16SafeJmp */
2061}
2062
2063
2064/**
2065 * Used by TB code to pop a 64-bit general purpose register off a flat 64-bit stack.
2066 */
2067IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
2068{
2069 iemMemFlat64StackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU64SafeJmp */
2070}
2071
2072
2073
2074/*********************************************************************************************************************************
2075* Helpers: Segmented memory mapping. *
2076*********************************************************************************************************************************/
2077
2078/**
2079 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2080 */
2081IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2082 RTGCPTR GCPtrMem, uint8_t iSegReg))
2083{
2084 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RwSafeJmp */
2085}
2086
2087
2088/**
2089 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2092 RTGCPTR GCPtrMem, uint8_t iSegReg))
2093{
2094 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8WoSafeJmp */
2095}
2096
2097
2098/**
2099 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2100 */
2101IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2102 RTGCPTR GCPtrMem, uint8_t iSegReg))
2103{
2104 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RoSafeJmp */
2105}
2106
2107
2108/**
2109 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2110 */
2111IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2112 RTGCPTR GCPtrMem, uint8_t iSegReg))
2113{
2114 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RwSafeJmp */
2115}
2116
2117
2118/**
2119 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2120 */
2121IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2122 RTGCPTR GCPtrMem, uint8_t iSegReg))
2123{
2124 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16WoSafeJmp */
2125}
2126
2127
2128/**
2129 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2130 */
2131IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2132 RTGCPTR GCPtrMem, uint8_t iSegReg))
2133{
2134 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RoSafeJmp */
2135}
2136
2137
2138/**
2139 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2140 */
2141IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2142 RTGCPTR GCPtrMem, uint8_t iSegReg))
2143{
2144 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RwSafeJmp */
2145}
2146
2147
2148/**
2149 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2150 */
2151IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2152 RTGCPTR GCPtrMem, uint8_t iSegReg))
2153{
2154 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32WoSafeJmp */
2155}
2156
2157
2158/**
2159 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2160 */
2161IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2162 RTGCPTR GCPtrMem, uint8_t iSegReg))
2163{
2164 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RoSafeJmp */
2165}
2166
2167
2168/**
2169 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2172 RTGCPTR GCPtrMem, uint8_t iSegReg))
2173{
2174 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RwSafeJmp */
2175}
2176
2177
2178/**
2179 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2180 */
2181IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2182 RTGCPTR GCPtrMem, uint8_t iSegReg))
2183{
2184 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64WoSafeJmp */
2185}
2186
2187
2188/**
2189 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2190 */
2191IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2192 RTGCPTR GCPtrMem, uint8_t iSegReg))
2193{
2194 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RoSafeJmp */
2195}
2196
2197
2198/**
2199 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2202 RTGCPTR GCPtrMem, uint8_t iSegReg))
2203{
2204 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataR80WoSafeJmp */
2205}
2206
2207
2208/**
2209 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2210 */
2211IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2212 RTGCPTR GCPtrMem, uint8_t iSegReg))
2213{
2214 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataD80WoSafeJmp */
2215}
2216
2217
2218/**
2219 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2220 */
2221IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2222 RTGCPTR GCPtrMem, uint8_t iSegReg))
2223{
2224 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RwSafeJmp */
2225}
2226
2227
2228/**
2229 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2230 */
2231IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2232 RTGCPTR GCPtrMem, uint8_t iSegReg))
2233{
2234 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128WoSafeJmp */
2235}
2236
2237
2238/**
2239 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2240 */
2241IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2242 RTGCPTR GCPtrMem, uint8_t iSegReg))
2243{
2244 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RoSafeJmp */
2245}
2246
2247
2248/*********************************************************************************************************************************
2249* Helpers: Flat memory mapping. *
2250*********************************************************************************************************************************/
2251
2252/**
2253 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2254 */
2255IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2256{
2257 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RwSafeJmp */
2258}
2259
2260
2261/**
2262 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2263 */
2264IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2265{
2266 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8WoSafeJmp */
2267}
2268
2269
2270/**
2271 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2272 */
2273IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2274{
2275 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RoSafeJmp */
2276}
2277
2278
2279/**
2280 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2281 */
2282IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2283{
2284 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RwSafeJmp */
2285}
2286
2287
2288/**
2289 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2290 */
2291IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2292{
2293 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16WoSafeJmp */
2294}
2295
2296
2297/**
2298 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2299 */
2300IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2301{
2302 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RoSafeJmp */
2303}
2304
2305
2306/**
2307 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2308 */
2309IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2310{
2311 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RwSafeJmp */
2312}
2313
2314
2315/**
2316 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2317 */
2318IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2319{
2320 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32WoSafeJmp */
2321}
2322
2323
2324/**
2325 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2326 */
2327IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2328{
2329 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RoSafeJmp */
2330}
2331
2332
2333/**
2334 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2335 */
2336IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2337{
2338 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RwSafeJmp */
2339}
2340
2341
2342/**
2343 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2344 */
2345IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2346{
2347 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64WoSafeJmp */
2348}
2349
2350
2351/**
2352 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2353 */
2354IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2355{
2356 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RoSafeJmp */
2357}
2358
2359
2360/**
2361 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2362 */
2363IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2364{
2365 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataR80WoSafeJmp */
2366}
2367
2368
2369/**
2370 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2371 */
2372IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2373{
2374 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataD80WoSafeJmp */
2375}
2376
2377
2378/**
2379 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2380 */
2381IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2382{
2383 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RwSafeJmp */
2384}
2385
2386
2387/**
2388 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2389 */
2390IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2391{
2392 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128WoSafeJmp */
2393}
2394
2395
2396/**
2397 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2400{
2401 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RoSafeJmp */
2402}
2403
2404
2405/*********************************************************************************************************************************
2406* Helpers: Commit, rollback & unmap *
2407*********************************************************************************************************************************/
2408
2409/**
2410 * Used by TB code to commit and unmap a read-write memory mapping.
2411 */
2412IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2413{
2414 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2415}
2416
2417
2418/**
2419 * Used by TB code to commit and unmap a write-only memory mapping.
2420 */
2421IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2422{
2423 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2424}
2425
2426
2427/**
2428 * Used by TB code to commit and unmap a read-only memory mapping.
2429 */
2430IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2431{
2432 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2433}
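
/*
 * Illustrative pairing (a C-level sketch of what the generated TB code does; the
 * variable names are made up for the example):
 *
 * @code
 *      uint8_t   bUnmapInfo;
 *      uint32_t *pu32Dst = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
 *      *pu32Dst += 1;
 *      iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);
 * @endcode
 */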
2434
2435
2436/**
2437 * Reinitializes the native recompiler state.
2438 *
2439 * Called before starting a new recompile job.
2440 */
2441static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2442{
2443 pReNative->cLabels = 0;
2444 pReNative->bmLabelTypes = 0;
2445 pReNative->cFixups = 0;
2446#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2447 pReNative->pDbgInfo->cEntries = 0;
2448#endif
2449 pReNative->pTbOrg = pTb;
2450 pReNative->cCondDepth = 0;
2451 pReNative->uCondSeqNo = 0;
2452 pReNative->uCheckIrqSeqNo = 0;
2453 pReNative->uTlbSeqNo = 0;
2454
2455 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2456#if IEMNATIVE_HST_GREG_COUNT < 32
2457 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2458#endif
2459 ;
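    /* Note: this marks the fixed registers and, on hosts with fewer than 32
       allocatable GPRs, the non-existent register indexes as taken, so the
       register allocator never hands them out. */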
2460 pReNative->Core.bmHstRegsWithGstShadow = 0;
2461 pReNative->Core.bmGstRegShadows = 0;
2462 pReNative->Core.bmVars = 0;
2463 pReNative->Core.bmStack = 0;
2464 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2465 pReNative->Core.u64ArgVars = UINT64_MAX;
2466
2467 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 9);
2468 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2469 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2470 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2471 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2472 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2473 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2474 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2475 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2476 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2477
2478 /* Full host register reinit: */
2479 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2480 {
2481 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2482 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2483 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2484 }
2485
2486 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2487 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2488#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2489 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2490#endif
2491#ifdef IEMNATIVE_REG_FIXED_TMP0
2492 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2493#endif
2494 );
2495 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2496 {
2497 fRegs &= ~RT_BIT_32(idxReg);
2498 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2499 }
2500
2501 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2502#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2503 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2504#endif
2505#ifdef IEMNATIVE_REG_FIXED_TMP0
2506 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2507#endif
2508 return pReNative;
2509}
2510
2511
2512/**
2513 * Allocates and initializes the native recompiler state.
2514 *
2515 * This is called the first time an EMT wants to recompile something.
2516 *
2517 * @returns Pointer to the new recompiler state.
2518 * @param pVCpu The cross context virtual CPU structure of the calling
2519 * thread.
2520 * @param pTb The TB that's about to be recompiled.
2521 * @thread EMT(pVCpu)
2522 */
2523static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2524{
2525 VMCPU_ASSERT_EMT(pVCpu);
2526
2527 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2528 AssertReturn(pReNative, NULL);
2529
2530 /*
2531 * Try allocate all the buffers and stuff we need.
2532 */
2533 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2534 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2535 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2536#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2537 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2538#endif
2539 if (RT_LIKELY( pReNative->pInstrBuf
2540 && pReNative->paLabels
2541 && pReNative->paFixups)
2542#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2543 && pReNative->pDbgInfo
2544#endif
2545 )
2546 {
2547 /*
2548 * Set the buffer & array sizes on success.
2549 */
2550 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2551 pReNative->cLabelsAlloc = _8K;
2552 pReNative->cFixupsAlloc = _16K;
2553#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2554 pReNative->cDbgInfoAlloc = _16K;
2555#endif
2556
2557 /*
2558 * Done, just need to save it and reinit it.
2559 */
2560 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
2561 return iemNativeReInit(pReNative, pTb);
2562 }
2563
2564 /*
2565 * Failed. Cleanup and return.
2566 */
2567 AssertFailed();
2568 RTMemFree(pReNative->pInstrBuf);
2569 RTMemFree(pReNative->paLabels);
2570 RTMemFree(pReNative->paFixups);
2571#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2572 RTMemFree(pReNative->pDbgInfo);
2573#endif
2574 RTMemFree(pReNative);
2575 return NULL;
2576}
2577
2578
2579/**
2580 * Creates a label
2581 *
2582 * If the label does not yet have a defined position,
2583 * call iemNativeLabelDefine() later to set it.
2584 *
2585 * @returns Label ID. Throws VBox status code on failure, so no need to check
2586 * the return value.
2587 * @param pReNative The native recompile state.
2588 * @param enmType The label type.
2589 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2590 * label is not yet defined (default).
2591 * @param uData Data associated with the label. Only applicable to
2592 * certain types of labels. Default is zero.
2593 */
2594DECL_HIDDEN_THROW(uint32_t)
2595iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2596 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2597{
2598 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2599
2600 /*
2601 * Locate existing label definition.
2602 *
2603 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2604 * and uData is zero.
2605 */
2606 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2607 uint32_t const cLabels = pReNative->cLabels;
2608 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2609#ifndef VBOX_STRICT
2610 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2611 && offWhere == UINT32_MAX
2612 && uData == 0
2613#endif
2614 )
2615 {
2616#ifndef VBOX_STRICT
2617 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2618 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2619 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2620 if (idxLabel < pReNative->cLabels)
2621 return idxLabel;
2622#else
2623 for (uint32_t i = 0; i < cLabels; i++)
2624 if ( paLabels[i].enmType == enmType
2625 && paLabels[i].uData == uData)
2626 {
2627 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2628 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2629 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2630 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2631 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2632 return i;
2633 }
2634 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2635 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2636#endif
2637 }
2638
2639 /*
2640 * Make sure we've got room for another label.
2641 */
2642 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2643 { /* likely */ }
2644 else
2645 {
2646 uint32_t cNew = pReNative->cLabelsAlloc;
2647 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2648 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2649 cNew *= 2;
2650 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2651 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2652 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2653 pReNative->paLabels = paLabels;
2654 pReNative->cLabelsAlloc = cNew;
2655 }
2656
2657 /*
2658 * Define a new label.
2659 */
2660 paLabels[cLabels].off = offWhere;
2661 paLabels[cLabels].enmType = enmType;
2662 paLabels[cLabels].uData = uData;
2663 pReNative->cLabels = cLabels + 1;
2664
2665 Assert((unsigned)enmType < 64);
2666 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2667
2668 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2669 {
2670 Assert(uData == 0);
2671 pReNative->aidxUniqueLabels[enmType] = cLabels;
2672 }
2673
2674 if (offWhere != UINT32_MAX)
2675 {
2676#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2677 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2678 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2679#endif
2680 }
2681 return cLabels;
2682}
2683
2684
2685/**
2686 * Defines the location of an existing label.
2687 *
2688 * @param pReNative The native recompile state.
2689 * @param idxLabel The label to define.
2690 * @param offWhere The position.
2691 */
2692DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2693{
2694 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2695 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2696 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2697 pLabel->off = offWhere;
2698#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2699 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2700 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2701#endif
2702}
2703
2704
2705/**
2706 * Looks up a label.
2707 *
2708 * @returns Label ID if found, UINT32_MAX if not.
2709 */
2710static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2711 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2712{
2713 Assert((unsigned)enmType < 64);
2714 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2715 {
2716 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2717 return pReNative->aidxUniqueLabels[enmType];
2718
2719 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2720 uint32_t const cLabels = pReNative->cLabels;
2721 for (uint32_t i = 0; i < cLabels; i++)
2722 if ( paLabels[i].enmType == enmType
2723 && paLabels[i].uData == uData
2724 && ( paLabels[i].off == offWhere
2725 || offWhere == UINT32_MAX
2726 || paLabels[i].off == UINT32_MAX))
2727 return i;
2728 }
2729 return UINT32_MAX;
2730}
2731
2732
2733/**
2734 * Adds a fixup.
2735 *
2736 * @throws VBox status code (int) on failure.
2737 * @param pReNative The native recompile state.
2738 * @param offWhere The instruction offset of the fixup location.
2739 * @param idxLabel The target label ID for the fixup.
2740 * @param enmType The fixup type.
2741 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2742 */
2743DECL_HIDDEN_THROW(void)
2744iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2745 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2746{
2747 Assert(idxLabel <= UINT16_MAX);
2748 Assert((unsigned)enmType <= UINT8_MAX);
2749
2750 /*
2751 * Make sure we've room.
2752 */
2753 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2754 uint32_t const cFixups = pReNative->cFixups;
2755 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2756 { /* likely */ }
2757 else
2758 {
2759 uint32_t cNew = pReNative->cFixupsAlloc;
2760 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2761 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2762 cNew *= 2;
2763 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2764 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2765 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2766 pReNative->paFixups = paFixups;
2767 pReNative->cFixupsAlloc = cNew;
2768 }
2769
2770 /*
2771 * Add the fixup.
2772 */
2773 paFixups[cFixups].off = offWhere;
2774 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2775 paFixups[cFixups].enmType = enmType;
2776 paFixups[cFixups].offAddend = offAddend;
2777 pReNative->cFixups = cFixups + 1;
2778}
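
/*
 * Typical flow (illustrative; the enmLabelType / enmFixupType values and the
 * offsets are placeholders, not specific enum members):
 *
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *      // ... emit a branch at native offset offJmp that targets the label ...
 *      iemNativeAddFixup(pReNative, offJmp, idxLabel, enmFixupType);
 *      // ... once the target position 'off' is known ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode
 */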
2779
2780
2781/**
2782 * Slow code path for iemNativeInstrBufEnsure.
2783 */
2784DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2785{
2786 /* Double the buffer size till we meet the request. */
2787 uint32_t cNew = pReNative->cInstrBufAlloc;
2788 AssertReturn(cNew > 0, NULL);
2789 do
2790 cNew *= 2;
2791 while (cNew < off + cInstrReq);
2792
2793 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2794#ifdef RT_ARCH_ARM64
2795 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
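    /* (A 19-bit signed word offset in the conditional branch encodings gives a
       reach of +/- 1 MiB, hence this cap.) */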
2796#else
2797 uint32_t const cbMaxInstrBuf = _2M;
2798#endif
2799 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2800
2801 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2802 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2803
2804 pReNative->cInstrBufAlloc = cNew;
2805 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2806}
2807
2808#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2809
2810/**
2811 * Grows the static debug info array used during recompilation.
2812 *
2813 * @returns Pointer to the new debug info block; throws VBox status code on
2814 * failure, so no need to check the return value.
2815 */
2816DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2817{
2818 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2819 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2820 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2821 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2822 pReNative->pDbgInfo = pDbgInfo;
2823 pReNative->cDbgInfoAlloc = cNew;
2824 return pDbgInfo;
2825}
2826
2827
2828/**
2829 * Adds a new debug info uninitialized entry, returning the pointer to it.
2830 */
2831DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2832{
2833 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2834 { /* likely */ }
2835 else
2836 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2837 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2838}
2839
2840
2841/**
2842 * Debug Info: Adds a native offset record, if necessary.
2843 */
2844static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2845{
2846 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2847
2848 /*
2849 * Search backwards to see if we've got a similar record already.
2850 */
2851 uint32_t idx = pDbgInfo->cEntries;
2852 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
2853 while (idx-- > idxStop)
2854 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
2855 {
2856 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
2857 return;
2858 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
2859 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2860 break;
2861 }
2862
2863 /*
2864 * Add it.
2865 */
2866 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2867 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2868 pEntry->NativeOffset.offNative = off;
2869}
2870
2871
2872/**
2873 * Debug Info: Record info about a label.
2874 */
2875static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2876{
2877 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2878 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2879 pEntry->Label.uUnused = 0;
2880 pEntry->Label.enmLabel = (uint8_t)enmType;
2881 pEntry->Label.uData = uData;
2882}
2883
2884
2885/**
2886 * Debug Info: Record info about a threaded call.
2887 */
2888static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2889{
2890 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2891 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2892 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2893 pEntry->ThreadedCall.uUnused = 0;
2894 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2895}
2896
2897
2898/**
2899 * Debug Info: Record info about a new guest instruction.
2900 */
2901static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2902{
2903 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2904 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2905 pEntry->GuestInstruction.uUnused = 0;
2906 pEntry->GuestInstruction.fExec = fExec;
2907}
2908
2909
2910/**
2911 * Debug Info: Record info about guest register shadowing.
2912 */
2913static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2914 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2915{
2916 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2917 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2918 pEntry->GuestRegShadowing.uUnused = 0;
2919 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2920 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2921 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2922}
2923
2924#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2925
2926
2927/*********************************************************************************************************************************
2928* Register Allocator *
2929*********************************************************************************************************************************/
2930
2931/**
2932 * Register parameter indexes (indexed by argument number).
2933 */
2934DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2935{
2936 IEMNATIVE_CALL_ARG0_GREG,
2937 IEMNATIVE_CALL_ARG1_GREG,
2938 IEMNATIVE_CALL_ARG2_GREG,
2939 IEMNATIVE_CALL_ARG3_GREG,
2940#if defined(IEMNATIVE_CALL_ARG4_GREG)
2941 IEMNATIVE_CALL_ARG4_GREG,
2942# if defined(IEMNATIVE_CALL_ARG5_GREG)
2943 IEMNATIVE_CALL_ARG5_GREG,
2944# if defined(IEMNATIVE_CALL_ARG6_GREG)
2945 IEMNATIVE_CALL_ARG6_GREG,
2946# if defined(IEMNATIVE_CALL_ARG7_GREG)
2947 IEMNATIVE_CALL_ARG7_GREG,
2948# endif
2949# endif
2950# endif
2951#endif
2952};
2953
2954/**
2955 * Call register masks indexed by argument count.
2956 */
2957DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2958{
2959 0,
2960 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2961 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2962 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2963 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2964 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2965#if defined(IEMNATIVE_CALL_ARG4_GREG)
2966 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2967 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2968# if defined(IEMNATIVE_CALL_ARG5_GREG)
2969 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2970 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2971# if defined(IEMNATIVE_CALL_ARG6_GREG)
2972 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2973 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2974 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2975# if defined(IEMNATIVE_CALL_ARG7_GREG)
2976 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2977 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2978 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2979# endif
2980# endif
2981# endif
2982#endif
2983};
2984
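/* Editorial sketch, not part of the original source: the two tables above are
 * indexed by argument number respectively argument count. A caller preparing
 * a two-argument helper call could consult them roughly like this (the
 * surrounding flow is hypothetical, the identifiers are the real ones above):
 *
 *      uint8_t  const idxRegArg1 = g_aidxIemNativeCallRegs[1];    <- host reg carrying argument no. 1
 *      uint32_t const fArgRegs   = g_afIemNativeCallRegs[2];      <- mask covering ARG0 and ARG1
 *      Assert(!(pReNative->Core.bmHstRegs & fArgRegs));           <- e.g. check that both are free
 */
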
2985#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2986/**
2987 * BP offset of the stack argument slots.
2988 *
2989 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2990 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2991 */
2992DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2993{
2994 IEMNATIVE_FP_OFF_STACK_ARG0,
2995# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2996 IEMNATIVE_FP_OFF_STACK_ARG1,
2997# endif
2998# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2999 IEMNATIVE_FP_OFF_STACK_ARG2,
3000# endif
3001# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3002 IEMNATIVE_FP_OFF_STACK_ARG3,
3003# endif
3004};
3005AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3006#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
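
/* Editorial sketch, not part of the original source: arguments beyond the
 * register ones live in the stack frame, and the table above translates the
 * argument number into a BP displacement. Hypothetical flow; idxRegValue is
 * an assumed register holding the value to pass:
 *
 *      Assert(iArg >= IEMNATIVE_CALL_ARG_GREG_COUNT);
 *      int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[iArg - IEMNATIVE_CALL_ARG_GREG_COUNT];
 *      off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegValue);
 */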
3007
3008/**
3009 * Info about shadowed guest register values.
3010 * @see IEMNATIVEGSTREG
3011 */
3012static struct
3013{
3014 /** Offset in VMCPU. */
3015 uint32_t off;
3016 /** The field size. */
3017 uint8_t cb;
3018 /** Name (for logging). */
3019 const char *pszName;
3020} const g_aGstShadowInfo[] =
3021{
3022#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3023 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3024 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3025 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3026 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3027 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3028 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3029 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3030 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3031 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3032 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3033 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3034 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3035 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3036 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3037 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3038 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3039 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3040 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3041 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3042 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3043 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3044 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3045 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3046 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3047 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3048 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3049 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3050 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3051 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3052 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3053 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3054 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3055 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3056 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3057 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3058 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3059 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3060 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3061 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3062 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3063 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3064 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3065#undef CPUMCTX_OFF_AND_SIZE
3066};
3067AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3068
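/* Editorial sketch, not part of the original source: g_aGstShadowInfo gives
 * the CPUMCTX offset, size and log name for each IEMNATIVEGSTREG value, and
 * the load width is picked from the cb field. E.g. materialising the guest
 * RIP in an already allocated host register (hypothetical flow, the helper
 * is the real one defined further down in this file):
 *
 *      Assert(g_aGstShadowInfo[kIemNativeGstReg_Pc].cb == sizeof(uint64_t));
 *      off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, kIemNativeGstReg_Pc);
 */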
3069
3070/** Host CPU general purpose register names. */
3071DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3072{
3073#ifdef RT_ARCH_AMD64
3074 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3075#elif defined(RT_ARCH_ARM64)
3076 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3077 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3078#else
3079# error "port me"
3080#endif
3081};
3082
3083
3084DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3085 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3086{
3087 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3088
3089 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3090 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3091 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3092 return (uint8_t)idxReg;
3093}
3094
3095
3096/**
3097 * Tries to locate a suitable register in the given register mask.
3098 *
3099 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3100 * failed.
3101 *
3102 * @returns Host register number on success, returns UINT8_MAX on failure.
3103 */
3104static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3105{
3106 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3107 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3108 if (fRegs)
3109 {
3110 /** @todo pick better here: */
3111 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3112
3113 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3114 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3115 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3116 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3117
3118 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3119 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3120 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3121 return idxReg;
3122 }
3123 return UINT8_MAX;
3124}
3125
3126
3127/**
3128 * Locate a register, possibly freeing one up.
3129 *
3130 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3131 * failed.
3132 *
3133 * @returns Host register number on success. Returns UINT8_MAX if no registers
3134 * are found; the caller is supposed to deal with this and raise an
3135 * allocation-type specific status code (if desired).
3136 *
3137 * @throws VBox status code if we run into trouble spilling a variable or
3138 * recording debug info. Does NOT throw anything if we're out of
3139 * registers, though.
3140 */
3141static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3142 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3143{
3144 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3145 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3146
3147 /*
3148 * Try a freed register that's shadowing a guest register
3149 */
3150 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3151 if (fRegs)
3152 {
3153 unsigned const idxReg = (fPreferVolatile
3154 ? ASMBitFirstSetU32(fRegs)
3155 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3156 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
3157 - 1;
3158
3159 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3160 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3161 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3162 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3163
3164 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3165 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3166 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3167 return idxReg;
3168 }
3169
3170 /*
3171 * Try to free up a variable that's in a register.
3172 *
3173 * We do two rounds here: first we evacuate variables that don't need to be
3174 * saved on the stack, then in the second round we move things to the stack.
3175 */
3176 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3177 {
3178 uint32_t fVars = pReNative->Core.bmVars;
3179 while (fVars)
3180 {
3181 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3182 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3183 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3184 && (RT_BIT_32(idxReg) & fRegMask)
3185 && ( iLoop == 0
3186 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3187 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3188 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3189 {
3190 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3191 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3192 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3193 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3194 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3195 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3196
3197 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3198 {
3199 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3200 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3201 }
3202
3203 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3204 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3205
3206 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3207 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3208 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3209 return idxReg;
3210 }
3211 fVars &= ~RT_BIT_32(idxVar);
3212 }
3213 }
3214
3215 return UINT8_MAX;
3216}
3217
3218
3219/**
3220 * Reassigns a variable to a different register specified by the caller.
3221 *
3222 * @returns The new code buffer position.
3223 * @param pReNative The native recompile state.
3224 * @param off The current code buffer position.
3225 * @param idxVar The variable index.
3226 * @param idxRegOld The old host register number.
3227 * @param idxRegNew The new host register number.
3228 * @param pszCaller The caller for logging.
3229 */
3230static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3231 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3232{
3233 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3234 RT_NOREF(pszCaller);
3235
3236 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3237
3238 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3239 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3240 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3241 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3242
3243 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3244 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3245 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3246 if (fGstRegShadows)
3247 {
3248 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3249 | RT_BIT_32(idxRegNew);
3250 while (fGstRegShadows)
3251 {
3252 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3253 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3254
3255 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3256 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3257 }
3258 }
3259
3260 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3261 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3262 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3263 return off;
3264}
3265
3266
3267/**
3268 * Moves a variable to a different register or spills it onto the stack.
3269 *
3270 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3271 * kinds can easily be recreated if needed later.
3272 *
3273 * @returns The new code buffer position.
3274 * @param pReNative The native recompile state.
3275 * @param off The current code buffer position.
3276 * @param idxVar The variable index.
3277 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3278 * call-volatile registers.
3279 */
3280static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3281 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3282{
3283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3284 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3285 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3286
3287 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3288 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3289 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3290 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3291 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3292 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3293 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3294 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3295 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3296
3297
3298 /** @todo Add statistics on this.*/
3299 /** @todo Implement basic variable liveness analysis (python) so variables
3300 * can be freed immediately once they are no longer used. Without this we
3301 * risk trashing registers and stack slots on dead variables. */
3302
3303 /*
3304 * First try move it to a different register, as that's cheaper.
3305 */
3306 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3307 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3308 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3309 if (fRegs)
3310 {
3311 /* Avoid using shadow registers, if possible. */
3312 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3313 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3314 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3315 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3316 }
3317
3318 /*
3319 * Otherwise we must spill the register onto the stack.
3320 */
3321 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3322 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3323 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3324 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3325
3326 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3327 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3328 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3329 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3330 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3331 return off;
3332}
3333
3334
3335/**
3336 * Allocates a temporary host general purpose register.
3337 *
3338 * This may emit code to save register content onto the stack in order to free
3339 * up a register.
3340 *
3341 * @returns The host register number; throws VBox status code on failure,
3342 * so no need to check the return value.
3343 * @param pReNative The native recompile state.
3344 * @param poff Pointer to the variable with the code buffer position.
3345 * This will be updated if we need to move a variable from
3346 * register to stack in order to satisfy the request.
3347 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3348 * registers (@c true, default) or the other way around
3349 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3350 */
3351DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3352{
3353 /*
3354 * Try to find a completely unused register, preferably a call-volatile one.
3355 */
3356 uint8_t idxReg;
3357 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3358 & ~pReNative->Core.bmHstRegsWithGstShadow
3359 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3360 if (fRegs)
3361 {
3362 if (fPreferVolatile)
3363 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3364 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3365 else
3366 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3367 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3368 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3369 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3370 }
3371 else
3372 {
3373 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3374 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3375 }
3376 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3377}
3378
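/* Editorial usage sketch, not part of the original source: a typical scratch
 * register lifetime in an emitter. The immediate load in the middle is just
 * an example; iemNativeEmitLoadGprImm64 and iemNativeRegFreeTmp are the real
 * helpers used/declared in this file.
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234));
 *      ... emit code using idxTmpReg as scratch ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */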
3379
3380/**
3381 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3382 * registers.
3383 *
3384 * @returns The host register number; throws VBox status code on failure,
3385 * so no need to check the return value.
3386 * @param pReNative The native recompile state.
3387 * @param poff Pointer to the variable with the code buffer position.
3388 * This will be updated if we need to move a variable from
3389 * register to stack in order to satisfy the request.
3390 * @param fRegMask Mask of acceptable registers.
3391 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3392 * registers (@c true, default) or the other way around
3393 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3394 */
3395DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3396 bool fPreferVolatile /*= true*/)
3397{
3398 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3399 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3400
3401 /*
3402 * Try to find a completely unused register, preferably a call-volatile one.
3403 */
3404 uint8_t idxReg;
3405 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3406 & ~pReNative->Core.bmHstRegsWithGstShadow
3407 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3408 & fRegMask;
3409 if (fRegs)
3410 {
3411 if (fPreferVolatile)
3412 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3413 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3414 else
3415 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3416 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3417 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3418 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3419 }
3420 else
3421 {
3422 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3423 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3424 }
3425 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3426}
3427
3428
3429/**
3430 * Allocates a temporary register for loading an immediate value into.
3431 *
3432 * This will emit code to load the immediate, unless there happens to be an
3433 * unused register with the value already loaded.
3434 *
3435 * The caller must not modify the returned register; it must be considered
3436 * read-only. Free it using iemNativeRegFreeTmpImm().
3437 *
3438 * @returns The host register number; throws VBox status code on failure, so no
3439 * need to check the return value.
3440 * @param pReNative The native recompile state.
3441 * @param poff Pointer to the variable with the code buffer position.
3442 * @param uImm The immediate value that the register must hold upon
3443 * return.
3444 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3445 * registers (@c true, default) or the other way around
3446 * (@c false).
3447 *
3448 * @note Reusing immediate values has not been implemented yet.
3449 */
3450DECL_HIDDEN_THROW(uint8_t)
3451iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3452{
3453 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3454 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3455 return idxReg;
3456}
3457
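/* Editorial usage sketch, not part of the original source: a read-only
 * constant held in a register for the duration of a few instructions and
 * released with the matching free helper (the middle step is hypothetical):
 *
 *      uint8_t const idxRegMask = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      ... emit code that only reads idxRegMask, e.g. an AND against it ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegMask);
 */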
3458
3459/**
3460 * Marks host register @a idxHstReg as containing a shadow copy of guest
3461 * register @a enmGstReg.
3462 *
3463 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
3464 * host register before calling.
3465 */
3466DECL_FORCE_INLINE(void)
3467iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3468{
3469 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
3470 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3471 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3472
3473 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
3474 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
3475 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
3476 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3477#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3478 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3479 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
3480#else
3481 RT_NOREF(off);
3482#endif
3483}
3484
3485
3486/**
3487 * Clear any guest register shadow claims from @a idxHstReg.
3488 *
3489 * The register does not need to be shadowing any guest registers.
3490 */
3491DECL_FORCE_INLINE(void)
3492iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
3493{
3494 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3495 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3496 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3497 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3498 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3499
3500#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3501 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3502 if (fGstRegs)
3503 {
3504 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
3505 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3506 while (fGstRegs)
3507 {
3508 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3509 fGstRegs &= ~RT_BIT_64(iGstReg);
3510 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
3511 }
3512 }
3513#else
3514 RT_NOREF(off);
3515#endif
3516
3517 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3518 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3519 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3520}
3521
3522
3523/**
3524 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
3525 * and global overview flags.
3526 */
3527DECL_FORCE_INLINE(void)
3528iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3529{
3530 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3531 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3532 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3533 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3534 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
3535 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3536 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3537
3538#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3539 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3540 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
3541#else
3542 RT_NOREF(off);
3543#endif
3544
3545 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3546 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
3547 if (!fGstRegShadowsNew)
3548 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3549 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
3550}
3551
3552
3553/**
3554 * Clear any guest register shadow claim for @a enmGstReg.
3555 */
3556DECL_FORCE_INLINE(void)
3557iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3558{
3559 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3560 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3561 {
3562 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3563 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3564 }
3565}
3566
3567
3568/**
3569 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
3570 * as the new shadow of it.
3571 */
3572DECL_FORCE_INLINE(void)
3573iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
3574 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3575{
3576 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3577 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3578 {
3579 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3580 if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
3581 return;
3582 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3583 }
3584 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
3585}
3586
3587
3588/**
3589 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
3590 * to @a idxRegTo.
3591 */
3592DECL_FORCE_INLINE(void)
3593iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
3594 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3595{
3596 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
3597 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
3598 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
3599 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
3600 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3601 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
3602 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
3603 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
3604 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
3605
3606 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3607 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
3608 if (!fGstRegShadowsFrom)
3609 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
3610 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
3611 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
3612 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
3613#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3614 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3615 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
3616#else
3617 RT_NOREF(off);
3618#endif
3619}
3620
3621
3622/**
3623 * Allocates a temporary host general purpose register for keeping a guest
3624 * register value.
3625 *
3626 * Since we may already have a register holding the guest register value,
3627 * code will be emitted to do the loading if that's not the case. Code may also
3628 * be emitted if we have to free up a register to satisfy the request.
3629 *
3630 * @returns The host register number; throws VBox status code on failure, so no
3631 * need to check the return value.
3632 * @param pReNative The native recompile state.
3633 * @param poff Pointer to the variable with the code buffer
3634 * position. This will be updated if we need to move a
3635 * variable from register to stack in order to satisfy
3636 * the request.
3637 * @param enmGstReg The guest register that is to be updated.
3638 * @param enmIntendedUse How the caller will be using the host register.
3639 * @param fNoVolatileRegs Set if no volatile registers are allowed, clear if any
3640 * register is okay (default). The ASSUMPTION here is
3641 * that the caller has already flushed all volatile
3642 * registers, so this is only applied if we allocate a
3643 * new register.
3644 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3645 */
3646DECL_HIDDEN_THROW(uint8_t)
3647iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3648 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3649 bool fNoVolatileRegs /*= false*/)
3650{
3651 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3652#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3653 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3654#endif
3655 uint32_t const fRegMask = !fNoVolatileRegs
3656 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3657 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3658
3659 /*
3660 * First check if the guest register value is already in a host register.
3661 */
3662 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3663 {
3664 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3665 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3666 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3667 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3668
3669 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3670 {
3671 /*
3672 * If the register will trash the guest shadow copy, try find a
3673 * completely unused register we can use instead. If that fails,
3674 * we need to disassociate the host reg from the guest reg.
3675 */
3676 /** @todo would be nice to know if preserving the register is in any way helpful. */
3677 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3678 && ( ~pReNative->Core.bmHstRegs
3679 & ~pReNative->Core.bmHstRegsWithGstShadow
3680 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3681 {
3682 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3683
3684 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3685
3686 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3687 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3688 g_apszIemNativeHstRegNames[idxRegNew]));
3689 idxReg = idxRegNew;
3690 }
3691 else
3692 {
3693 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3694 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3695 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3696 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3697 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3698 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3699 else
3700 {
3701 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3702 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3703 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3704 }
3705 }
3706 }
3707 else
3708 {
3709 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3710 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3711 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3712 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3713
3714 /*
3715 * Allocate a new register, copy the value and, if updating, the
3716 * guest shadow copy assignment to the new register.
3717 */
3718 /** @todo share register for readonly access. */
3719 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3720 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3721
3722 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3723 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3724
3725 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3726 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3727 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3728 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3729 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3730 else
3731 {
3732 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3733 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3734 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3735 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3736 }
3737 idxReg = idxRegNew;
3738 }
3739 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3740
3741#ifdef VBOX_STRICT
3742 /* Strict builds: Check that the value is correct. */
3743 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3744#endif
3745
3746 return idxReg;
3747 }
3748
3749 /*
3750 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3751 */
3752 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3753
3754 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3755 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3756
3757 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3758 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3759 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3760 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3761
3762 return idxRegNew;
3763}
3764
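/* Editorial usage sketch, not part of the original source: fetching the guest
 * RIP shadow for update. After the call the host register both holds and
 * shadows the guest value, so later readers reuse it; the modification step
 * in the middle is hypothetical.
 *
 *      uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
 *                                                               kIemNativeGstRegUse_ForUpdate);
 *      ... emit code advancing the value in idxPcReg and storing it back to CPUMCTX ...
 *      iemNativeRegFreeTmp(pReNative, idxPcReg);
 */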
3765
3766/**
3767 * Allocates a temporary host general purpose register that already holds the
3768 * given guest register value.
3769 *
3770 * The use case for this function is places where the shadowing state cannot be
3771 * modified due to branching and such. This will fail if we don't have a
3772 * current shadow copy handy or if it's incompatible. The only code that will
3773 * be emitted here is value checking code in strict builds.
3774 *
3775 * The intended use can only be readonly!
3776 *
3777 * @returns The host register number, UINT8_MAX if not present.
3778 * @param pReNative The native recompile state.
3779 * @param poff Pointer to the instruction buffer offset.
3780 * Will be updated in strict builds if a register is
3781 * found.
3782 * @param enmGstReg The guest register that is to be used (read-only).
3783 * @note In strict builds, this may throw instruction buffer growth failures.
3784 * Non-strict builds will not throw anything.
3785 * @sa iemNativeRegAllocTmpForGuestReg
3786 */
3787DECL_HIDDEN_THROW(uint8_t)
3788iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3789{
3790 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3791
3792 /*
3793 * First check if the guest register value is already in a host register.
3794 */
3795 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3796 {
3797 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3798 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3799 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3800 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3801
3802 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3803 {
3804 /*
3805 * We only do readonly use here, so easy compared to the other
3806 * variant of this code.
3807 */
3808 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3809 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3810 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3811 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3812 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3813
3814#ifdef VBOX_STRICT
3815 /* Strict builds: Check that the value is correct. */
3816 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3817#else
3818 RT_NOREF(poff);
3819#endif
3820 return idxReg;
3821 }
3822 }
3823
3824 return UINT8_MAX;
3825}
3826
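/* Editorial usage sketch, not part of the original source: opportunistic
 * read-only reuse of an existing shadow copy, with a hypothetical fallback
 * when no copy is handy:
 *
 *      uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
 *                                                                                kIemNativeGstReg_EFlags);
 *      if (idxEflReg != UINT8_MAX)
 *      {
 *          ... emit code that only reads idxEflReg ...
 *          iemNativeRegFreeTmp(pReNative, idxEflReg);
 *      }
 *      else
 *          ... fall back to reading eflags directly from CPUMCTX ...
 */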
3827
3828DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
3829
3830
3831/**
3832 * Allocates argument registers for a function call.
3833 *
3834 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3835 * need to check the return value.
3836 * @param pReNative The native recompile state.
3837 * @param off The current code buffer offset.
3838 * @param cArgs The number of arguments the function call takes.
3839 */
3840DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3841{
3842 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3843 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3844 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3845 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3846
3847 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3848 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3849 else if (cArgs == 0)
3850 return off;
3851
3852 /*
3853 * Do we get lucky and all registers are free and not shadowing anything?
3854 */
3855 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3856 for (uint32_t i = 0; i < cArgs; i++)
3857 {
3858 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3859 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3860 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3861 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3862 }
3863 /*
3864 * Okay, not lucky so we have to free up the registers.
3865 */
3866 else
3867 for (uint32_t i = 0; i < cArgs; i++)
3868 {
3869 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3870 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3871 {
3872 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3873 {
3874 case kIemNativeWhat_Var:
3875 {
3876 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3877 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
3878 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3879 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3880 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3881
3882 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3883 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3884 else
3885 {
3886 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3887 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3888 }
3889 break;
3890 }
3891
3892 case kIemNativeWhat_Tmp:
3893 case kIemNativeWhat_Arg:
3894 case kIemNativeWhat_rc:
3895 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3896 default:
3897 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3898 }
3899
3900 }
3901 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3902 {
3903 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3904 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3905 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3906 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3907 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3908 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3909 }
3910 else
3911 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3912 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3913 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3914 }
3915 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3916 return off;
3917}
3918
3919
3920DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3921
3922
3923#if 0
3924/**
3925 * Frees a register assignment of any type.
3926 *
3927 * @param pReNative The native recompile state.
3928 * @param idxHstReg The register to free.
3929 *
3930 * @note Does not update variables.
3931 */
3932DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3933{
3934 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3935 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3936 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3937 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3938 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3939 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3940 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3941 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3942 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3943 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3944 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3945 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3946 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3947 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3948
3949 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3950 /* no flushing, right:
3951 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3952 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3953 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3954 */
3955}
3956#endif
3957
3958
3959/**
3960 * Frees a temporary register.
3961 *
3962 * Any shadow copies of guest registers assigned to the host register will not
3963 * be flushed by this operation.
3964 */
3965DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3966{
3967 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3968 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3969 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3970 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3971 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3972}
3973
3974
3975/**
3976 * Frees a temporary immediate register.
3977 *
3978 * It is assumed that the caller has not modified the register, so it still
3979 * holds the same value as when it was allocated via iemNativeRegAllocTmpImm().
3980 */
3981DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3982{
3983 iemNativeRegFreeTmp(pReNative, idxHstReg);
3984}
3985
3986
3987/**
3988 * Frees a register assigned to a variable.
3989 *
3990 * The register will be disassociated from the variable.
3991 */
3992DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3993{
3994 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3995 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3996 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3997 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3998 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
3999
4000 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4001 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4002 if (!fFlushShadows)
4003 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
4004 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4005 else
4006 {
4007 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4008 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4009 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4010 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4011 uint64_t fGstRegShadows = fGstRegShadowsOld;
4012 while (fGstRegShadows)
4013 {
4014 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4015 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4016
4017 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4018 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4019 }
4020 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
4021 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4022 }
4023}
4024
4025
4026/**
4027 * Called right before emitting a call instruction to move anything important
4028 * out of call-volatile registers, free and flush the call-volatile registers,
4029 * optionally freeing argument variables.
4030 *
4031 * @returns New code buffer offset; throws VBox status code on failure.
4032 * @param pReNative The native recompile state.
4033 * @param off The code buffer offset.
4034 * @param cArgs The number of arguments the function call takes.
4035 * It is presumed that the ones passed in host registers have
4036 * already been allocated as such and won't need moving,
4037 * just freeing.
4038 * @param fKeepVars Mask of variables that should keep their register
4039 * assignments. Caller must take care to handle these.
4040 */
4041DECL_HIDDEN_THROW(uint32_t)
4042iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4043{
4044 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4045
4046 /* fKeepVars will reduce this mask. */
4047 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4048
4049 /*
4050 * Move anything important out of volatile registers.
4051 */
4052 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4053 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4054 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4055#ifdef IEMNATIVE_REG_FIXED_TMP0
4056 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4057#endif
4058 & ~g_afIemNativeCallRegs[cArgs];
4059
4060 fRegsToMove &= pReNative->Core.bmHstRegs;
4061 if (!fRegsToMove)
4062 { /* likely */ }
4063 else
4064 {
4065 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4066 while (fRegsToMove != 0)
4067 {
4068 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4069 fRegsToMove &= ~RT_BIT_32(idxReg);
4070
4071 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4072 {
4073 case kIemNativeWhat_Var:
4074 {
4075 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4076 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4077 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4078 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4079 if (!(RT_BIT_32(idxVar) & fKeepVars))
4080 {
4081 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4082 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4083 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4084 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4085 else
4086 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4087 }
4088 else
4089 fRegsToFree &= ~RT_BIT_32(idxReg);
4090 continue;
4091 }
4092
4093 case kIemNativeWhat_Arg:
4094 AssertMsgFailed(("What?!?: %u\n", idxReg));
4095 continue;
4096
4097 case kIemNativeWhat_rc:
4098 case kIemNativeWhat_Tmp:
4099 AssertMsgFailed(("Missing free: %u\n", idxReg));
4100 continue;
4101
4102 case kIemNativeWhat_FixedTmp:
4103 case kIemNativeWhat_pVCpuFixed:
4104 case kIemNativeWhat_pCtxFixed:
4105 case kIemNativeWhat_FixedReserved:
4106 case kIemNativeWhat_Invalid:
4107 case kIemNativeWhat_End:
4108 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4109 }
4110 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4111 }
4112 }
4113
4114 /*
4115 * Do the actual freeing.
4116 */
4117 if (pReNative->Core.bmHstRegs & fRegsToFree)
4118 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4119 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4120 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4121
4122 /* If there are guest register shadows in any call-volatile register, we
4123 have to clear the corresponding guest register masks for each register. */
4124 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4125 if (fHstRegsWithGstShadow)
4126 {
4127 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4128 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4129 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4130 do
4131 {
4132 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4133 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4134
4135 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4136 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4137 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4138 } while (fHstRegsWithGstShadow != 0);
4139 }
4140
4141 return off;
4142}
4143
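/* Editorial usage sketch, not part of the original source: one possible
 * sequence around a helper call; loading the argument registers and emitting
 * the call itself are left as hypothetical steps.
 *
 *      ... allocate/load the argument registers (IEMNATIVE_CALL_ARG0_GREG, ...) ...
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2);
 *      ... emit the actual call instruction ...
 */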
4144
4145/**
4146 * Flushes a set of guest register shadow copies.
4147 *
4148 * This is usually done after calling a threaded function or a C-implementation
4149 * of an instruction.
4150 *
4151 * @param pReNative The native recompile state.
4152 * @param fGstRegs Set of guest registers to flush.
4153 */
4154DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4155{
4156 /*
4157 * Reduce the mask by what's currently shadowed
4158 */
4159 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4160 fGstRegs &= bmGstRegShadowsOld;
4161 if (fGstRegs)
4162 {
4163 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4164 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4165 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4166 if (bmGstRegShadowsNew)
4167 {
4168 /*
4169 * Partial.
4170 */
4171 do
4172 {
4173 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4174 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4175 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4176 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4177 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4178
4179 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4180 fGstRegs &= ~fInThisHstReg;
4181 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4182 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4183 if (!fGstRegShadowsNew)
4184 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4185 } while (fGstRegs != 0);
4186 }
4187 else
4188 {
4189 /*
4190 * Clear all.
4191 */
4192 do
4193 {
4194 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4195 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4196 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4197 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4198 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4199
4200 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4201 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4202 } while (fGstRegs != 0);
4203 pReNative->Core.bmHstRegsWithGstShadow = 0;
4204 }
4205 }
4206}
4207
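/* Editorial usage sketch, not part of the original source: after a call that
 * may have changed guest state, drop the affected shadow copies so they are
 * reloaded from CPUMCTX on next use. UINT64_MAX flushes everything; callers
 * typically pass a narrower mask.
 *
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
 */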
4208
4209/**
4210 * Flushes delayed write of a specific guest register.
4211 *
4212 * This must be called prior to calling CImpl functions and any helpers that use
4213 * the guest state (like raising exceptions) and such.
4214 *
4215 * This optimization has not yet been implemented. The first target would be
4216 * RIP updates, since these are the most common ones.
4217 */
4218DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4219 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
4220{
4221 RT_NOREF(pReNative, enmClass, idxReg);
4222 return off;
4223}
4224
4225
4226/**
4227 * Flushes any delayed guest register writes.
4228 *
4229 * This must be called prior to calling CImpl functions and any helpers that use
4230 * the guest state (like raising exceptions) and such.
4231 *
4232 * This optimization has not yet been implemented. The first target would be
4233 * RIP updates, since these are the most common ones.
4234 */
4235DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4236{
4237 RT_NOREF(pReNative, off);
4238 return off;
4239}
4240
4241
4242#ifdef VBOX_STRICT
4243/**
4244 * Does internal register allocator sanity checks.
4245 */
4246static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
4247{
4248 /*
4249 * Iterate host registers building a guest shadowing set.
4250 */
4251 uint64_t bmGstRegShadows = 0;
4252 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
4253 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
4254 while (bmHstRegsWithGstShadow)
4255 {
4256 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
4257 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4258 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4259
4260 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4261 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
4262 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
4263 bmGstRegShadows |= fThisGstRegShadows;
4264 while (fThisGstRegShadows)
4265 {
4266 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
4267 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
4268 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
4269 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
4270 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
4271 }
4272 }
4273 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
4274 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
4275 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
4276
4277 /*
4278 * Now the other way around, checking the guest to host index array.
4279 */
4280 bmHstRegsWithGstShadow = 0;
4281 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
4282 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4283 while (bmGstRegShadows)
4284 {
4285 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
4286 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4287 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
4288
4289 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4290 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
4291 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
4292 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
4293 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4294 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4295 }
4296 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
4297 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
4298 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
4299}
4300#endif
4301
4302
4303/*********************************************************************************************************************************
4304* Code Emitters (larger snippets) *
4305*********************************************************************************************************************************/
4306
4307/**
4308 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
4309 * extending to 64-bit width.
4310 *
4311 * @returns New code buffer offset; throws VBox status code on error.
4312 * @param   pReNative   The native recompile state.
4313 * @param off The current code buffer position.
4314 * @param idxHstReg The host register to load the guest register value into.
4315 * @param enmGstReg The guest register to load.
4316 *
4317 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg;
4318 *       that is something the caller needs to do if applicable.
4319 */
4320DECL_HIDDEN_THROW(uint32_t)
4321iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
4322{
4323 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
4324 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4325
4326 switch (g_aGstShadowInfo[enmGstReg].cb)
4327 {
4328 case sizeof(uint64_t):
4329 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4330 case sizeof(uint32_t):
4331 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4332 case sizeof(uint16_t):
4333 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4334#if 0 /* not present in the table. */
4335 case sizeof(uint8_t):
4336 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4337#endif
4338 default:
4339 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4340 }
4341}
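/* Typical use: loading the CPUMCTX copy of a guest register into a scratch
   register for comparison, e.g. the strict-build value check below does
       off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
   and then compares the result against the shadowing host register. */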
4342
4343
4344#ifdef VBOX_STRICT
4345/**
4346 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
4347 *
4348 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4349 * Trashes EFLAGS on AMD64.
4350 */
4351static uint32_t
4352iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
4353{
4354# ifdef RT_ARCH_AMD64
4355 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4356
4357 /* rol reg64, 32 */
4358 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4359 pbCodeBuf[off++] = 0xc1;
4360 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4361 pbCodeBuf[off++] = 32;
4362
4363 /* test reg32, ffffffffh */
4364 if (idxReg >= 8)
4365 pbCodeBuf[off++] = X86_OP_REX_B;
4366 pbCodeBuf[off++] = 0xf7;
4367 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4368 pbCodeBuf[off++] = 0xff;
4369 pbCodeBuf[off++] = 0xff;
4370 pbCodeBuf[off++] = 0xff;
4371 pbCodeBuf[off++] = 0xff;
4372
4373 /* je/jz +1 */
4374 pbCodeBuf[off++] = 0x74;
4375 pbCodeBuf[off++] = 0x01;
4376
4377 /* int3 */
4378 pbCodeBuf[off++] = 0xcc;
4379
4380 /* rol reg64, 32 */
4381 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4382 pbCodeBuf[off++] = 0xc1;
4383 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4384 pbCodeBuf[off++] = 32;
4385
4386# elif defined(RT_ARCH_ARM64)
4387 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4388 /* lsr tmp0, reg64, #32 */
4389 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
4390 /* cbz tmp0, +1 */
4391 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4392 /* brk #0x1100 */
4393 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
4394
4395# else
4396# error "Port me!"
4397# endif
4398 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4399 return off;
4400}
4401#endif /* VBOX_STRICT */
4402
4403
4404#ifdef VBOX_STRICT
4405/**
4406 * Emits code that checks that the content of register @a idxReg is the same
4407 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
4408 * instruction if that's not the case.
4409 *
4410 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4411 * Trashes EFLAGS on AMD64.
4412 */
4413static uint32_t
4414iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
4415{
4416# ifdef RT_ARCH_AMD64
4417 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4418
4419 /* cmp reg, [mem] */
4420 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
4421 {
4422 if (idxReg >= 8)
4423 pbCodeBuf[off++] = X86_OP_REX_R;
4424 pbCodeBuf[off++] = 0x38;
4425 }
4426 else
4427 {
4428 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
4429 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
4430 else
4431 {
4432 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
4433 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4434 else
4435 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
4436 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
4437 if (idxReg >= 8)
4438 pbCodeBuf[off++] = X86_OP_REX_R;
4439 }
4440 pbCodeBuf[off++] = 0x39;
4441 }
4442 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
4443
4444 /* je/jz +1 */
4445 pbCodeBuf[off++] = 0x74;
4446 pbCodeBuf[off++] = 0x01;
4447
4448 /* int3 */
4449 pbCodeBuf[off++] = 0xcc;
4450
4451 /* For values smaller than the register size, we must check that the rest
4452 of the register is all zeros. */
4453 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
4454 {
4455 /* test reg64, imm32 */
4456 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4457 pbCodeBuf[off++] = 0xf7;
4458 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4459 pbCodeBuf[off++] = 0;
4460 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
4461 pbCodeBuf[off++] = 0xff;
4462 pbCodeBuf[off++] = 0xff;
4463
4464 /* je/jz +1 */
4465 pbCodeBuf[off++] = 0x74;
4466 pbCodeBuf[off++] = 0x01;
4467
4468 /* int3 */
4469 pbCodeBuf[off++] = 0xcc;
4470 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4471 }
4472 else
4473 {
4474 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4475 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
4476 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
4477 }
4478
4479# elif defined(RT_ARCH_ARM64)
4480 /* mov TMP0, [gstreg] */
4481 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
4482
4483 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4484 /* sub tmp0, tmp0, idxReg */
4485 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
4486 /* cbz tmp0, +1 */
4487 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4488 /* brk #0x1000+enmGstReg */
4489 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
4490 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4491
4492# else
4493# error "Port me!"
4494# endif
4495 return off;
4496}
4497#endif /* VBOX_STRICT */
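/* Summary of the strict-mode check above: the shadowing host register is
   compared against the CPUMCTX copy of the guest register and an int3/brk is
   emitted on mismatch; for guest registers narrower than 64 bits the unused
   upper bits of the host register are additionally required to be zero (see
   iemNativeEmitTop32BitsClearCheck() for the 32-bit case). */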
4498
4499
4500#ifdef VBOX_STRICT
4501/**
4502 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
4503 * important bits.
4504 *
4505 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4506 * Trashes EFLAGS on AMD64.
4507 */
4508static uint32_t
4509iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
4510{
4511 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4512 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
4513 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
4514 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
4515
4516#ifdef RT_ARCH_AMD64
4517 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4518
4519 /* je/jz +1 */
4520 pbCodeBuf[off++] = 0x74;
4521 pbCodeBuf[off++] = 0x01;
4522
4523 /* int3 */
4524 pbCodeBuf[off++] = 0xcc;
4525
4526# elif defined(RT_ARCH_ARM64)
4527 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4528
4529 /* b.eq +1 */
4530 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
4531 /* brk #0x2000 */
4532 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
4533
4534# else
4535# error "Port me!"
4536# endif
4537 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4538
4539 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4540 return off;
4541}
4542#endif /* VBOX_STRICT */
4543
4544
4545/**
4546 * Emits code for checking the return code of a call and rcPassUp, returning
4547 * from the code if either is non-zero.
4548 */
4549DECL_HIDDEN_THROW(uint32_t)
4550iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
4551{
4552#ifdef RT_ARCH_AMD64
4553 /*
4554 * AMD64: eax = call status code.
4555 */
4556
4557 /* edx = rcPassUp */
4558 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4559# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4560 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
4561# endif
4562
4563 /* edx = eax | rcPassUp */
4564 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4565 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
4566 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
4567 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4568
4569 /* Jump to non-zero status return path. */
4570 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
4571
4572 /* done. */
4573
4574#elif RT_ARCH_ARM64
4575 /*
4576 * ARM64: w0 = call status code.
4577 */
4578# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4579 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
4580# endif
4581 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4582
4583 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4584
4585 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
4586
4587 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4588 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
4589 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
4590
4591#else
4592# error "port me"
4593#endif
4594 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4595 return off;
4596}
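/* In C terms the check emitted above is simply:
       if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
           goto NonZeroRetOrPassUp;
   where rcCall is the value left in the ABI return register by the call. */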
4597
4598
4599/**
4600 * Emits code to check if the content of @a idxAddrReg is a canonical address,
4601 * raising a \#GP(0) if it isn't.
4602 *
4603 * @returns New code buffer offset; throws VBox status code on error.
4604 * @param pReNative The native recompile state.
4605 * @param off The code buffer offset.
4606 * @param idxAddrReg The host register with the address to check.
4607 * @param idxInstr The current instruction.
4608 */
4609DECL_HIDDEN_THROW(uint32_t)
4610iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
4611{
4612 /*
4613 * Make sure we don't have any outstanding guest register writes as we may
4614     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
4615 */
4616 off = iemNativeRegFlushPendingWrites(pReNative, off);
4617
4618#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4619 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4620#else
4621 RT_NOREF(idxInstr);
4622#endif
4623
4624#ifdef RT_ARCH_AMD64
4625 /*
4626 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
4627 * return raisexcpt();
4628     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
4629 */
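    /* Worked example: for the canonical address 0xffff800000000000 the high
       dword is 0xffff8000; adding 0x8000 wraps to zero in 32-bit arithmetic,
       so the final right shift by 16 yields zero and no exception is raised.
       For the non-canonical 0x0000800000000000 the high dword is 0x00008000,
       which becomes 0x00010000 after the add and stays non-zero after the
       shift, taking the RaiseGp0 path. */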
4630 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4631
4632 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
4633 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
4634 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
4635 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
4636 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4637
4638 iemNativeRegFreeTmp(pReNative, iTmpReg);
4639
4640#elif defined(RT_ARCH_ARM64)
4641 /*
4642 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
4643 * return raisexcpt();
4644 * ----
4645 * mov x1, 0x800000000000
4646 * add x1, x0, x1
4647 * cmp xzr, x1, lsr 48
4648 * b.ne .Lraisexcpt
4649 */
4650 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4651
4652 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
4653 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
4654 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
4655 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4656
4657 iemNativeRegFreeTmp(pReNative, iTmpReg);
4658
4659#else
4660# error "Port me"
4661#endif
4662 return off;
4663}
4664
4665
4666/**
4667 * Emits code to check if the content of @a idxAddrReg is within the limit of
4668 * idxSegReg, raising a \#GP(0) if it isn't.
4669 *
4670 * @returns New code buffer offset; throws VBox status code on error.
4671 * @param pReNative The native recompile state.
4672 * @param off The code buffer offset.
4673 * @param idxAddrReg The host register (32-bit) with the address to
4674 * check.
4675 * @param idxSegReg The segment register (X86_SREG_XXX) to check
4676 * against.
4677 * @param idxInstr The current instruction.
4678 */
4679DECL_HIDDEN_THROW(uint32_t)
4680iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4681 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
4682{
4683 /*
4684 * Make sure we don't have any outstanding guest register writes as we may
4685     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
4686 */
4687 off = iemNativeRegFlushPendingWrites(pReNative, off);
4688
4689#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4690 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4691#else
4692 RT_NOREF(idxInstr);
4693#endif
4694
4695 /** @todo implement expand down/whatnot checking */
4696 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
4697
4698 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4699 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
4700 kIemNativeGstRegUse_ForUpdate);
4701
4702 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
4703 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4704
4705 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
4706 return off;
4707}
4708
4709
4710/**
4711 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
4712 *
4713 * @returns The flush mask.
4714 * @param fCImpl The IEM_CIMPL_F_XXX flags.
4715 * @param fGstShwFlush The starting flush mask.
4716 */
4717DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
4718{
4719 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
4720 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
4721 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
4722 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
4723 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
4724 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
4725 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
4726 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
4727 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
4728 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
4729 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
4730 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
4731 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4732 return fGstShwFlush;
4733}
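/* Example: a far branch (IEM_CIMPL_F_BRANCH_FAR) adds the CS selector, base
   and limit shadows to the flush mask, so iemNativeRegFlushGuestShadows()
   drops any host register copies of those before the CImpl call can modify
   them behind the recompiler's back. */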
4734
4735
4736/**
4737 * Emits a call to a CImpl function or something similar.
4738 */
4739DECL_HIDDEN_THROW(uint32_t)
4740iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
4741 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
4742{
4743 /*
4744     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
4745 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
4746 */
4747 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
4748 fGstShwFlush
4749 | RT_BIT_64(kIemNativeGstReg_Pc)
4750 | RT_BIT_64(kIemNativeGstReg_EFlags));
4751 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4752
4753 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4754
4755 /*
4756 * Load the parameters.
4757 */
4758#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
4759    /* Special handling of the hidden VBOXSTRICTRC pointer. */
4760 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4761 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4762 if (cAddParams > 0)
4763 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
4764 if (cAddParams > 1)
4765 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
4766 if (cAddParams > 2)
4767 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
4768 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4769
4770#else
4771 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
4772 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4773 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4774 if (cAddParams > 0)
4775 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
4776 if (cAddParams > 1)
4777 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
4778 if (cAddParams > 2)
4779# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
4780 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
4781# else
4782 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
4783# endif
4784#endif
4785
4786 /*
4787 * Make the call.
4788 */
4789 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
4790
4791#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4792 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4793#endif
4794
4795 /*
4796 * Check the status code.
4797 */
4798 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4799}
4800
4801
4802/**
4803 * Emits a call to a threaded worker function.
4804 */
4805DECL_HIDDEN_THROW(uint32_t)
4806iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
4807{
4808 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
4809 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4810
4811#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4812 /* The threaded function may throw / long jmp, so set current instruction
4813 number if we're counting. */
4814 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4815#endif
4816
4817 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
4818
4819#ifdef RT_ARCH_AMD64
4820 /* Load the parameters and emit the call. */
4821# ifdef RT_OS_WINDOWS
4822# ifndef VBOXSTRICTRC_STRICT_ENABLED
4823 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4824 if (cParams > 0)
4825 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
4826 if (cParams > 1)
4827 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
4828 if (cParams > 2)
4829 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
4830# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
4831 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
4832 if (cParams > 0)
4833 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
4834 if (cParams > 1)
4835 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
4836 if (cParams > 2)
4837 {
4838 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
4839 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
4840 }
4841 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4842# endif /* VBOXSTRICTRC_STRICT_ENABLED */
4843# else
4844 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4845 if (cParams > 0)
4846 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
4847 if (cParams > 1)
4848 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
4849 if (cParams > 2)
4850 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
4851# endif
4852
4853 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4854
4855# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4856 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4857# endif
4858
4859#elif RT_ARCH_ARM64
4860 /*
4861 * ARM64:
4862 */
4863 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4864 if (cParams > 0)
4865 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
4866 if (cParams > 1)
4867 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
4868 if (cParams > 2)
4869 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
4870
4871 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4872
4873#else
4874# error "port me"
4875#endif
4876
4877 /*
4878 * Check the status code.
4879 */
4880 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
4881
4882 return off;
4883}
4884
4885
4886/**
4887 * Emits the code at the CheckBranchMiss label.
4888 */
4889static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4890{
4891 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
4892 if (idxLabel != UINT32_MAX)
4893 {
4894 iemNativeLabelDefine(pReNative, idxLabel, off);
4895
4896 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
4897 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4898 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
4899
4900 /* jump back to the return sequence. */
4901 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4902 }
4903 return off;
4904}
4905
4906
4907/**
4908 * Emits the code at the NeedCsLimChecking label.
4909 */
4910static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4911{
4912 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
4913 if (idxLabel != UINT32_MAX)
4914 {
4915 iemNativeLabelDefine(pReNative, idxLabel, off);
4916
4917 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
4918 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4919 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
4920
4921 /* jump back to the return sequence. */
4922 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4923 }
4924 return off;
4925}
4926
4927
4928/**
4929 * Emits the code at the ObsoleteTb label.
4930 */
4931static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4932{
4933 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
4934 if (idxLabel != UINT32_MAX)
4935 {
4936 iemNativeLabelDefine(pReNative, idxLabel, off);
4937
4938 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
4939 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4940 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
4941
4942 /* jump back to the return sequence. */
4943 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4944 }
4945 return off;
4946}
4947
4948
4949/**
4950 * Emits the code at the RaiseGP0 label.
4951 */
4952static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4953{
4954 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
4955 if (idxLabel != UINT32_MAX)
4956 {
4957 iemNativeLabelDefine(pReNative, idxLabel, off);
4958
4959 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
4960 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4961 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
4962
4963 /* jump back to the return sequence. */
4964 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4965 }
4966 return off;
4967}
4968
4969
4970/**
4971 * Emits the code at the ReturnWithFlags label (returns
4972 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
4973 */
4974static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4975{
4976 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
4977 if (idxLabel != UINT32_MAX)
4978 {
4979 iemNativeLabelDefine(pReNative, idxLabel, off);
4980
4981 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
4982
4983 /* jump back to the return sequence. */
4984 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4985 }
4986 return off;
4987}
4988
4989
4990/**
4991 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
4992 */
4993static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4994{
4995 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
4996 if (idxLabel != UINT32_MAX)
4997 {
4998 iemNativeLabelDefine(pReNative, idxLabel, off);
4999
5000 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5001
5002 /* jump back to the return sequence. */
5003 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5004 }
5005 return off;
5006}
5007
5008
5009/**
5010 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5011 */
5012static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5013{
5014 /*
5015 * Generate the rc + rcPassUp fiddling code if needed.
5016 */
5017 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5018 if (idxLabel != UINT32_MAX)
5019 {
5020 iemNativeLabelDefine(pReNative, idxLabel, off);
5021
5022 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
5023#ifdef RT_ARCH_AMD64
5024# ifdef RT_OS_WINDOWS
5025# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5026 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5027# endif
5028 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5029 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5030# else
5031 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5032 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5033# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5034 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5035# endif
5036# endif
5037# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5038 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5039# endif
5040
5041#else
5042 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5043 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5044 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5045#endif
5046
5047 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5048 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5049 }
5050 return off;
5051}
5052
5053
5054/**
5055 * Emits a standard epilog.
5056 */
5057static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5058{
5059 *pidxReturnLabel = UINT32_MAX;
5060
5061 /*
5062 * Successful return, so clear the return register (eax, w0).
5063 */
5064 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
5065
5066 /*
5067 * Define label for common return point.
5068 */
5069 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5070 *pidxReturnLabel = idxReturn;
5071
5072 /*
5073 * Restore registers and return.
5074 */
5075#ifdef RT_ARCH_AMD64
5076 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5077
5078    /* Reposition rsp at the r15 restore point. */
5079 pbCodeBuf[off++] = X86_OP_REX_W;
5080 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5081 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5082 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5083
5084 /* Pop non-volatile registers and return */
5085 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5086 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5087 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5088 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5089 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5090 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5091 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5092 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5093# ifdef RT_OS_WINDOWS
5094 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5095 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5096# endif
5097 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5098 pbCodeBuf[off++] = 0xc9; /* leave */
5099 pbCodeBuf[off++] = 0xc3; /* ret */
5100 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5101
5102#elif RT_ARCH_ARM64
5103 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5104
5105    /* ldp x19, x20, [sp], #IEMNATIVE_FRAME_VAR_SIZE ; Unallocate the variable space and restore x19+x20. */
5106 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5107 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5108 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5109 IEMNATIVE_FRAME_VAR_SIZE / 8);
5110 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5111 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5112 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5113 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5114 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5115 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5116 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5117 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5118 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5119 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5120 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5121 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5122
5123 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5124 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5125 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5126 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5127
5128 /* retab / ret */
5129# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5130 if (1)
5131 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5132 else
5133# endif
5134 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
5135
5136#else
5137# error "port me"
5138#endif
5139 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5140
5141 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
5142}
5143
5144
5145/**
5146 * Emits a standard prolog.
5147 */
5148static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5149{
5150#ifdef RT_ARCH_AMD64
5151 /*
5152 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
5153 * reserving 64 bytes for stack variables plus 4 non-register argument
5154     * slots. Fixed register assignment: xBX = pVCpu;
5155 *
5156 * Since we always do the same register spilling, we can use the same
5157 * unwind description for all the code.
5158 */
5159 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5160 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
5161 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
5162 pbCodeBuf[off++] = 0x8b;
5163 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
5164 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
5165 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
5166# ifdef RT_OS_WINDOWS
5167 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
5168 pbCodeBuf[off++] = 0x8b;
5169 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
5170 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
5171 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
5172# else
5173 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
5174 pbCodeBuf[off++] = 0x8b;
5175 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
5176# endif
5177 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
5178 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
5179 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
5180 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
5181 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
5182 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
5183 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
5184 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
5185
5186 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
5187 X86_GREG_xSP,
5188 IEMNATIVE_FRAME_ALIGN_SIZE
5189 + IEMNATIVE_FRAME_VAR_SIZE
5190 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
5191 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
5192 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
5193 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
5194 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
5195
5196#elif RT_ARCH_ARM64
5197 /*
5198 * We set up a stack frame exactly like on x86, only we have to push the
5199     * return address ourselves here. We save all non-volatile registers.
5200 */
5201 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5202
5203# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
5204                     * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
5205                     * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
5206                     * in any way conditional, so we just emit this instruction now and hope for the best... */
5207 /* pacibsp */
5208 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
5209# endif
5210
5211 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
5212 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
5213 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5214 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5215 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
5216 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
5217 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5218 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5219 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5220 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5221 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5222 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5223 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5224 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5225 /* Save the BP and LR (ret address) registers at the top of the frame. */
5226 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5227 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5228 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5229 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
5230 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
5231 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
5232
5233 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
5234 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
5235
5236 /* mov r28, r0 */
5237 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
5238 /* mov r27, r1 */
5239 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
5240
5241#else
5242# error "port me"
5243#endif
5244 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5245 return off;
5246}
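/* Resulting AMD64 frame sketch (Windows host; the SysV frame is the same
   minus the rsi/rdi pushes):
       [rbp+08h]        return address
       [rbp+00h]        saved rbp
       [rbp-08h]        saved rbx   (= pVCpu while inside the TB)
       [rbp-10h/-18h]   saved rsi, rdi
       [rbp-20h..-38h]  saved r12..r15
       below that       alignment, variable space and stack/shadow argument slots
   The exact variable/argument area sizes come from the IEMNATIVE_FRAME_XXX
   defines and are not repeated here. */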
5247
5248
5249
5250
5251/*********************************************************************************************************************************
5252* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
5253*********************************************************************************************************************************/
5254
5255#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
5256 { \
5257 Assert(pReNative->Core.bmVars == 0); \
5258 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
5259 Assert(pReNative->Core.bmStack == 0); \
5260 pReNative->fMc = (a_fMcFlags); \
5261 pReNative->fCImpl = (a_fCImplFlags); \
5262 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
5263
5264/** We have to get to the end in recompilation mode, as otherwise we won't
5265 * generate code for all the IEM_MC_IF_XXX branches. */
5266#define IEM_MC_END() \
5267 iemNativeVarFreeAll(pReNative); \
5268 } return off
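/* The two macros above bracket the recompiled body of an MC block: assert a
   clean variable/stack state, record the MC and CImpl flags, and always fall
   through to the common 'return off' so even untaken IEM_MC_IF_XXX branches
   get code generated. A purely hypothetical, minimal block would look like:
       IEM_MC_BEGIN(0, 0, 0, 0);        // flag arguments illustrative only
       ... recompiled MC statements ...
       IEM_MC_END();
*/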
5269
5270
5271
5272/*********************************************************************************************************************************
5273* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
5274*********************************************************************************************************************************/
5275
5276#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
5277 pReNative->fMc = 0; \
5278 pReNative->fCImpl = (a_fFlags); \
5279 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
5280
5281
5282#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
5283 pReNative->fMc = 0; \
5284 pReNative->fCImpl = (a_fFlags); \
5285 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
5286
5287DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5288 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5289 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
5290{
5291 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
5292}
5293
5294
5295#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
5296 pReNative->fMc = 0; \
5297 pReNative->fCImpl = (a_fFlags); \
5298 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5299 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
5300
5301DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5302 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5303 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
5304{
5305 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
5306}
5307
5308
5309#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
5310 pReNative->fMc = 0; \
5311 pReNative->fCImpl = (a_fFlags); \
5312 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5313 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
5314
5315DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5316 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5317 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
5318 uint64_t uArg2)
5319{
5320 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
5321}
5322
5323
5324
5325/*********************************************************************************************************************************
5326* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
5327*********************************************************************************************************************************/
5328
5329/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
5330 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
5331DECL_INLINE_THROW(uint32_t)
5332iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5333{
5334 /*
5335     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
5336     * return with a special status code and make the execution loop deal with
5337     * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
5338     * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
5339     * could continue w/o interruption, it probably will drop into the
5340     * debugger, so it's not worth the effort of trying to service it here and
5341     * we just lump it in with the handling of the others.
5342     *
5343     * To simplify the code and the register state management even more (wrt
5344     * the immediate in the AND operation), we always update the flags and skip
5345     * the extra check and its associated conditional jump.
5346 */
5347 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
5348 <= UINT32_MAX);
5349 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5350 kIemNativeGstRegUse_ForUpdate);
5351 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
5352 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
5353 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
5354 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
5355 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5356
5357 /* Free but don't flush the EFLAGS register. */
5358 iemNativeRegFreeTmp(pReNative, idxEflReg);
5359
5360 return off;
5361}
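/* In pseudo-C the above emits:
       uint32_t fEfl = <guest EFLAGS shadow>;
       if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
           goto ReturnWithFlags;
       fEfl &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
       <store fEfl back into cpum.GstCtx.eflags>;
*/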
5362
5363
5364#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
5365 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5366
5367#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
5368 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
5369 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5370
5371/** Same as iemRegAddToRip64AndFinishingNoFlags. */
5372DECL_INLINE_THROW(uint32_t)
5373iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5374{
5375 /* Allocate a temporary PC register. */
5376 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5377
5378 /* Perform the addition and store the result. */
5379 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
5380 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5381
5382 /* Free but don't flush the PC register. */
5383 iemNativeRegFreeTmp(pReNative, idxPcReg);
5384
5385 return off;
5386}
5387
5388
5389#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
5390 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5391
5392#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
5393 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
5394 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5395
5396/** Same as iemRegAddToEip32AndFinishingNoFlags. */
5397DECL_INLINE_THROW(uint32_t)
5398iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5399{
5400 /* Allocate a temporary PC register. */
5401 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5402
5403 /* Perform the addition and store the result. */
5404 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5405 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5406
5407 /* Free but don't flush the PC register. */
5408 iemNativeRegFreeTmp(pReNative, idxPcReg);
5409
5410 return off;
5411}
5412
5413
5414#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
5415 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5416
5417#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
5418 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
5419 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5420
5421/** Same as iemRegAddToIp16AndFinishingNoFlags. */
5422DECL_INLINE_THROW(uint32_t)
5423iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5424{
5425 /* Allocate a temporary PC register. */
5426 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5427
5428 /* Perform the addition and store the result. */
5429 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5430 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5431 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5432
5433 /* Free but don't flush the PC register. */
5434 iemNativeRegFreeTmp(pReNative, idxPcReg);
5435
5436 return off;
5437}
5438
5439
5440
5441/*********************************************************************************************************************************
5442* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
5443*********************************************************************************************************************************/
5444
5445#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
5446 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5447 (a_enmEffOpSize), pCallEntry->idxInstr)
5448
5449#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5450 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
5451 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5452
5453#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
5454 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5455 IEMMODE_16BIT, pCallEntry->idxInstr)
5456
5457#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
5458 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
5459 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5460
5461#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
5462 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5463 IEMMODE_64BIT, pCallEntry->idxInstr)
5464
5465#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
5466 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
5467 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5468
5469/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
5470 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
5471 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
5472DECL_INLINE_THROW(uint32_t)
5473iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5474 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5475{
5476 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
5477
5478 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5479 off = iemNativeRegFlushPendingWrites(pReNative, off);
5480
5481 /* Allocate a temporary PC register. */
5482 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5483
5484 /* Perform the addition. */
5485 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
5486
5487 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
5488 {
5489 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5490 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5491 }
5492 else
5493 {
5494 /* Just truncate the result to 16-bit IP. */
5495 Assert(enmEffOpSize == IEMMODE_16BIT);
5496 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5497 }
5498 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5499
5500 /* Free but don't flush the PC register. */
5501 iemNativeRegFreeTmp(pReNative, idxPcReg);
5502
5503 return off;
5504}
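/* I.e. the emitted code mirrors:
       uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + offDisp;
       if (enmEffOpSize == IEMMODE_64BIT)
           <raise #GP(0) and exit the TB unless uNewRip is canonical>;
       else
           uNewRip &= UINT16_MAX;
       pVCpu->cpum.GstCtx.rip = uNewRip;
   with all pending shadow state flushed up front because the #GP path leaves
   the TB. */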
5505
5506
5507#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
5508 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5509 (a_enmEffOpSize), pCallEntry->idxInstr)
5510
5511#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5512 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
5513 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5514
5515#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
5516 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5517 IEMMODE_16BIT, pCallEntry->idxInstr)
5518
5519#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
5520 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
5521 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5522
5523#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
5524 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5525 IEMMODE_32BIT, pCallEntry->idxInstr)
5526
5527#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
5528 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
5529 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5530
5531/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
5532 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
5533 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
5534DECL_INLINE_THROW(uint32_t)
5535iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5536 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5537{
5538 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
5539
5540 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5541 off = iemNativeRegFlushPendingWrites(pReNative, off);
5542
5543 /* Allocate a temporary PC register. */
5544 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5545
5546 /* Perform the addition. */
5547 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5548
5549 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
5550 if (enmEffOpSize == IEMMODE_16BIT)
5551 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5552
5553 /* Perform limit checking, potentially raising #GP(0) and exiting the TB. */
5554 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5555
5556 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5557
5558 /* Free but don't flush the PC register. */
5559 iemNativeRegFreeTmp(pReNative, idxPcReg);
5560
5561 return off;
5562}
5563
5564
5565#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
5566 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
5567
5568#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
5569 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
5570 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5571
5572#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
5573 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
5574
5575#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
5576 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
5577 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5578
5579#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
5580 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
5581
5582#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
5583 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
5584 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5585
5586/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
5587DECL_INLINE_THROW(uint32_t)
5588iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5589 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
5590{
5591 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5592 off = iemNativeRegFlushPendingWrites(pReNative, off);
5593
5594 /* Allocate a temporary PC register. */
5595 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5596
5597 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
5598 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5599 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5600 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5601 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5602
5603 /* Free but don't flush the PC register. */
5604 iemNativeRegFreeTmp(pReNative, idxPcReg);
5605
5606 return off;
5607}
5608
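/* Worked example for the 16-bit variant above (illustrative numbers only): with
   IP=0xfff0, cbInstr=3 and offDisp=+0x20 the 32-bit addition yields 0x10013;
   clearing bits 16..31 wraps this to 0x0013, mirroring the 16-bit IP wrap-around,
   and the CS limit check then decides whether that target is acceptable. */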
5609
5610
5611/*********************************************************************************************************************************
5612* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
5613*********************************************************************************************************************************/
5614
5615/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
5616#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
5617 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5618
5619/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
5620#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
5621 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5622
5623/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
5624#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
5625 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5626
5627/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
5628 * clears flags. */
5629#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
5630 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
5631 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5632
5633/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
5634 * clears flags. */
5635#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
5636 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
5637 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5638
5639/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
5640 * clears flags. */
5641#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
5642 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
5643 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5644
5645#undef IEM_MC_SET_RIP_U16_AND_FINISH
5646
5647
5648/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
5649#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
5650 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
5651
5652/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
5653#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
5654 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
5655
5656/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
5657 * clears flags. */
5658#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
5659 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
5660 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5661
5662/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
5663 * and clears flags. */
5664#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
5665 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
5666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5667
5668#undef IEM_MC_SET_RIP_U32_AND_FINISH
5669
5670
5671/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
5672#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
5673 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
5674
5675/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
5676 * and clears flags. */
5677#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
5678 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
5679 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5680
5681#undef IEM_MC_SET_RIP_U64_AND_FINISH
5682
5683
5684/** Same as iemRegRipJumpU16AndFinishNoFlags,
5685 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
5686DECL_INLINE_THROW(uint32_t)
5687iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
5688 uint8_t idxInstr, uint8_t cbVar)
5689{
5690 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
5691 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
5692
5693 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5694 off = iemNativeRegFlushPendingWrites(pReNative, off);
5695
5696 /* Get a register with the new PC loaded from idxVarPc.
5697 Note! This ASSUMES that the high bits of the GPR are zeroed. */
5698 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
5699
5700 /* Check limit (may #GP(0) + exit TB). */
5701 if (!f64Bit)
5702 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5703 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5704 else if (cbVar > sizeof(uint32_t))
5705 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5706
5707 /* Store the result. */
5708 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5709
5710 /** @todo implicitly free the variable? */
5711
5712 return off;
5713}
5714
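/* The IEM_MC_SET_RIP_UXX_AND_FINISH variants above take the new PC as a variable
   index rather than an immediate displacement, which is why the canonical/limit
   checks operate on the acquired host register; indirect jump style instructions
   (e.g. 'jmp r/m') are presumably the users of this path. */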
5715
5716
5717/*********************************************************************************************************************************
5718* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
5719*********************************************************************************************************************************/
5720
5721/**
5722 * Pushes an IEM_MC_IF_XXX onto the condition stack.
5723 *
5724 * @returns Pointer to the condition stack entry.
5725 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if nested too deeply.
5726 */
5727DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
5728{
5729 uint32_t const idxStack = pReNative->cCondDepth;
5730 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
5731
5732 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
5733 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
5734
5735 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
5736 pEntry->fInElse = false;
5737 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
5738 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
5739
5740 return pEntry;
5741}
5742
5743
5744/**
5745 * Start of the if-block, snapshotting the register and variable state.
5746 */
5747DECL_INLINE_THROW(void)
5748iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
5749{
5750 Assert(offIfBlock != UINT32_MAX);
5751 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5752 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5753 Assert(!pEntry->fInElse);
5754
5755 /* Define the start of the IF block if requested or for disassembly purposes. */
5756 if (idxLabelIf != UINT32_MAX)
5757 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
5758#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5759 else
5760 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
5761#else
5762 RT_NOREF(offIfBlock);
5763#endif
5764
5765 /* Copy the initial state so we can restore it in the 'else' block. */
5766 pEntry->InitialState = pReNative->Core;
5767}
5768
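/*
 * Rough shape of what the IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF emitters
 * produce (illustrative sketch only, label bookkeeping omitted):
 *
 *          test/bit-check ...      ; IEM_MC_IF_XXX: evaluate the condition
 *          jcc     .Lelse_N        ; branch to the else label when false
 *          ...                     ; if-block statements
 *          jmp     .Lendif_N       ; IEM_MC_ELSE: skip over the else-block
 *      .Lelse_N:
 *          ...                     ; else-block statements
 *      .Lendif_N:                  ; IEM_MC_ENDIF: state reconciliation point
 */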
5769
5770#define IEM_MC_ELSE() } while (0); \
5771 off = iemNativeEmitElse(pReNative, off); \
5772 do {
5773
5774/** Emits code related to IEM_MC_ELSE. */
5775DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5776{
5777 /* Check sanity and get the conditional stack entry. */
5778 Assert(off != UINT32_MAX);
5779 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5780 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5781 Assert(!pEntry->fInElse);
5782
5783 /* Jump to the endif */
5784 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
5785
5786 /* Define the else label and enter the else part of the condition. */
5787 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5788 pEntry->fInElse = true;
5789
5790 /* Snapshot the core state so we can do a merge at the endif and restore
5791 the snapshot we took at the start of the if-block. */
5792 pEntry->IfFinalState = pReNative->Core;
5793 pReNative->Core = pEntry->InitialState;
5794
5795 return off;
5796}
5797
5798
5799#define IEM_MC_ENDIF() } while (0); \
5800 off = iemNativeEmitEndIf(pReNative, off)
5801
5802/** Emits code related to IEM_MC_ENDIF. */
5803DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5804{
5805 /* Check sanity and get the conditional stack entry. */
5806 Assert(off != UINT32_MAX);
5807 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5808 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5809
5810 /*
5811 * Now we have to find common ground with the core state at the end of the
5812 * if-block (or the initial state if there is no else-block). Use the lowest
5813 * common denominator and just drop anything that isn't the same in both states.
5814 */
5815 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
5816 * which is why we're doing this at the end of the else-block.
5817 * But we'd need more info about the future for that to be worth the effort. */
5818 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
5819 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
5820 {
5821 /* shadow guest stuff first. */
5822 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
5823 if (fGstRegs)
5824 {
5825 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
5826 do
5827 {
5828 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5829 fGstRegs &= ~RT_BIT_64(idxGstReg);
5830
5831 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5832 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
5833 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
5834 {
5835 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
5836 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
5837 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
5838 }
5839 } while (fGstRegs);
5840 }
5841 else
5842 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
5843
5844 /* Check variables next. For now we must require them to be identical
5845 or stuff we can recreate. */
5846 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
5847 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
5848 if (fVars)
5849 {
5850 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
5851 do
5852 {
5853 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
5854 fVars &= ~RT_BIT_32(idxVar);
5855
5856 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
5857 {
5858 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
5859 continue;
5860 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5861 {
5862 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5863 if (idxHstReg != UINT8_MAX)
5864 {
5865 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5866 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5867 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
5868 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5869 }
5870 continue;
5871 }
5872 }
5873 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
5874 continue;
5875
5876 /* Irreconcilable, so drop it. */
5877 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5878 if (idxHstReg != UINT8_MAX)
5879 {
5880 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5881 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5882 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
5883 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5884 }
5885 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
5886 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5887 } while (fVars);
5888 }
5889
5890 /* Finally, check that the host register allocations match. */
5891 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
5892 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
5893 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
5894 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
5895 }
5896
5897 /*
5898 * Define the endif label and maybe the else one if we're still in the 'if' part.
5899 */
5900 if (!pEntry->fInElse)
5901 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5902 else
5903 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
5904 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
5905
5906 /* Pop the conditional stack. */
5907 pReNative->cCondDepth -= 1;
5908
5909 return off;
5910}
5911
5912
5913#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
5914 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
5915 do {
5916
5917/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
5918DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5919{
5920 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5921
5922 /* Get the eflags. */
5923 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5924 kIemNativeGstRegUse_ReadOnly);
5925
5926 /* Test and jump. */
5927 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5928
5929 /* Free but don't flush the EFlags register. */
5930 iemNativeRegFreeTmp(pReNative, idxEflReg);
5931
5932 /* Make a copy of the core state now as we start the if-block. */
5933 iemNativeCondStartIfBlock(pReNative, off);
5934
5935 return off;
5936}
5937
5938
5939#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
5940 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
5941 do {
5942
5943/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
5944DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5945{
5946 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5947
5948 /* Get the eflags. */
5949 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5950 kIemNativeGstRegUse_ReadOnly);
5951
5952 /* Test and jump. */
5953 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5954
5955 /* Free but don't flush the EFlags register. */
5956 iemNativeRegFreeTmp(pReNative, idxEflReg);
5957
5958 /* Make a copy of the core state now as we start the if-block. */
5959 iemNativeCondStartIfBlock(pReNative, off);
5960
5961 return off;
5962}
5963
5964
5965#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
5966 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
5967 do {
5968
5969/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
5970DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
5971{
5972 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5973
5974 /* Get the eflags. */
5975 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5976 kIemNativeGstRegUse_ReadOnly);
5977
5978 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5979 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5980
5981 /* Test and jump. */
5982 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5983
5984 /* Free but don't flush the EFlags register. */
5985 iemNativeRegFreeTmp(pReNative, idxEflReg);
5986
5987 /* Make a copy of the core state now as we start the if-block. */
5988 iemNativeCondStartIfBlock(pReNative, off);
5989
5990 return off;
5991}
5992
5993
5994#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
5995 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
5996 do {
5997
5998/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
5999DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6000{
6001 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6002
6003 /* Get the eflags. */
6004 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6005 kIemNativeGstRegUse_ReadOnly);
6006
6007 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6008 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6009
6010 /* Test and jump. */
6011 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6012
6013 /* Free but don't flush the EFlags register. */
6014 iemNativeRegFreeTmp(pReNative, idxEflReg);
6015
6016 /* Make a copy of the core state now as we start the if-block. */
6017 iemNativeCondStartIfBlock(pReNative, off);
6018
6019 return off;
6020}
6021
6022
6023#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
6024 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
6025 do {
6026
6027#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
6028 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
6029 do {
6030
6031/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
6032DECL_INLINE_THROW(uint32_t)
6033iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6034 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6035{
6036 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6037
6038 /* Get the eflags. */
6039 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6040 kIemNativeGstRegUse_ReadOnly);
6041
6042 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6043 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6044
6045 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6046 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6047 Assert(iBitNo1 != iBitNo2);
6048
6049#ifdef RT_ARCH_AMD64
6050 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
6051
6052 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6053 if (iBitNo1 > iBitNo2)
6054 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6055 else
6056 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6057 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6058
6059#elif defined(RT_ARCH_ARM64)
6060 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6061 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6062
6063 /* and tmpreg, eflreg, #1<<iBitNo1 */
6064 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6065
6066 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6067 if (iBitNo1 > iBitNo2)
6068 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6069 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6070 else
6071 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6072 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6073
6074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6075
6076#else
6077# error "Port me"
6078#endif
6079
6080 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6081 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6082 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6083
6084 /* Free but don't flush the EFlags and tmp registers. */
6085 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6086 iemNativeRegFreeTmp(pReNative, idxEflReg);
6087
6088 /* Make a copy of the core state now as we start the if-block. */
6089 iemNativeCondStartIfBlock(pReNative, off);
6090
6091 return off;
6092}
6093
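/* Worked example of the bit-difference trick used above, assuming the typical
   SF/OF pairing (e.g. IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) for 'jl'):
   SF is bit 7 and OF is bit 11, so we isolate SF with an AND, shift it left by
   11 - 7 = 4 to line it up with OF, and XOR the result with EFLAGS; bit 11 of
   the temporary is then set exactly when SF != OF, which is what the final
   bit-test-and-branch keys on. */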
6094
6095#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
6096 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
6097 do {
6098
6099#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
6100 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
6101 do {
6102
6103/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
6104 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
6105DECL_INLINE_THROW(uint32_t)
6106iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
6107 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6108{
6109 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6110
6111 /* We need an if-block label for the inverted variant, so the lone bit check below can jump straight to the if-block. */
6112 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
6113 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
6114
6115 /* Get the eflags. */
6116 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6117 kIemNativeGstRegUse_ReadOnly);
6118
6119 /* Translate the flag masks to bit numbers. */
6120 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6121 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6122
6123 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6124 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6125 Assert(iBitNo1 != iBitNo);
6126
6127 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6128 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6129 Assert(iBitNo2 != iBitNo);
6130 Assert(iBitNo2 != iBitNo1);
6131
6132#ifdef RT_ARCH_AMD64
6133 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
6134#elif defined(RT_ARCH_ARM64)
6135 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6136#endif
6137
6138 /* Check for the lone bit first. */
6139 if (!fInverted)
6140 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6141 else
6142 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
6143
6144 /* Then extract and compare the other two bits. */
6145#ifdef RT_ARCH_AMD64
6146 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6147 if (iBitNo1 > iBitNo2)
6148 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6149 else
6150 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6151 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6152
6153#elif defined(RT_ARCH_ARM64)
6154 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6155
6156 /* and tmpreg, eflreg, #1<<iBitNo1 */
6157 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6158
6159 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6160 if (iBitNo1 > iBitNo2)
6161 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6162 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6163 else
6164 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6165 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6166
6167 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6168
6169#else
6170# error "Port me"
6171#endif
6172
6173 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6174 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6175 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6176
6177 /* Free but don't flush the EFlags and tmp registers. */
6178 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6179 iemNativeRegFreeTmp(pReNative, idxEflReg);
6180
6181 /* Make a copy of the core state now as we start the if-block. */
6182 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
6183
6184 return off;
6185}
6186
6187
6188#define IEM_MC_IF_CX_IS_NZ() \
6189 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
6190 do {
6191
6192/** Emits code for IEM_MC_IF_CX_IS_NZ. */
6193DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6194{
6195 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6196
6197 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6198 kIemNativeGstRegUse_ReadOnly);
6199 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6200 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6201
6202 iemNativeCondStartIfBlock(pReNative, off);
6203 return off;
6204}
6205
6206
6207#define IEM_MC_IF_ECX_IS_NZ() \
6208 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
6209 do {
6210
6211#define IEM_MC_IF_RCX_IS_NZ() \
6212 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
6213 do {
6214
6215/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
6216DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
6217{
6218 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6219
6220 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6221 kIemNativeGstRegUse_ReadOnly);
6222 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6223 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6224
6225 iemNativeCondStartIfBlock(pReNative, off);
6226 return off;
6227}
6228
6229
6230#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6231 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
6232 do {
6233
6234#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6235 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
6236 do {
6237
6238/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6239DECL_INLINE_THROW(uint32_t)
6240iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
6241{
6242 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6243
6244 /* We have to load both RCX and EFLAGS before we can start branching,
6245 otherwise we'll end up in the else-block with an inconsistent
6246 register allocator state.
6247 Doing EFLAGS first as it's more likely to be loaded, right? */
6248 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6249 kIemNativeGstRegUse_ReadOnly);
6250 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6251 kIemNativeGstRegUse_ReadOnly);
6252
6253 /** @todo we could reduce this to a single branch instruction by spending a
6254 * temporary register and some setnz stuff. Not sure if loops are
6255 * worth it. */
6256 /* Check CX. */
6257 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6258
6259 /* Check the EFlags bit. */
6260 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6261 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6262 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6263 !fCheckIfSet /*fJmpIfSet*/);
6264
6265 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6266 iemNativeRegFreeTmp(pReNative, idxEflReg);
6267
6268 iemNativeCondStartIfBlock(pReNative, off);
6269 return off;
6270}
6271
6272
6273#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6274 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
6275 do {
6276
6277#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6278 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
6279 do {
6280
6281#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6282 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
6283 do {
6284
6285#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6286 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
6287 do {
6288
6289/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
6290 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
6291 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
6292 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6293DECL_INLINE_THROW(uint32_t)
6294iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6295 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
6296{
6297 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6298
6299 /* We have to load both RCX and EFLAGS before we can start branching,
6300 otherwise we'll end up in the else-block with an inconsistent
6301 register allocator state.
6302 Doing EFLAGS first as it's more likely to be loaded, right? */
6303 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6304 kIemNativeGstRegUse_ReadOnly);
6305 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6306 kIemNativeGstRegUse_ReadOnly);
6307
6308 /** @todo we could reduce this to a single branch instruction by spending a
6309 * temporary register and some setnz stuff. Not sure if loops are
6310 * worth it. */
6311 /* Check RCX/ECX. */
6312 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6313
6314 /* Check the EFlags bit. */
6315 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6316 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6317 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6318 !fCheckIfSet /*fJmpIfSet*/);
6319
6320 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6321 iemNativeRegFreeTmp(pReNative, idxEflReg);
6322
6323 iemNativeCondStartIfBlock(pReNative, off);
6324 return off;
6325}
6326
6327
6328
6329/*********************************************************************************************************************************
6330* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6331*********************************************************************************************************************************/
6332/** Number of hidden arguments for CIMPL calls.
6333 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
6334#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6335# define IEM_CIMPL_HIDDEN_ARGS 3
6336#else
6337# define IEM_CIMPL_HIDDEN_ARGS 2
6338#endif
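
/* The hidden arguments are the ones the recompiler prepends before the explicit
   IEM_MC_ARG ones when emitting the CIMPL call: presumably pVCpu and cbInstr,
   with the extra slot on Windows/AMD64 being the VBOXSTRICTRC return buffer
   that gets passed by hidden reference there (assumption; see the note above). */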
6339
6340#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
6341 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
6342
6343#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
6344 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
6345
6346#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
6347 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
6348
6349#define IEM_MC_LOCAL(a_Type, a_Name) \
6350 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
6351
6352#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
6353 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
6354
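/* Illustrative use from a recompiled MC block (sketch only; the names below are
 * typical but made up for this example):
 *      IEM_MC_ARG(uint16_t *,  pu16Dst, 0);
 *      IEM_MC_ARG(uint16_t,    u16Src,  1);
 *      IEM_MC_ARG(uint32_t *,  pEFlags, 2);
 *      IEM_MC_LOCAL(uint16_t,  u16Tmp);
 * Each of these expands to a uint8_t holding an index into pReNative->Core.aVars
 * rather than a real C variable; the value/register binding happens later. */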
6355
6356/**
6357 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
6358 */
6359DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
6360{
6361 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
6362 return IEM_CIMPL_HIDDEN_ARGS;
6363 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
6364 return 1;
6365 return 0;
6366}
6367
6368
6369/**
6370 * Internal work that allocates a variable with kind set to
6371 * kIemNativeVarKind_Invalid and no current stack allocation.
6372 *
6373 * The kind will either be set by the caller or later when the variable is first
6374 * assigned a value.
6375 */
6376static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6377{
6378 Assert(cbType > 0 && cbType <= 64);
6379 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6380 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6381 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6382 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6383 pReNative->Core.aVars[idxVar].cbVar = cbType;
6384 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6385 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6386 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6387 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6388 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6389 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6390 pReNative->Core.aVars[idxVar].u.uValue = 0;
6391 return idxVar;
6392}
6393
6394
6395/**
6396 * Internal work that allocates an argument variable w/o setting enmKind.
6397 */
6398static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6399{
6400 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6401 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6402 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6403
6404 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6405 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
6406 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6407 return idxVar;
6408}
6409
6410
6411/**
6412 * Gets the stack slot for a stack variable, allocating one if necessary.
6413 *
6414 * Calling this function implies that the stack slot will contain a valid
6415 * variable value. The caller deals with any register currently assigned to the
6416 * variable, typically by spilling it into the stack slot.
6417 *
6418 * @returns The stack slot number.
6419 * @param pReNative The recompiler state.
6420 * @param idxVar The variable.
6421 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6422 */
6423DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6424{
6425 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6426 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
6427
6428 /* Already got a slot? */
6429 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6430 if (idxStackSlot != UINT8_MAX)
6431 {
6432 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6433 return idxStackSlot;
6434 }
6435
6436 /*
6437 * A single slot is easy to allocate.
6438 * Allocate them from the top end, closest to BP, to reduce the displacement.
6439 */
6440 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
6441 {
6442 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6443 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6444 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6445 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6446 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
6447 return (uint8_t)iSlot;
6448 }
6449
6450 /*
6451 * We need more than one stack slot.
6452 *
6453 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6454 */
6455 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6456 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
6457 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
6458 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
6459 uint32_t bmStack = ~pReNative->Core.bmStack;
6460 while (bmStack != UINT32_MAX)
6461 {
6462/** @todo allocate from the top to reduce BP displacement. */
6463 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
6464 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6465 if (!(iSlot & fBitAlignMask))
6466 {
6467 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
6468 {
6469 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6470 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6471 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
6472 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
6473 return (uint8_t)iSlot;
6474 }
6475 }
6476 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
6477 }
6478 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6479}
6480
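/* Worked example for the multi-slot path above: a 32 byte variable yields
   fBitAlignMask = 3 and fBitAllocMask = 0xf, so the search only accepts a first
   slot index that is a multiple of four and requires the next four slot bits in
   bmStack to be free before claiming them. */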
6481
6482/**
6483 * Changes the variable to a stack variable.
6484 *
6485 * Currently this is only possible to do the first time the variable is used;
6486 * switching later can be implemented but hasn't been done.
6487 *
6488 * @param pReNative The recompiler state.
6489 * @param idxVar The variable.
6490 * @throws VERR_IEM_VAR_IPE_2
6491 */
6492static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6493{
6494 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6495 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6496 {
6497 /* We could in theory transition from immediate to stack as well, but it
6498 would involve the caller doing work storing the value on the stack. So,
6499 till that's required we only allow transition from invalid. */
6500 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6501 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6502 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6503 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
6504
6505 /* Note! We don't allocate a stack slot here, that's only done when a
6506 slot is actually needed to hold a variable value. */
6507 }
6508}
6509
6510
6511/**
6512 * Sets the variable to a constant (immediate) value.
6513 *
6514 * This does not require stack storage as we know the value and can always
6515 * reload it, unless of course it's referenced.
6516 *
6517 * @param pReNative The recompiler state.
6518 * @param idxVar The variable.
6519 * @param uValue The immediate value.
6520 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6521 */
6522static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6523{
6524 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6525 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
6526 {
6527 /* Only simple transitions for now. */
6528 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6529 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6530 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
6531 }
6532 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6533
6534 pReNative->Core.aVars[idxVar].u.uValue = uValue;
6535 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
6536 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
6537 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
6538}
6539
6540
6541/**
6542 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6543 *
6544 * This does not require stack storage as we know the value and can always
6545 * reload it. Loading is postponed till needed.
6546 *
6547 * @param pReNative The recompiler state.
6548 * @param idxVar The variable.
6549 * @param idxOtherVar The variable to take the (stack) address of.
6550 *
6551 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6552 */
6553static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6554{
6555 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6556 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6557
6558 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6559 {
6560 /* Only simple transitions for now. */
6561 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6562 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6563 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6564 }
6565 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6566
6567 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
6568
6569 /* Update the other variable, ensure it's a stack variable. */
6570 /** @todo handle variables with const values... that'll go boom now. */
6571 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6572 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
6573}
6574
6575
6576/**
6577 * Sets the variable to a reference (pointer) to a guest register reference.
6578 *
6579 * This does not require stack storage as we know the value and can always
6580 * reload it. Loading is postponed till needed.
6581 *
6582 * @param pReNative The recompiler state.
6583 * @param idxVar The variable.
6584 * @param enmRegClass The class of guest registers to reference.
6585 * @param idxReg The register within @a enmRegClass to reference.
6586 *
6587 * @throws VERR_IEM_VAR_IPE_2
6588 */
6589static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6590 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6591{
6592 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6593
6594 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
6595 {
6596 /* Only simple transitions for now. */
6597 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6598 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6599 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
6600 }
6601 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6602
6603 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
6604 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
6605}
6606
6607
6608DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6609{
6610 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6611}
6612
6613
6614DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6615{
6616 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6617
6618 /* Since we're using a generic uint64_t value type, we must truncate it if
6619 the variable is smaller, otherwise we may end up with a too large value
6620 when scaling up an imm8 w/ sign-extension.
6621 
6622 This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
6623 in the bios, bx=1) when running on arm, because clang expects 16-bit
6624 register parameters to have bits 16 and up set to zero. Instead of
6625 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
6626 CF value in the result. */
6627 switch (cbType)
6628 {
6629 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6630 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6631 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6632 }
6633 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6634 return idxVar;
6635}
6636
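/* Worked example of the truncation above, using the 'add bx, 0xffff' case from
   the comment: the imm8 0xff arrives sign-extended as the uint64_t value
   0xffffffffffffffff; with cbType == sizeof(uint16_t) it is masked down to
   0xffff here, so the 16-bit argument register ends up with bits 16 and up
   clear, as the ABI/compiler expects. */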
6637
6638DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6639{
6640 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6641 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6642 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6643 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6644
6645 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6646 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
6647 return idxArgVar;
6648}
6649
6650
6651DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6652{
6653 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6654 /* Don't set to stack now, leave that to the first use as for instance
6655 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6656 return idxVar;
6657}
6658
6659
6660DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6661{
6662 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6663
6664 /* Since we're using a generic uint64_t value type, we must truncate it if
6665 the variable is smaller, otherwise we may end up with a too large value
6666 when scaling up an imm8 w/ sign-extension. */
6667 switch (cbType)
6668 {
6669 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6670 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6671 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6672 }
6673 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6674 return idxVar;
6675}
6676
6677
6678/**
6679 * Releases the variable's register.
6680 *
6681 * The register must have been previously acquired calling
6682 * iemNativeVarRegisterAcquire(), iemNativeVarRegisterAcquireForGuestReg() or
6683 * iemNativeVarRegisterSetAndAcquire().
6684 */
6685DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6686{
6687 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6688 Assert(pReNative->Core.aVars[idxVar].fRegAcquired);
6689 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6690}
6691
6692
6693/**
6694 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6695 * fixed till we call iemNativeVarRegisterRelease.
6696 *
6697 * @returns The host register number.
6698 * @param pReNative The recompiler state.
6699 * @param idxVar The variable.
6700 * @param poff Pointer to the instruction buffer offset.
6701 * In case a register needs to be freed up or the value
6702 * loaded off the stack.
6703 * @param fInitialized Set if the variable must already have been initialized.
6704 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6705 * the case.
6706 * @param idxRegPref Preferred register number or UINT8_MAX.
6707 */
6708DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6709 bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX)
6710{
6711 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6712 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
6713 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6714
6715 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6716 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6717 {
6718 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
6719 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6720 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6721 return idxReg;
6722 }
6723
6724 /*
6725 * If the kind of variable has not yet been set, default to 'stack'.
6726 */
6727 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
6728 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6729 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
6730 iemNativeVarSetKindToStack(pReNative, idxVar);
6731
6732 /*
6733 * We have to allocate a register for the variable, even if it's a stack one,
6734 * as we don't know whether modifications are being made to it before it is
6735 * finalized (todo: analyze and insert hints about that?).
6736 *
6737 * If we can, we try to get the correct register for argument variables. This
6738 * is assuming that most argument variables are fetched as close as possible
6739 * to the actual call, so that there aren't any interfering hidden calls
6740 * (memory accesses, etc) in between.
6741 *
6742 * If we cannot, or it's a local variable, we make sure no argument registers
6743 * that will be used by this MC block will be allocated here, and we always
6744 * prefer non-volatile registers to avoid needing to spill stuff for internal
6745 * calls.
6746 */
6747 /** @todo Detect too early argument value fetches and warn about hidden
6748 * calls causing less optimal code to be generated in the python script. */
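    /* For example (general ABI knowledge, not specific to this file): the first
       call argument register would be RCX on win.amd64, RDI on linux.amd64 and
       X0 on arm64, so grabbing it here while it is still free saves a register
       shuffle just before the call. */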
6749
6750 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6751 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6752 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6753 {
6754 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6755 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6756 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6757 }
6758 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6759 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6760 {
6761 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6762 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6763 & ~pReNative->Core.bmHstRegsWithGstShadow
6764 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6765 & fNotArgsMask;
6766 if (fRegs)
6767 {
6768 /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
6769 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6770 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6771 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6772 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6773 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6774 }
6775 else
6776 {
6777 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6778 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6779 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6780 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6781 }
6782 }
6783 else
6784 {
6785 idxReg = idxRegPref;
6786 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6787 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
6788 }
6789 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6790 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6791
6792 /*
6793 * Load it off the stack if we've got a stack slot.
6794 */
6795 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6796 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6797 {
6798 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6799 switch (pReNative->Core.aVars[idxVar].cbVar)
6800 {
6801 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6802 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6803 case 3: AssertFailed(); RT_FALL_THRU();
6804 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6805 default: AssertFailed(); RT_FALL_THRU();
6806 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6807 }
6808 }
6809 else
6810 {
6811 Assert(idxStackSlot == UINT8_MAX);
6812 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6813 }
6814 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6815 return idxReg;
6816}
6817
6818
6819/**
6820 * The value of variable @a idxVar will be written in full to the @a enmGstReg
6821 * guest register.
6822 *
6823 * This function makes sure there is a register for it and sets it to be the
6824 * current shadow copy of @a enmGstReg.
6825 *
6826 * @returns The host register number.
6827 * @param pReNative The recompiler state.
6828 * @param idxVar The variable.
6829 * @param enmGstReg The guest register this variable will be written to
6830 * after this call.
6831 * @param poff Pointer to the instruction buffer offset.
6832 * In case a register needs to be freed up or the
6833 * variable content needs to be loaded off the stack.
6834 *
6835 * @note We DO NOT expect @a idxVar to be an argument variable,
6836 * because this function is only used in the commit stage of an
6837 * instruction.
6838 */
6839DECL_HIDDEN_THROW(uint8_t)
6840iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
6841{
6842 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6843 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6844 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
6845 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
6846 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
6847 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
6848 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
6849 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6850
6851 /*
6852 * This shouldn't ever be used for arguments, unless it's in a weird else
6853 * branch that doesn't do any calling and even then it's questionable.
6854 *
6855 * However, in case someone writes crazy wrong MC code and does register
6856 * updates before making calls, just use the regular register allocator to
6857 * ensure we get a register suitable for the intended argument number.
6858 */
6859 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
6860
6861 /*
6862 * If there is already a register for the variable, we transfer/set the
6863 * guest shadow copy assignment to it.
6864 */
6865 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6866 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6867 {
6868 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
6869 {
6870 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
6871 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
6872 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
6873 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
6874 }
6875 else
6876 {
6877 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
6878 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
6879 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
6880 }
6881 /** @todo figure this one out. We need some way of making sure the register isn't
6882 * modified after this point, just in case we start writing crappy MC code. */
6883 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
6884 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6885 return idxReg;
6886 }
6887 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
6888
6889 /*
6890 * Because this is supposed to be the commit stage, we just tag along with the
6891 * temporary register allocator and upgrade it to a variable register.
6892 */
6893 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
6894 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
6895 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
6896 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
6897 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
6898 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6899
6900 /*
6901 * Now we need to load the register value.
6902 */
6903 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
6904 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
6905 else
6906 {
6907 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
6908 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6909 switch (pReNative->Core.aVars[idxVar].cbVar)
6910 {
6911 case sizeof(uint64_t):
6912 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
6913 break;
6914 case sizeof(uint32_t):
6915 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
6916 break;
6917 case sizeof(uint16_t):
6918 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
6919 break;
6920 case sizeof(uint8_t):
6921 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
6922 break;
6923 default:
6924 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6925 }
6926 }
6927
6928 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6929 return idxReg;
6930}
6931
6932
6933/**
6934 * Sets the host register for @a idxVarRc to @a idxReg.
6935 *
6936 * The register must not be allocated. Any guest register shadowing will be
6937 * implicitly dropped by this call.
6938 *
6939 * The variable must not have any register associated with it (causes
6940 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
6941 * implied.
6942 *
6943 * @returns idxReg
6944 * @param pReNative The recompiler state.
6945 * @param idxVar The variable.
6946 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
6947 * @param off For recording in debug info.
6948 *
6949 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
6950 */
6951DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
6952{
6953 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6954 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6955 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6956 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
6957 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
6958
6959 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
6960 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6961
6962 iemNativeVarSetKindToStack(pReNative, idxVar);
6963 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6964
6965 return idxReg;
6966}
6967
6968
6969/**
6970 * Convenience wrapper around iemNativeVarRegisterSet that also marks the variable's register as acquired.
6971 */
6972DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6973 uint8_t idxReg, uint32_t *poff)
6974{
6975 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
6976 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6977 return idxReg;
6978}
6979
6980
6981/**
6982 * Worker that frees the stack slots for variable @a idxVar if any allocated.
6983 *
6984 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
6985 */
6986DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6987{
6988 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6989 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6990 {
6991 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
6992 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
6993 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
6994 Assert(cSlots > 0);
6995 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
6996 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
6997 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
6998 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6999 }
7000 else
7001 Assert(idxStackSlot == UINT8_MAX);
7002}
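/* Worked example (illustrative only): a 16 byte variable at stack slot 4
   gives cSlots = (16 + 7) / 8 = 2 and fAllocMask = RT_BIT_32(2) - 1 = 0x3,
   so the code above clears bits 0x3 << 4 = 0x30 in Core.bmStack. */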
7003
7004
7005/**
7006 * Worker that frees a single variable.
7007 *
7008 * ASSUMES that @a idxVar is valid.
7009 */
7010DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7011{
7012 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7013 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7014
7015 /* Free the host register first if any assigned. */
7016 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7017 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7018 {
7019 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7020 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7021 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7022 }
7023
7024 /* Free argument mapping. */
7025 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7026 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7027 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7028
7029 /* Free the stack slots. */
7030 iemNativeVarFreeStackSlots(pReNative, idxVar);
7031
7032 /* Free the actual variable. */
7033 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7034 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7035}
7036
7037
7038/**
7039 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7040 */
7041DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7042{
7043 while (bmVars != 0)
7044 {
7045 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7046 bmVars &= ~RT_BIT_32(idxVar);
7047
7048#if 1 /** @todo optimize by simplifying this later... */
7049 iemNativeVarFreeOneWorker(pReNative, idxVar);
7050#else
7051 /* Only need to free the host register, the rest is done as bulk updates below. */
7052 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7053 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7054 {
7055 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7056 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7057 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7058 }
7059#endif
7060 }
7061#if 0 /** @todo optimize by simplifying this later... */
7062 pReNative->Core.bmVars = 0;
7063 pReNative->Core.bmStack = 0;
7064 pReNative->Core.u64ArgVars = UINT64_MAX;
7065#endif
7066}
7067
7068
7069/**
7070 * This is called by IEM_MC_END() to clean up all variables.
7071 */
7072DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
7073{
7074 uint32_t const bmVars = pReNative->Core.bmVars;
7075 if (bmVars != 0)
7076 iemNativeVarFreeAllSlow(pReNative, bmVars);
7077 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7078 Assert(pReNative->Core.bmStack == 0);
7079}
7080
7081
7082#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
7083
7084/**
7085 * This is called by IEM_MC_FREE_LOCAL.
7086 */
7087DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7088{
7089 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7090 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7091 iemNativeVarFreeOneWorker(pReNative, idxVar);
7092}
7093
7094
7095#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
7096
7097/**
7098 * This is called by IEM_MC_FREE_ARG.
7099 */
7100DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7101{
7102 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7103 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
7104 iemNativeVarFreeOneWorker(pReNative, idxVar);
7105}
7106
7107
7108#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
7109
7110/**
7111 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
7112 */
7113DECL_INLINE_THROW(uint32_t)
7114iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
7115{
7116 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
7117 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
7118 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7119 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
7120 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
7121
7122 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
7123 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
7124 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
7125 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7126
7127 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
7128
7129 /*
7130 * Special case for immediates.
7131 */
7132 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
7133 {
7134 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7135 {
7136 case sizeof(uint16_t):
7137 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7138 break;
7139 case sizeof(uint32_t):
7140 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7141 break;
7142 default: AssertFailed(); break;
7143 }
7144 }
7145 else
7146 {
7147 /*
7148 * The generic solution for now.
7149 */
7150 /** @todo optimize this by having the python script make sure the source
7151 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
7152 * statement. Then we could just transfer the register assignments. */
7153 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
7154 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
7155 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7156 {
7157 case sizeof(uint16_t):
7158 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
7159 break;
7160 case sizeof(uint32_t):
7161 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
7162 break;
7163 default: AssertFailed(); break;
7164 }
7165 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
7166 iemNativeVarRegisterRelease(pReNative, idxVarDst);
7167 }
7168 return off;
7169}
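/* Example (illustrative only): assigning a 32-bit immediate source holding
   0x12345678 to a 16-bit destination takes the immediate path above and just
   re-types the destination as the constant (uint16_t)0x12345678 = 0x5678;
   no native code is emitted in that case. */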
7170
7171
7172
7173/*********************************************************************************************************************************
7174* Emitters for IEM_MC_CALL_CIMPL_XXX *
7175*********************************************************************************************************************************/
7176
7177/**
7178 * Emits code to load a reference to the given guest register into @a idxGprDst.
7179 */
7180DECL_INLINE_THROW(uint32_t)
7181iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7182 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7183{
7184 /*
7185 * Get the offset relative to the CPUMCTX structure.
7186 */
7187 uint32_t offCpumCtx;
7188 switch (enmClass)
7189 {
7190 case kIemNativeGstRegRef_Gpr:
7191 Assert(idxRegInClass < 16);
7192 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7193 break;
7194
7195 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
7196 Assert(idxRegInClass < 4);
7197 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7198 break;
7199
7200 case kIemNativeGstRegRef_EFlags:
7201 Assert(idxRegInClass == 0);
7202 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7203 break;
7204
7205 case kIemNativeGstRegRef_MxCsr:
7206 Assert(idxRegInClass == 0);
7207 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7208 break;
7209
7210 case kIemNativeGstRegRef_FpuReg:
7211 Assert(idxRegInClass < 8);
7212 AssertFailed(); /** @todo what kind of indexing? */
7213 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7214 break;
7215
7216 case kIemNativeGstRegRef_MReg:
7217 Assert(idxRegInClass < 8);
7218 AssertFailed(); /** @todo what kind of indexing? */
7219 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7220 break;
7221
7222 case kIemNativeGstRegRef_XReg:
7223 Assert(idxRegInClass < 16);
7224 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7225 break;
7226
7227 default:
7228 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7229 }
7230
7231 /*
7232 * Load the value into the destination register.
7233 */
7234#ifdef RT_ARCH_AMD64
7235 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7236
7237#elif defined(RT_ARCH_ARM64)
7238 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7239 Assert(offCpumCtx < 4096);
7240 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7241
7242#else
7243# error "Port me!"
7244#endif
7245
7246 return off;
7247}
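/* Illustration (informal): for kIemNativeGstRegRef_Gpr with idxRegInClass=3
   (xBX) the offset is RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3]), and the emitted
   LEA (amd64) / ADD immediate (arm64) leaves &pVCpu->cpum.GstCtx.aGRegs[3]
   in idxGprDst. */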
7248
7249
7250/**
7251 * Common code for CIMPL and AIMPL calls.
7252 *
7253 * These are calls that use argument variables and such. They should not be
7254 * confused with internal calls required to implement an MC operation,
7255 * like a TLB load and similar.
7256 *
7257 * Upon return all that is left to do is to load any hidden arguments and
7258 * perform the call. All argument variables are freed.
7259 *
7260 * @returns New code buffer offset; throws VBox status code on error.
7261 * @param pReNative The native recompile state.
7262 * @param off The code buffer offset.
7263 * @param cArgs The total number of arguments (including the
7264 * hidden ones).
7265 * @param cHiddenArgs The number of hidden arguments. The hidden
7266 * arguments must not have any variable declared for
7267 * them, whereas all the regular arguments must
7268 * (tstIEMCheckMc ensures this).
7269 */
7270DECL_HIDDEN_THROW(uint32_t)
7271iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7272{
7273#ifdef VBOX_STRICT
7274 /*
7275 * Assert sanity.
7276 */
7277 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7278 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7279 for (unsigned i = 0; i < cHiddenArgs; i++)
7280 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7281 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7282 {
7283 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7284 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7285 }
7286 iemNativeRegAssertSanity(pReNative);
7287#endif
7288
7289 /*
7290 * Before we do anything else, go over variables that are referenced and
7291 * make sure they are not in a register.
7292 */
7293 uint32_t bmVars = pReNative->Core.bmVars;
7294 if (bmVars)
7295 {
7296 do
7297 {
7298 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7299 bmVars &= ~RT_BIT_32(idxVar);
7300
7301 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7302 {
7303 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7304 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7305 {
7306 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7307 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7308 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7309 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7310 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7311
7312 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7313 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7314 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7315 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7316 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7317 }
7318 }
7319 } while (bmVars != 0);
7320#if 0 //def VBOX_STRICT
7321 iemNativeRegAssertSanity(pReNative);
7322#endif
7323 }
7324
7325 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7326
7327 /*
7328 * First, go over the host registers that will be used for arguments and make
7329 * sure they either hold the desired argument or are free.
7330 */
7331 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7332 {
7333 for (uint32_t i = 0; i < cRegArgs; i++)
7334 {
7335 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7336 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7337 {
7338 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7339 {
7340 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7341 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7342 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
7343 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7344 if (uArgNo == i)
7345 { /* perfect */ }
7346 /* The variable allocator logic should make sure this is impossible,
7347 except for when the return register is used as a parameter (ARM,
7348 but not x86). */
7349#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7350 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7351 {
7352# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7353# error "Implement this"
7354# endif
7355 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7356 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7357 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7358 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7359 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7360 }
7361#endif
7362 else
7363 {
7364 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7365
7366 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
7367 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7368 else
7369 {
7370 /* just free it, can be reloaded if used again */
7371 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7372 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7373 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7374 }
7375 }
7376 }
7377 else
7378 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7379 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7380 }
7381 }
7382#if 0 //def VBOX_STRICT
7383 iemNativeRegAssertSanity(pReNative);
7384#endif
7385 }
7386
7387 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7388
7389#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7390 /*
7391 * If there are any stack arguments, make sure they are in their place as well.
7392 *
7393 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7394 * the caller) will be loading it later and it must be free (see first loop).
7395 */
7396 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7397 {
7398 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7399 {
7400 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7401 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7402 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7403 {
7404 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7405 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
7406 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
7407 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7408 }
7409 else
7410 {
7411 /* Use ARG0 as temp for stuff we need registers for. */
7412 switch (pReNative->Core.aVars[idxVar].enmKind)
7413 {
7414 case kIemNativeVarKind_Stack:
7415 {
7416 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7417 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7418 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7419 iemNativeStackCalcBpDisp(idxStackSlot));
7420 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7421 continue;
7422 }
7423
7424 case kIemNativeVarKind_Immediate:
7425 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
7426 continue;
7427
7428 case kIemNativeVarKind_VarRef:
7429 {
7430 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
7431 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7432 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
7433 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7434 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7435 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7436 {
7437 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7438 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7439 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7440 }
7441 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7442 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7443 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7444 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7445 continue;
7446 }
7447
7448 case kIemNativeVarKind_GstRegRef:
7449 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
7450 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
7451 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
7452 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7453 continue;
7454
7455 case kIemNativeVarKind_Invalid:
7456 case kIemNativeVarKind_End:
7457 break;
7458 }
7459 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7460 }
7461 }
7462# if 0 //def VBOX_STRICT
7463 iemNativeRegAssertSanity(pReNative);
7464# endif
7465 }
7466#else
7467 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
7468#endif
7469
7470 /*
7471 * Make sure the argument variables are loaded into their respective registers.
7472 *
7473 * We can optimize this by ASSUMING that any register allocations are for
7474 * registers that have already been loaded and are ready. The previous step
7475 * saw to that.
7476 */
7477 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
7478 {
7479 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7480 {
7481 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7482 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7483 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
7484 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
7485 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
7486 else
7487 {
7488 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7489 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7490 {
7491 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
7492 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
7493 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
7494 | RT_BIT_32(idxArgReg);
7495 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
7496 }
7497 else
7498 {
7499 /* Use ARG0 as temp for stuff we need registers for. */
7500 switch (pReNative->Core.aVars[idxVar].enmKind)
7501 {
7502 case kIemNativeVarKind_Stack:
7503 {
7504 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7505 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7506 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
7507 continue;
7508 }
7509
7510 case kIemNativeVarKind_Immediate:
7511 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
7512 continue;
7513
7514 case kIemNativeVarKind_VarRef:
7515 {
7516 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
7517 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7518 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
7519 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7520 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7521 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7522 {
7523 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7524 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7525 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7526 }
7527 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7528 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7529 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
7530 continue;
7531 }
7532
7533 case kIemNativeVarKind_GstRegRef:
7534 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
7535 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
7536 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
7537 continue;
7538
7539 case kIemNativeVarKind_Invalid:
7540 case kIemNativeVarKind_End:
7541 break;
7542 }
7543 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7544 }
7545 }
7546 }
7547#if 0 //def VBOX_STRICT
7548 iemNativeRegAssertSanity(pReNative);
7549#endif
7550 }
7551#ifdef VBOX_STRICT
7552 else
7553 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7554 {
7555 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
7556 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
7557 }
7558#endif
7559
7560 /*
7561 * Free all argument variables (simplified).
7562 * Their lifetime always expires with the call they are for.
7563 */
7564 /** @todo Make the python script check that arguments aren't used after
7565 * IEM_MC_CALL_XXXX. */
7566 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
7567 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
7568 * an argument value. There is also some FPU stuff. */
7569 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
7570 {
7571 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7572 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7573
7574 /* no need to free registers: */
7575 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
7576 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
7577 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
7578 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
7579 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
7580 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
7581
7582 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
7583 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7584 iemNativeVarFreeStackSlots(pReNative, idxVar);
7585 }
7586 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7587
7588 /*
7589 * Flush volatile registers as we make the call.
7590 */
7591 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
7592
7593 return off;
7594}
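/* Recap (informal) of the ordering above: 1) spill variables that are
   referenced by other variables, 2) evict or relocate whatever currently
   occupies the argument registers, 3) write any stack passed arguments
   (only when IEMNATIVE_FP_OFF_STACK_ARG0 is defined), 4) load the remaining
   argument registers, 5) free the argument variables, and 6) flush volatile
   registers via iemNativeRegMoveAndFreeAndFlushAtCall. */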
7595
7596
7597/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
7598DECL_HIDDEN_THROW(uint32_t)
7599iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
7600 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
7601
7602{
7603 /*
7604 * Do all the call setup and cleanup.
7605 */
7606 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
7607
7608 /*
7609 * Load the two or three hidden arguments.
7610 */
7611#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7612 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7613 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7614 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
7615#else
7616 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7617 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
7618#endif
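/* Informal note: on most hosts the hidden arguments are simply pVCpu (ARG0)
   and cbInstr (ARG1); the Windows/amd64 strict build above additionally
   passes a pointer to a stack allocated VBOXSTRICTRC as ARG0, shifting the
   other two up by one. */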
7619
7620 /*
7621 * Make the call and check the return code.
7622 *
7623 * Shadow PC copies are always flushed here; other stuff depends on flags.
7624 * Segment and general purpose registers are explicitly flushed via the
7625 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
7626 * macros.
7627 */
7628 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
7629#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7630 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7631#endif
7632 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
7633 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
7634 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
7635 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
7636
7637 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7638}
7639
7640
7641#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7642 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
7643
7644/** Emits code for IEM_MC_CALL_CIMPL_1. */
7645DECL_INLINE_THROW(uint32_t)
7646iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7647 uintptr_t pfnCImpl, uint8_t idxArg0)
7648{
7649 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7650 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
7651}
7652
7653
7654#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7655 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
7656
7657/** Emits code for IEM_MC_CALL_CIMPL_2. */
7658DECL_INLINE_THROW(uint32_t)
7659iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7660 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
7661{
7662 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7663 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7664 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
7665}
7666
7667
7668#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7669 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7670 (uintptr_t)a_pfnCImpl, a0, a1, a2)
7671
7672/** Emits code for IEM_MC_CALL_CIMPL_3. */
7673DECL_INLINE_THROW(uint32_t)
7674iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7675 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7676{
7677 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7678 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7679 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7680 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
7681}
7682
7683
7684#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
7685 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7686 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
7687
7688/** Emits code for IEM_MC_CALL_CIMPL_4. */
7689DECL_INLINE_THROW(uint32_t)
7690iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7691 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7692{
7693 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7694 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7695 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7696 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7697 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
7698}
7699
7700
7701#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
7702 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7703 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
7704
7705/** Emits code for IEM_MC_CALL_CIMPL_5. */
7706DECL_INLINE_THROW(uint32_t)
7707iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7708 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
7709{
7710 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7711 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7712 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7713 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7714 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
7715 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
7716}
7717
7718
7719/** Recompiler debugging: Flush guest register shadow copies. */
7720#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
7721
7722
7723
7724/*********************************************************************************************************************************
7725* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
7726*********************************************************************************************************************************/
7727
7728/**
7729 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
7730 */
7731DECL_INLINE_THROW(uint32_t)
7732iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7733 uintptr_t pfnAImpl, uint8_t cArgs)
7734{
7735 if (idxVarRc != UINT8_MAX)
7736 {
7737 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
7738 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
7739 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
7740 }
7741
7742 /*
7743 * Do all the call setup and cleanup.
7744 */
7745 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
7746
7747 /*
7748 * Make the call and update the return code variable if we've got one.
7749 */
7750 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
7751 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
7752 {
7753 pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
7754 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
7755 }
7756
7757 return off;
7758}
7759
7760
7761
7762#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
7763 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
7764
7765#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
7766 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
7767
7768/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
7769DECL_INLINE_THROW(uint32_t)
7770iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
7771{
7772 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
7773}
7774
7775
7776#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
7777 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
7778
7779#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
7780 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
7781
7782/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
7783DECL_INLINE_THROW(uint32_t)
7784iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
7785{
7786 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7787 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
7788}
7789
7790
7791#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
7792 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
7793
7794#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
7795 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
7796
7797/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
7798DECL_INLINE_THROW(uint32_t)
7799iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7800 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
7801{
7802 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7803 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7804 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
7805}
7806
7807
7808#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
7809 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
7810
7811#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
7812 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
7813
7814/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
7815DECL_INLINE_THROW(uint32_t)
7816iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7817 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7818{
7819 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7820 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7821 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7822 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
7823}
7824
7825
7826#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
7827 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7828
7829#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
7830 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7831
7832/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
7833DECL_INLINE_THROW(uint32_t)
7834iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7835 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7836{
7837 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7838 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7839 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7840 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
7841 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
7842}
7843
7844
7845
7846/*********************************************************************************************************************************
7847* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
7848*********************************************************************************************************************************/
7849
7850#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
7851 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
7852
7853#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7854 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
7855
7856#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7857 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
7858
7859#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7860 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
7861
7862
7863/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
7864 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
7865DECL_INLINE_THROW(uint32_t)
7866iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
7867{
7868 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7869 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7870 Assert(iGRegEx < 20);
7871
7872 /* Same discussion as in iemNativeEmitFetchGregU16 */
7873 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7874 kIemNativeGstRegUse_ReadOnly);
7875
7876 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7877 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7878
7879 /* The value is zero-extended to the full 64-bit host register width. */
7880 if (iGRegEx < 16)
7881 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7882 else
7883 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7884
7885 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7886 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7887 return off;
7888}
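/* Illustration (informal): fetching AH is encoded as iGRegEx = 16, so the
   shadow of RAX (iGRegEx & 15 == 0) is used and iemNativeEmitLoadGprFromGpr8Hi
   moves bits 15:8 down into the variable's host register; the low byte case
   (iGRegEx < 16) copies bits 7:0 instead. */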
7889
7890
7891#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7892 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
7893
7894#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7895 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
7896
7897#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7898 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
7899
7900/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
7901DECL_INLINE_THROW(uint32_t)
7902iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
7903{
7904 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7905 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
7906 Assert(iGRegEx < 20);
7907
7908 /* Same discussion as in iemNativeEmitFetchGregU16 */
7909 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7910 kIemNativeGstRegUse_ReadOnly);
7911
7912 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7913 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7914
7915 if (iGRegEx < 16)
7916 {
7917 switch (cbSignExtended)
7918 {
7919 case sizeof(uint16_t):
7920 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7921 break;
7922 case sizeof(uint32_t):
7923 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7924 break;
7925 case sizeof(uint64_t):
7926 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7927 break;
7928 default: AssertFailed(); break;
7929 }
7930 }
7931 else
7932 {
7933 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7934 switch (cbSignExtended)
7935 {
7936 case sizeof(uint16_t):
7937 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7938 break;
7939 case sizeof(uint32_t):
7940 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7941 break;
7942 case sizeof(uint64_t):
7943 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7944 break;
7945 default: AssertFailed(); break;
7946 }
7947 }
7948
7949 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7950 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7951 return off;
7952}
7953
7954
7955
7956#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
7957 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
7958
7959#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
7960 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7961
7962#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
7963 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7964
7965/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
7966DECL_INLINE_THROW(uint32_t)
7967iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
7968{
7969 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7970 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7971 Assert(iGReg < 16);
7972
7973 /*
7974 * We can either just load the low 16-bit of the GPR into a host register
7975 * for the variable, or we can do so via a shadow copy host register. The
7976 * latter will avoid having to reload it if it's being stored later, but
7977 * will waste a host register if it isn't touched again. Since we don't
7978 * know what's going to happen, we choose the latter for now.
7979 */
7980 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7981 kIemNativeGstRegUse_ReadOnly);
7982
7983 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7984 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7985 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7986 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7987
7988 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7989 return off;
7990}
7991
7992
7993#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
7994 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7995
7996#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
7997 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7998
7999/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
8000DECL_INLINE_THROW(uint32_t)
8001iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
8002{
8003 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8004 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8005 Assert(iGReg < 16);
8006
8007 /*
8008 * We can either just load the low 16-bit of the GPR into a host register
8009 * for the variable, or we can do so via a shadow copy host register. The
8010 * latter will avoid having to reload it if it's being stored later, but
8011 * will waste a host register if it isn't touched again. Since we don't
8012 * know what's going to happen, we choose the latter for now.
8013 */
8014 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8015 kIemNativeGstRegUse_ReadOnly);
8016
8017 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8018 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8019 if (cbSignExtended == sizeof(uint32_t))
8020 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8021 else
8022 {
8023 Assert(cbSignExtended == sizeof(uint64_t));
8024 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8025 }
8026 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8027
8028 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8029 return off;
8030}
8031
8032
8033#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
8034 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
8035
8036#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
8037 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
8038
8039/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
8040DECL_INLINE_THROW(uint32_t)
8041iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8042{
8043 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8044 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
8045 Assert(iGReg < 16);
8046
8047 /*
8048 * We can either just load the low 32-bit of the GPR into a host register
8049 * for the variable, or we can do so via a shadow copy host register. The
8050 * latter will avoid having to reload it if it's being stored later, but
8051 * will waste a host register if it isn't touched again. Since we don't
8052 * know what's going to happen, we choose the latter for now.
8053 */
8054 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8055 kIemNativeGstRegUse_ReadOnly);
8056
8057 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8058 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8059 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8060 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8061
8062 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8063 return off;
8064}
8065
8066
8067#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
8068 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
8069
8070/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
8071DECL_INLINE_THROW(uint32_t)
8072iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8073{
8074 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8075 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8076 Assert(iGReg < 16);
8077
8078 /*
8079 * We can either just load the low 32-bit of the GPR into a host register
8080 * for the variable, or we can do so via a shadow copy host register. The
8081 * latter will avoid having to reload it if it's being stored later, but
8082 * will waste a host register if it isn't touched again. Since we don't
8083 * know what's going to happen, we choose the latter for now.
8084 */
8085 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8086 kIemNativeGstRegUse_ReadOnly);
8087
8088 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8089 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8090 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8091 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8092
8093 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8094 return off;
8095}
8096
8097
8098#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
8099 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8100
8101#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
8102 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8103
8104/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
8105 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
8106DECL_INLINE_THROW(uint32_t)
8107iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8108{
8109 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8110 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8111 Assert(iGReg < 16);
8112
8113 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8114 kIemNativeGstRegUse_ReadOnly);
8115
8116 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8117 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8118 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
8119 /** @todo name the register a shadow one already? */
8120 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8121
8122 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8123 return off;
8124}
8125
8126
8127
8128/*********************************************************************************************************************************
8129* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
8130*********************************************************************************************************************************/
8131
8132#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
8133 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
8134
8135/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
8136DECL_INLINE_THROW(uint32_t)
8137iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
8138{
8139 Assert(iGRegEx < 20);
8140 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8141 kIemNativeGstRegUse_ForUpdate);
8142#ifdef RT_ARCH_AMD64
8143 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8144
8145 /* To the lowest byte of the register: mov r8, imm8 */
8146 if (iGRegEx < 16)
8147 {
8148 if (idxGstTmpReg >= 8)
8149 pbCodeBuf[off++] = X86_OP_REX_B;
8150 else if (idxGstTmpReg >= 4)
8151 pbCodeBuf[off++] = X86_OP_REX;
8152 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8153 pbCodeBuf[off++] = u8Value;
8154 }
8155    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
8156 else if (idxGstTmpReg < 4)
8157 {
8158 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
8159 pbCodeBuf[off++] = u8Value;
8160 }
8161 else
8162 {
8163 /* ror reg64, 8 */
8164 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8165 pbCodeBuf[off++] = 0xc1;
8166 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8167 pbCodeBuf[off++] = 8;
8168
8169 /* mov reg8, imm8 */
8170 if (idxGstTmpReg >= 8)
8171 pbCodeBuf[off++] = X86_OP_REX_B;
8172 else if (idxGstTmpReg >= 4)
8173 pbCodeBuf[off++] = X86_OP_REX;
8174 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8175 pbCodeBuf[off++] = u8Value;
8176
8177 /* rol reg64, 8 */
8178 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8179 pbCodeBuf[off++] = 0xc1;
8180 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8181 pbCodeBuf[off++] = 8;
8182 }
8183
8184#elif defined(RT_ARCH_ARM64)
8185 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
8186 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8187 if (iGRegEx < 16)
8188 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
8189 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
8190 else
8191 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
8192 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
8193 iemNativeRegFreeTmp(pReNative, idxImmReg);
8194
8195#else
8196# error "Port me!"
8197#endif
8198
8199 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8200
8201 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8202
8203 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8204 return off;
8205}
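/* Worked example (editorial, hedged): storing 0x42 into guest CH while RCX is
   shadowed in, say, host r9 takes the rotate path above and should emit roughly
        ror r9, 8            ; bring the high byte down to bits 7:0
        mov r9b, 0x42        ; overwrite just that byte
        rol r9, 8            ; restore the original byte order
   which is what the 12-byte instruction buffer reservation has to cover. */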
8206
8207
8208#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
8209 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
8210
8211/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
8212DECL_INLINE_THROW(uint32_t)
8213iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
8214{
8215 Assert(iGRegEx < 20);
8216 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8217
8218 /*
8219     * If it's a constant value (unlikely) we treat this as an
8220 * IEM_MC_STORE_GREG_U8_CONST statement.
8221 */
8222 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8223 { /* likely */ }
8224 else
8225 {
8226 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8227 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8228 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8229 }
8230
8231 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8232 kIemNativeGstRegUse_ForUpdate);
8233 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8234
8235#ifdef RT_ARCH_AMD64
8236 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
8237 if (iGRegEx < 16)
8238 {
8239 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
8240 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8241 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8242 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8243 pbCodeBuf[off++] = X86_OP_REX;
8244 pbCodeBuf[off++] = 0x8a;
8245 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8246 }
8247    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
8248 else if (idxGstTmpReg < 4 && idxVarReg < 4)
8249 {
8250 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
8251 pbCodeBuf[off++] = 0x8a;
8252 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
8253 }
8254 else
8255 {
8256 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
8257
8258 /* ror reg64, 8 */
8259 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8260 pbCodeBuf[off++] = 0xc1;
8261 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8262 pbCodeBuf[off++] = 8;
8263
8264 /* mov reg8, reg8(r/m) */
8265 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8266 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8267 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8268 pbCodeBuf[off++] = X86_OP_REX;
8269 pbCodeBuf[off++] = 0x8a;
8270 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8271
8272 /* rol reg64, 8 */
8273 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8274 pbCodeBuf[off++] = 0xc1;
8275 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8276 pbCodeBuf[off++] = 8;
8277 }
8278
8279#elif defined(RT_ARCH_ARM64)
8280 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
8281 or
8282 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
8283 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8284 if (iGRegEx < 16)
8285 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
8286 else
8287 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
8288
8289#else
8290# error "Port me!"
8291#endif
8292 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8293
8294 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8295
8296 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8297 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8298 return off;
8299}
8300
8301
8302
8303#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
8304 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
8305
8306/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
8307DECL_INLINE_THROW(uint32_t)
8308iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
8309{
8310 Assert(iGReg < 16);
8311 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8312 kIemNativeGstRegUse_ForUpdate);
8313#ifdef RT_ARCH_AMD64
8314 /* mov reg16, imm16 */
8315 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8316 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8317 if (idxGstTmpReg >= 8)
8318 pbCodeBuf[off++] = X86_OP_REX_B;
8319 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
8320 pbCodeBuf[off++] = RT_BYTE1(uValue);
8321 pbCodeBuf[off++] = RT_BYTE2(uValue);
8322
8323#elif defined(RT_ARCH_ARM64)
8324 /* movk xdst, #uValue, lsl #0 */
8325 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8326 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
8327
8328#else
8329# error "Port me!"
8330#endif
8331
8332 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8333
8334 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8335 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8336 return off;
8337}
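/* Editorial note on the ARM64 path above (a sketch, not authoritative): MOVK with
   LSL #0 replaces only bits 15:0 of the destination and leaves bits 63:16 alone,
   e.g.
        movk x3, #0x1234     ; x3 = (x3 & ~0xffff) | 0x1234
   which matches the x86 semantics of a 16-bit GPR write, so no separate
   insert/merge instruction is needed. */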
8338
8339
8340#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
8341 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
8342
8343/** Emits code for IEM_MC_STORE_GREG_U16. */
8344DECL_INLINE_THROW(uint32_t)
8345iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8346{
8347 Assert(iGReg < 16);
8348 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8349
8350 /*
8351     * If it's a constant value (unlikely) we treat this as an
8352 * IEM_MC_STORE_GREG_U16_CONST statement.
8353 */
8354 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8355 { /* likely */ }
8356 else
8357 {
8358 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8360 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8361 }
8362
8363 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8364 kIemNativeGstRegUse_ForUpdate);
8365
8366#ifdef RT_ARCH_AMD64
8367 /* mov reg16, reg16 or [mem16] */
8368 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8369 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8370 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8371 {
8372 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
8373 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
8374 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
8375 pbCodeBuf[off++] = 0x8b;
8376 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
8377 }
8378 else
8379 {
8380 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
8381 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8382 if (idxGstTmpReg >= 8)
8383 pbCodeBuf[off++] = X86_OP_REX_R;
8384 pbCodeBuf[off++] = 0x8b;
8385 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8386 }
8387
8388#elif defined(RT_ARCH_ARM64)
8389 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
8390 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8391 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8392 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
8393 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8394
8395#else
8396# error "Port me!"
8397#endif
8398
8399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8400
8401 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8402 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8403 return off;
8404}
8405
8406
8407#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
8408 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
8409
8410/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
8411DECL_INLINE_THROW(uint32_t)
8412iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
8413{
8414 Assert(iGReg < 16);
8415 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8416 kIemNativeGstRegUse_ForFullWrite);
8417 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
8418 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8419 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8420 return off;
8421}
8422
8423
8424#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
8425 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
8426
8427/** Emits code for IEM_MC_STORE_GREG_U32. */
8428DECL_INLINE_THROW(uint32_t)
8429iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8430{
8431 Assert(iGReg < 16);
8432 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8433
8434 /*
8435     * If it's a constant value (unlikely) we treat this as an
8436 * IEM_MC_STORE_GREG_U32_CONST statement.
8437 */
8438 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8439 { /* likely */ }
8440 else
8441 {
8442 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8443 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8444 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8445 }
8446
8447 /*
8448     * For the rest we allocate a guest register for the variable and write
8449 * it to the CPUMCTX structure.
8450 */
8451 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
8452 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8453#ifdef VBOX_STRICT
8454 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
8455#endif
8456 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8457 return off;
8458}
8459
8460
8461#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
8462 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
8463
8464/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
8465DECL_INLINE_THROW(uint32_t)
8466iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
8467{
8468 Assert(iGReg < 16);
8469 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8470 kIemNativeGstRegUse_ForFullWrite);
8471 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
8472 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8473 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8474 return off;
8475}
8476
8477
8478#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
8479 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
8480
8481/** Emits code for IEM_MC_STORE_GREG_U64. */
8482DECL_INLINE_THROW(uint32_t)
8483iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8484{
8485 Assert(iGReg < 16);
8486 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8487
8488 /*
8489     * If it's a constant value (unlikely) we treat this as an
8490 * IEM_MC_STORE_GREG_U64_CONST statement.
8491 */
8492 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8493 { /* likely */ }
8494 else
8495 {
8496 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8497 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8498 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
8499 }
8500
8501 /*
8502     * For the rest we allocate a guest register for the variable and write
8503 * it to the CPUMCTX structure.
8504 */
8505 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
8506 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8507 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8508 return off;
8509}
8510
8511
8512#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
8513 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
8514
8515/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
8516DECL_INLINE_THROW(uint32_t)
8517iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
8518{
8519 Assert(iGReg < 16);
8520 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8521 kIemNativeGstRegUse_ForUpdate);
8522 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
8523 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8524 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8525 return off;
8526}
8527
8528
8529/*********************************************************************************************************************************
8530* General purpose register manipulation (add, sub). *
8531*********************************************************************************************************************************/
8532
8533#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
8534 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
8535
8536/** Emits code for IEM_MC_ADD_GREG_U16. */
8537DECL_INLINE_THROW(uint32_t)
8538iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
8539{
8540 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8541 kIemNativeGstRegUse_ForUpdate);
8542
8543#ifdef RT_ARCH_AMD64
8544 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8545 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8546 if (idxGstTmpReg >= 8)
8547 pbCodeBuf[off++] = X86_OP_REX_B;
8548 if (uAddend == 1)
8549 {
8550 pbCodeBuf[off++] = 0xff; /* inc */
8551 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8552 }
8553 else
8554 {
8555 pbCodeBuf[off++] = 0x81;
8556 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8557 pbCodeBuf[off++] = uAddend;
8558 pbCodeBuf[off++] = 0;
8559 }
8560
8561#else
8562 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8563 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8564
8565    /* add tmp, gstgrp, uAddend */
8566 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
8567
8568 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
8569 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
8570
8571 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8572#endif
8573
8574 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8575
8576 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8577
8578 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8579 return off;
8580}
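/* Worked encoding example (editorial, hedged): adding 2 to guest SI while it is
   shadowed in host rsi (register 6) should make the AMD64 path above emit
        66 81 C6 02 00       ; add si, 0x0002
   (operand-size prefix, opcode 0x81 /0, ModRM 0xC6, 16-bit immediate); the
   uAddend == 1 case takes the shorter inc encoding instead. */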
8581
8582
8583#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
8584 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
8585
8586#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
8587 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
8588
8589/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
8590DECL_INLINE_THROW(uint32_t)
8591iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
8592{
8593 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8594 kIemNativeGstRegUse_ForUpdate);
8595
8596#ifdef RT_ARCH_AMD64
8597 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8598 if (f64Bit)
8599 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
8600 else if (idxGstTmpReg >= 8)
8601 pbCodeBuf[off++] = X86_OP_REX_B;
8602 if (uAddend == 1)
8603 {
8604 pbCodeBuf[off++] = 0xff; /* inc */
8605 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8606 }
8607 else if (uAddend < 128)
8608 {
8609 pbCodeBuf[off++] = 0x83; /* add */
8610 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8611 pbCodeBuf[off++] = RT_BYTE1(uAddend);
8612 }
8613 else
8614 {
8615 pbCodeBuf[off++] = 0x81; /* add */
8616 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8617 pbCodeBuf[off++] = RT_BYTE1(uAddend);
8618 pbCodeBuf[off++] = 0;
8619 pbCodeBuf[off++] = 0;
8620 pbCodeBuf[off++] = 0;
8621 }
8622
8623#else
8624    /* add gstgrp, gstgrp, uAddend */
8625 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8626 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
8627
8628#endif
8629
8630 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8631
8632 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8633
8634 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8635 return off;
8636}
8637
8638
8639
8640#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
8641 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
8642
8643/** Emits code for IEM_MC_SUB_GREG_U16. */
8644DECL_INLINE_THROW(uint32_t)
8645iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
8646{
8647 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8648 kIemNativeGstRegUse_ForUpdate);
8649
8650#ifdef RT_ARCH_AMD64
8651 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8652 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8653 if (idxGstTmpReg >= 8)
8654 pbCodeBuf[off++] = X86_OP_REX_B;
8655 if (uSubtrahend == 1)
8656 {
8657 pbCodeBuf[off++] = 0xff; /* dec */
8658 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8659 }
8660 else
8661 {
8662 pbCodeBuf[off++] = 0x81;
8663 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8664 pbCodeBuf[off++] = uSubtrahend;
8665 pbCodeBuf[off++] = 0;
8666 }
8667
8668#else
8669 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8670 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8671
8672 /* sub tmp, gstgrp, uSubtrahend */
8673 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
8674
8675 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
8676 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
8677
8678 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8679#endif
8680
8681 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8682
8683 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8684
8685 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8686 return off;
8687}
8688
8689
8690#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
8691 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
8692
8693#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
8694 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
8695
8696/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
8697DECL_INLINE_THROW(uint32_t)
8698iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
8699{
8700 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8701 kIemNativeGstRegUse_ForUpdate);
8702
8703#ifdef RT_ARCH_AMD64
8704 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8705 if (f64Bit)
8706 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
8707 else if (idxGstTmpReg >= 8)
8708 pbCodeBuf[off++] = X86_OP_REX_B;
8709 if (uSubtrahend == 1)
8710 {
8711 pbCodeBuf[off++] = 0xff; /* dec */
8712 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8713 }
8714 else if (uSubtrahend < 128)
8715 {
8716 pbCodeBuf[off++] = 0x83; /* sub */
8717 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8718 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8719 }
8720 else
8721 {
8722 pbCodeBuf[off++] = 0x81; /* sub */
8723 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8724 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8725 pbCodeBuf[off++] = 0;
8726 pbCodeBuf[off++] = 0;
8727 pbCodeBuf[off++] = 0;
8728 }
8729
8730#else
8731 /* sub tmp, gstgrp, uSubtrahend */
8732 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8733 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
8734
8735#endif
8736
8737 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8738
8739 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8740
8741 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8742 return off;
8743}
8744
8745
8746
8747/*********************************************************************************************************************************
8748* EFLAGS *
8749*********************************************************************************************************************************/
8750
8751#define IEM_MC_FETCH_EFLAGS(a_EFlags) \
8752 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
8753
8754/** Handles IEM_MC_FETCH_EFLAGS. */
8755DECL_INLINE_THROW(uint32_t)
8756iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
8757{
8758 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
8759 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
8760
8761 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
8762 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8763 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8764 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
8765 return off;
8766}
8767
8768
8769#define IEM_MC_COMMIT_EFLAGS(a_EFlags) \
8770 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
8771
8772/** Handles IEM_MC_COMMIT_EFLAGS. */
8773DECL_INLINE_THROW(uint32_t)
8774iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
8775{
8776 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
8777 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
8778
8779 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
8780
8781#ifdef VBOX_STRICT
8782 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
8783 uint32_t offFixup = off;
8784 off = iemNativeEmitJnzToFixed(pReNative, off, off);
8785 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
8786 iemNativeFixupFixedJump(pReNative, offFixup, off);
8787
8788 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
8789 offFixup = off;
8790 off = iemNativeEmitJzToFixed(pReNative, off, off);
8791 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
8792 iemNativeFixupFixedJump(pReNative, offFixup, off);
8793#endif
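    /* Editorial note (hedged): the strict checks above are meant to trap obviously
       corrupt eflags values before they reach the guest context - committing a
       value with bit 1 clear (the reserved always-one flag) should hit the 0x2001
       breakpoint, and one with any reserved read-as-zero bit set the 0x2002 one. */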
8794
8795 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8796 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
8797 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
8798 return off;
8799}
8800
8801
8802
8803/*********************************************************************************************************************************
8804* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
8805*********************************************************************************************************************************/
8806
8807#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
8808 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
8809
8810#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
8811 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
8812
8813#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
8814 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
8815
8816
8817/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
8818 * IEM_MC_FETCH_SREG_ZX_U64. */
8819DECL_INLINE_THROW(uint32_t)
8820iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
8821{
8822 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8823 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
8824 Assert(iSReg < X86_SREG_COUNT);
8825
8826 /*
8827     * For now, we will not create a shadow copy of a selector. The rationale
8828     * is that since we do not recompile the popping and loading of segment
8829     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
8830     * pushing and moving to registers, there is only a small chance that the
8831     * shadow copy will be accessed again before the register is reloaded. One
8832     * scenario would be nested calls in 16-bit code, but I doubt it's worth
8833     * the extra register pressure atm.
8834     *
8835     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
8836     * and iemNativeVarRegisterAcquire for a load scenario. We've only got the
8837     * store scenario covered at present (r160730).
8838 */
8839 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8840 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8841 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
8842 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8843 return off;
8844}
8845
8846
8847
8848/*********************************************************************************************************************************
8849* Register references. *
8850*********************************************************************************************************************************/
8851
8852#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
8853 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
8854
8855#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
8856 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
8857
8858/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
8859DECL_INLINE_THROW(uint32_t)
8860iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
8861{
8862 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8863 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8864 Assert(iGRegEx < 20);
8865
8866 if (iGRegEx < 16)
8867 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8868 else
8869 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
8870
8871 /* If we've delayed writing back the register value, flush it now. */
8872 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8873
8874 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8875 if (!fConst)
8876 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
8877
8878 return off;
8879}
8880
8881#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
8882 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
8883
8884#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
8885 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
8886
8887#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
8888 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
8889
8890#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
8891 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
8892
8893#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
8894 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
8895
8896#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
8897 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
8898
8899#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
8900 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
8901
8902#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
8903 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
8904
8905#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
8906 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
8907
8908#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
8909 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
8910
8911/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
8912DECL_INLINE_THROW(uint32_t)
8913iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
8914{
8915 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8916 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8917 Assert(iGReg < 16);
8918
8919 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
8920
8921 /* If we've delayed writing back the register value, flush it now. */
8922 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
8923
8924 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8925 if (!fConst)
8926 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
8927
8928 return off;
8929}
8930
8931
8932#define IEM_MC_REF_EFLAGS(a_pEFlags) \
8933 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
8934
8935/** Handles IEM_MC_REF_EFLAGS. */
8936DECL_INLINE_THROW(uint32_t)
8937iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
8938{
8939 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8940 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8941
8942 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
8943
8944 /* If we've delayed writing back the register value, flush it now. */
8945 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
8946
8947 /* If there is a shadow copy of guest EFLAGS, flush it now. */
8948 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
8949
8950 return off;
8951}
8952
8953
8954/*********************************************************************************************************************************
8955* Effective Address Calculation *
8956*********************************************************************************************************************************/
8957#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
8958 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
8959
8960/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
8961 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
8962DECL_INLINE_THROW(uint32_t)
8963iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8964 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
8965{
8966 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
8967
8968 /*
8969 * Handle the disp16 form with no registers first.
8970 *
8971 * Convert to an immediate value, as that'll delay the register allocation
8972 * and assignment till the memory access / call / whatever and we can use
8973 * a more appropriate register (or none at all).
8974 */
8975 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
8976 {
8977 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
8978 return off;
8979 }
8980
8981    /* Determine the displacement. */
8982 uint16_t u16EffAddr;
8983 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
8984 {
8985 case 0: u16EffAddr = 0; break;
8986 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
8987 case 2: u16EffAddr = u16Disp; break;
8988 default: AssertFailedStmt(u16EffAddr = 0);
8989 }
8990
8991 /* Determine the registers involved. */
8992 uint8_t idxGstRegBase;
8993 uint8_t idxGstRegIndex;
8994 switch (bRm & X86_MODRM_RM_MASK)
8995 {
8996 case 0:
8997 idxGstRegBase = X86_GREG_xBX;
8998 idxGstRegIndex = X86_GREG_xSI;
8999 break;
9000 case 1:
9001 idxGstRegBase = X86_GREG_xBX;
9002 idxGstRegIndex = X86_GREG_xDI;
9003 break;
9004 case 2:
9005 idxGstRegBase = X86_GREG_xBP;
9006 idxGstRegIndex = X86_GREG_xSI;
9007 break;
9008 case 3:
9009 idxGstRegBase = X86_GREG_xBP;
9010 idxGstRegIndex = X86_GREG_xDI;
9011 break;
9012 case 4:
9013 idxGstRegBase = X86_GREG_xSI;
9014 idxGstRegIndex = UINT8_MAX;
9015 break;
9016 case 5:
9017 idxGstRegBase = X86_GREG_xDI;
9018 idxGstRegIndex = UINT8_MAX;
9019 break;
9020 case 6:
9021 idxGstRegBase = X86_GREG_xBP;
9022 idxGstRegIndex = UINT8_MAX;
9023 break;
9024#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
9025 default:
9026#endif
9027 case 7:
9028 idxGstRegBase = X86_GREG_xBX;
9029 idxGstRegIndex = UINT8_MAX;
9030 break;
9031 }
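    /* Worked example (editorial sketch): bRm=0x42 (mod=01, rm=010) with
       u16Disp=0x0010 decodes above to base=BP, index=SI and u16EffAddr=0x0010,
       i.e. the classic [bp+si+disp8] form; the code emitted below then computes
       (uint16_t)(BP + SI + 0x10) into the result register. */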
9032
9033 /*
9034 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
9035 */
9036 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9037 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9038 kIemNativeGstRegUse_ReadOnly);
9039 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
9040 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9041 kIemNativeGstRegUse_ReadOnly)
9042 : UINT8_MAX;
9043#ifdef RT_ARCH_AMD64
9044 if (idxRegIndex == UINT8_MAX)
9045 {
9046 if (u16EffAddr == 0)
9047 {
9048            /* movzx ret, base */
9049 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
9050 }
9051 else
9052 {
9053 /* lea ret32, [base64 + disp32] */
9054 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9055 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9056 if (idxRegRet >= 8 || idxRegBase >= 8)
9057 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9058 pbCodeBuf[off++] = 0x8d;
9059 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9060 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
9061 else
9062 {
9063 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
9064 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9065 }
9066 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9067 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9068 pbCodeBuf[off++] = 0;
9069 pbCodeBuf[off++] = 0;
9070 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9071
9072 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9073 }
9074 }
9075 else
9076 {
9077 /* lea ret32, [index64 + base64 (+ disp32)] */
9078 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9079 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9080 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9081 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9082 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9083 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9084 pbCodeBuf[off++] = 0x8d;
9085 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
9086 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9087 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
9088 if (bMod == X86_MOD_MEM4)
9089 {
9090 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9091 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9092 pbCodeBuf[off++] = 0;
9093 pbCodeBuf[off++] = 0;
9094 }
9095 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9096 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9097 }
9098
9099#elif defined(RT_ARCH_ARM64)
9100 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9101 if (u16EffAddr == 0)
9102 {
9103 if (idxRegIndex == UINT8_MAX)
9104 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
9105 else
9106 {
9107 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
9108 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9109 }
9110 }
9111 else
9112 {
9113 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
9114 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
9115 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
9116 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9117 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
9118 else
9119 {
9120 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
9121 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9122 }
9123 if (idxRegIndex != UINT8_MAX)
9124 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
9125 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9126 }
9127
9128#else
9129# error "port me"
9130#endif
9131
9132 if (idxRegIndex != UINT8_MAX)
9133 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9134 iemNativeRegFreeTmp(pReNative, idxRegBase);
9135 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9136 return off;
9137}
9138
9139
9140#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
9141 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
9142
9143/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
9144 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
9145DECL_INLINE_THROW(uint32_t)
9146iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9147 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
9148{
9149 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9150
9151 /*
9152 * Handle the disp32 form with no registers first.
9153 *
9154 * Convert to an immediate value, as that'll delay the register allocation
9155 * and assignment till the memory access / call / whatever and we can use
9156 * a more appropriate register (or none at all).
9157 */
9158 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9159 {
9160 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
9161 return off;
9162 }
9163
9164    /* Calculate the fixed displacement (more on this in the SIB.B=4 and SIB.B=5 cases below). */
9165 uint32_t u32EffAddr = 0;
9166 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9167 {
9168 case 0: break;
9169 case 1: u32EffAddr = (int8_t)u32Disp; break;
9170 case 2: u32EffAddr = u32Disp; break;
9171 default: AssertFailed();
9172 }
9173
9174 /* Get the register (or SIB) value. */
9175 uint8_t idxGstRegBase = UINT8_MAX;
9176 uint8_t idxGstRegIndex = UINT8_MAX;
9177 uint8_t cShiftIndex = 0;
9178 switch (bRm & X86_MODRM_RM_MASK)
9179 {
9180 case 0: idxGstRegBase = X86_GREG_xAX; break;
9181 case 1: idxGstRegBase = X86_GREG_xCX; break;
9182 case 2: idxGstRegBase = X86_GREG_xDX; break;
9183 case 3: idxGstRegBase = X86_GREG_xBX; break;
9184 case 4: /* SIB */
9185 {
9186            /* index w/ scaling. */
9187 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9188 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9189 {
9190 case 0: idxGstRegIndex = X86_GREG_xAX; break;
9191 case 1: idxGstRegIndex = X86_GREG_xCX; break;
9192 case 2: idxGstRegIndex = X86_GREG_xDX; break;
9193 case 3: idxGstRegIndex = X86_GREG_xBX; break;
9194 case 4: cShiftIndex = 0; /*no index*/ break;
9195 case 5: idxGstRegIndex = X86_GREG_xBP; break;
9196 case 6: idxGstRegIndex = X86_GREG_xSI; break;
9197 case 7: idxGstRegIndex = X86_GREG_xDI; break;
9198 }
9199
9200 /* base */
9201 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
9202 {
9203 case 0: idxGstRegBase = X86_GREG_xAX; break;
9204 case 1: idxGstRegBase = X86_GREG_xCX; break;
9205 case 2: idxGstRegBase = X86_GREG_xDX; break;
9206 case 3: idxGstRegBase = X86_GREG_xBX; break;
9207 case 4:
9208 idxGstRegBase = X86_GREG_xSP;
9209 u32EffAddr += uSibAndRspOffset >> 8;
9210 break;
9211 case 5:
9212 if ((bRm & X86_MODRM_MOD_MASK) != 0)
9213 idxGstRegBase = X86_GREG_xBP;
9214 else
9215 {
9216 Assert(u32EffAddr == 0);
9217 u32EffAddr = u32Disp;
9218 }
9219 break;
9220 case 6: idxGstRegBase = X86_GREG_xSI; break;
9221 case 7: idxGstRegBase = X86_GREG_xDI; break;
9222 }
9223 break;
9224 }
9225 case 5: idxGstRegBase = X86_GREG_xBP; break;
9226 case 6: idxGstRegBase = X86_GREG_xSI; break;
9227 case 7: idxGstRegBase = X86_GREG_xDI; break;
9228 }
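    /* Worked example (editorial sketch): bRm=0x44 selects the SIB form with a
       disp8; a SIB byte of 0x88 (scale=2, index=ECX, base=EAX) and u32Disp=0x10
       decode above to idxGstRegBase=xAX, idxGstRegIndex=xCX, cShiftIndex=2 and
       u32EffAddr=0x10, so the emitters below end up computing
       (uint32_t)(EAX + (ECX << 2) + 0x10). */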
9229
9230 /*
9231 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
9232 * the start of the function.
9233 */
9234 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9235 {
9236 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
9237 return off;
9238 }
9239
9240 /*
9241 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9242 */
9243 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9244 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9245 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9246 kIemNativeGstRegUse_ReadOnly);
9247 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9248 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9249 kIemNativeGstRegUse_ReadOnly);
9250
9251 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9252 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9253 {
9254 idxRegBase = idxRegIndex;
9255 idxRegIndex = UINT8_MAX;
9256 }
9257
9258#ifdef RT_ARCH_AMD64
9259 if (idxRegIndex == UINT8_MAX)
9260 {
9261 if (u32EffAddr == 0)
9262 {
9263 /* mov ret, base */
9264 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9265 }
9266 else
9267 {
9268 /* lea ret32, [base64 + disp32] */
9269 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9270 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9271 if (idxRegRet >= 8 || idxRegBase >= 8)
9272 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9273 pbCodeBuf[off++] = 0x8d;
9274 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9275 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9276 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9277 else
9278 {
9279 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9280 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9281 }
9282 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9283 if (bMod == X86_MOD_MEM4)
9284 {
9285 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9286 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9287 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9288 }
9289 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9290 }
9291 }
9292 else
9293 {
9294 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9295 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9296 if (idxRegBase == UINT8_MAX)
9297 {
9298 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
9299 if (idxRegRet >= 8 || idxRegIndex >= 8)
9300 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9301 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9302 pbCodeBuf[off++] = 0x8d;
9303 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9304 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9305 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9306 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9307 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9308 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9309 }
9310 else
9311 {
9312 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9313 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9314 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9315 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9316 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9317 pbCodeBuf[off++] = 0x8d;
9318 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9319 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9320 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9321 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9322 if (bMod != X86_MOD_MEM0)
9323 {
9324 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9325 if (bMod == X86_MOD_MEM4)
9326 {
9327 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9328 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9329 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9330 }
9331 }
9332 }
9333 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9334 }
9335
9336#elif defined(RT_ARCH_ARM64)
9337 if (u32EffAddr == 0)
9338 {
9339 if (idxRegIndex == UINT8_MAX)
9340 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9341 else if (idxRegBase == UINT8_MAX)
9342 {
9343 if (cShiftIndex == 0)
9344 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
9345 else
9346 {
9347 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9348 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
9349 }
9350 }
9351 else
9352 {
9353 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9354 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
9355 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9356 }
9357 }
9358 else
9359 {
9360 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
9361 {
9362 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9363 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
9364 }
9365 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
9366 {
9367 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9368 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9369 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
9370 }
9371 else
9372 {
9373 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
9374 if (idxRegBase != UINT8_MAX)
9375 {
9376 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9377 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9378 }
9379 }
9380 if (idxRegIndex != UINT8_MAX)
9381 {
9382 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9383 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
9384 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9385 }
9386 }
9387
9388#else
9389# error "port me"
9390#endif
9391
9392 if (idxRegIndex != UINT8_MAX)
9393 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9394 if (idxRegBase != UINT8_MAX)
9395 iemNativeRegFreeTmp(pReNative, idxRegBase);
9396 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9397 return off;
9398}
9399
9400
9401#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9402 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9403 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
9404
9405#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9406 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9407 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
9408
9409#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9410 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9411 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
9412
9413/**
9414 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
9415 *
9416 * @returns New off.
9417 * @param   pReNative           The native recompile state.
9418 * @param   off                 The current code buffer offset.
9419 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
9420 * bit 4 to REX.X. The two bits are part of the
9421 * REG sub-field, which isn't needed in this
9422 * function.
9423 * @param uSibAndRspOffset Two parts:
9424 * - The first 8 bits make up the SIB byte.
9425 * - The next 8 bits are the fixed RSP/ESP offset
9426 * in case of a pop [xSP].
9427 * @param u32Disp The displacement byte/word/dword, if any.
9428 * @param cbInstr The size of the fully decoded instruction. Used
9429 * for RIP relative addressing.
9430 * @param idxVarRet The result variable number.
9431 * @param f64Bit Whether to use a 64-bit or 32-bit address size
9432 * when calculating the address.
9433 *
9434 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
9435 */
9436DECL_INLINE_THROW(uint32_t)
9437iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
9438 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
9439{
9440 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9441
9442 /*
9443 * Special case the rip + disp32 form first.
9444 */
9445 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9446 {
9447 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9448 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
9449 kIemNativeGstRegUse_ReadOnly);
9450#ifdef RT_ARCH_AMD64
9451 if (f64Bit)
9452 {
9453 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
9454 if ((int32_t)offFinalDisp == offFinalDisp)
9455 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
9456 else
9457 {
9458 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
9459 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
9460 }
9461 }
9462 else
9463 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
9464
9465#elif defined(RT_ARCH_ARM64)
9466 if (f64Bit)
9467 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
9468 (int64_t)(int32_t)u32Disp + cbInstr);
9469 else
9470 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
9471 (int32_t)u32Disp + cbInstr);
9472
9473#else
9474# error "Port me!"
9475#endif
9476 iemNativeRegFreeTmp(pReNative, idxRegPc);
9477 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9478 return off;
9479 }
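    /* Editorial example (hedged, assuming the PC shadow register holds the start
       of the current instruction): a RIP-relative operand in an instruction at
       0x1000 with cbInstr=7 and u32Disp=0x100 resolves above to
       0x1000 + 0x100 + 7 = 0x1107, i.e. next-RIP plus the signed disp32. */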
9480
9481    /* Calculate the fixed displacement (more on this in the SIB.B=4 and SIB.B=5 cases below). */
9482 int64_t i64EffAddr = 0;
9483 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9484 {
9485 case 0: break;
9486 case 1: i64EffAddr = (int8_t)u32Disp; break;
9487 case 2: i64EffAddr = (int32_t)u32Disp; break;
9488 default: AssertFailed();
9489 }
9490
9491 /* Get the register (or SIB) value. */
9492 uint8_t idxGstRegBase = UINT8_MAX;
9493 uint8_t idxGstRegIndex = UINT8_MAX;
9494 uint8_t cShiftIndex = 0;
9495 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
9496 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
9497 else /* SIB: */
9498 {
9499        /* index w/ scaling. */
9500 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9501 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9502 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
9503 if (idxGstRegIndex == 4)
9504 {
9505 /* no index */
9506 cShiftIndex = 0;
9507 idxGstRegIndex = UINT8_MAX;
9508 }
9509
9510 /* base */
9511 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
9512 if (idxGstRegBase == 4)
9513 {
9514 /* pop [rsp] hack */
9515 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
9516 }
9517 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
9518 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
9519 {
9520 /* mod=0 and base=5 -> disp32, no base reg. */
9521 Assert(i64EffAddr == 0);
9522 i64EffAddr = (int32_t)u32Disp;
9523 idxGstRegBase = UINT8_MAX;
9524 }
9525 }
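    /* (Worked example for the SIB path above: lea rbx, [rax+rcx*4+0x10] encodes
       mod=1, rm=4 and a SIB byte with scale=2, index=rcx, base=rax plus disp8=0x10,
       so we end up with idxGstRegBase=xAX, idxGstRegIndex=xCX, cShiftIndex=2 and
       i64EffAddr=0x10.) */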
9526
9527 /*
9528 * If no registers are involved (SIB.B=5, SIB.X=4), the effective address is
9529 * just the constant displacement calculated above and no code needs emitting.
9530 */
9531 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9532 {
9533 if (f64Bit)
9534 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
9535 else
9536 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
9537 return off;
9538 }
9539
9540 /*
9541 * Now emit code that calculates:
9542 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9543 * or if !f64Bit:
9544 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9545 */
9546 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9547 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9548 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9549 kIemNativeGstRegUse_ReadOnly);
9550 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9551 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9552 kIemNativeGstRegUse_ReadOnly);
9553
9554 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9555 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9556 {
9557 idxRegBase = idxRegIndex;
9558 idxRegIndex = UINT8_MAX;
9559 }
9560
9561#ifdef RT_ARCH_AMD64
9562 uint8_t bFinalAdj;
9563 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
9564 bFinalAdj = 0; /* likely */
9565 else
9566 {
9567 /* pop [rsp] with a problematic disp32 value. Split out the
9568 RSP offset and add it separately afterwards (bFinalAdj). */
9569 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
9570 Assert(idxGstRegBase == X86_GREG_xSP);
9571 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
9572 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
9573 Assert(bFinalAdj != 0);
9574 i64EffAddr -= bFinalAdj;
9575 Assert((int32_t)i64EffAddr == i64EffAddr);
9576 }
9577 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
9578//pReNative->pInstrBuf[off++] = 0xcc;
9579
9580 if (idxRegIndex == UINT8_MAX)
9581 {
9582 if (u32EffAddr == 0)
9583 {
9584 /* mov ret, base */
9585 if (f64Bit)
9586 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
9587 else
9588 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9589 }
9590 else
9591 {
9592 /* lea ret, [base + disp32] */
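                /* (E.g. with f64Bit, ret=rax, base=rbx and disp8=0x10 this produces
                   48 8D 43 10, i.e. lea rax, [rbx+0x10]: REX.W, opcode 8Dh,
                   modrm(mod=1, reg=rax, rm=rbx), disp8 - just as an illustration.) */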
9593 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9594 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9595 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
9596 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9597 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9598 | (f64Bit ? X86_OP_REX_W : 0);
9599 pbCodeBuf[off++] = 0x8d;
9600 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9601 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9602 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9603 else
9604 {
9605 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9606 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9607 }
9608 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9609 if (bMod == X86_MOD_MEM4)
9610 {
9611 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9612 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9613 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9614 }
9615 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9616 }
9617 }
9618 else
9619 {
9620 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9621 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9622 if (idxRegBase == UINT8_MAX)
9623 {
9624 /* lea ret, [(index64 << cShiftIndex) + disp32] */
9625 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
9626 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9627 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
9628 | (f64Bit ? X86_OP_REX_W : 0);
9629 pbCodeBuf[off++] = 0x8d;
9630 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9631 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9632 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9633 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9634 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9635 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9636 }
9637 else
9638 {
9639 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9640 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9641 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9642 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9643 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
9644 | (f64Bit ? X86_OP_REX_W : 0);
9645 pbCodeBuf[off++] = 0x8d;
9646 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9647 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9648 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9649 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9650 if (bMod != X86_MOD_MEM0)
9651 {
9652 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9653 if (bMod == X86_MOD_MEM4)
9654 {
9655 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9656 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9657 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9658 }
9659 }
9660 }
9661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9662 }
9663
9664 if (!bFinalAdj)
9665 { /* likely */ }
9666 else
9667 {
9668 Assert(f64Bit);
9669 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
9670 }
9671
9672#elif defined(RT_ARCH_ARM64)
9673 if (i64EffAddr == 0)
9674 {
9675 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9676 if (idxRegIndex == UINT8_MAX)
9677 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
9678 else if (idxRegBase != UINT8_MAX)
9679 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
9680 f64Bit, false /*fSetFlags*/, cShiftIndex);
9681 else
9682 {
9683 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
9684 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
9685 }
9686 }
9687 else
9688 {
9689 if (f64Bit)
9690 { /* likely */ }
9691 else
9692 i64EffAddr = (int32_t)i64EffAddr;
9693
9694 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
9695 {
9696 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9697 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
9698 }
9699 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
9700 {
9701 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9702 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
9703 }
9704 else
9705 {
9706 if (f64Bit)
9707 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
9708 else
9709 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
9710 if (idxRegBase != UINT8_MAX)
9711 {
9712 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9713 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
9714 }
9715 }
9716 if (idxRegIndex != UINT8_MAX)
9717 {
9718 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9719 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
9720 f64Bit, false /*fSetFlags*/, cShiftIndex);
9721 }
9722 }
9723
9724#else
9725# error "port me"
9726#endif
9727
9728 if (idxRegIndex != UINT8_MAX)
9729 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9730 if (idxRegBase != UINT8_MAX)
9731 iemNativeRegFreeTmp(pReNative, idxRegBase);
9732 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9733 return off;
9734}
9735
9736
9737
9738
9739/*********************************************************************************************************************************
9740* Memory fetches and stores common *
9741*********************************************************************************************************************************/
9742
9743typedef enum IEMNATIVEMITMEMOP
9744{
9745 kIemNativeEmitMemOp_Store = 0,
9746 kIemNativeEmitMemOp_Fetch,
9747 kIemNativeEmitMemOp_Fetch_Zx_U16,
9748 kIemNativeEmitMemOp_Fetch_Zx_U32,
9749 kIemNativeEmitMemOp_Fetch_Zx_U64,
9750 kIemNativeEmitMemOp_Fetch_Sx_U16,
9751 kIemNativeEmitMemOp_Fetch_Sx_U32,
9752 kIemNativeEmitMemOp_Fetch_Sx_U64
9753} IEMNATIVEMITMEMOP;
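/* Note: the _Zx_/_Sx_ variants fetch cbMem bytes and then zero- or sign-extend the
   result to the wider U16/U32/U64 destination before it lands in the result
   variable; plain Fetch and Store work at the natural width. */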
9754
9755/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
9756 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
9757 * (with iSegReg = UINT8_MAX). */
9758DECL_INLINE_THROW(uint32_t)
9759iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
9760 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
9761 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
9762{
9763 /*
9764 * Assert sanity.
9765 */
9766 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
9767 Assert( enmOp != kIemNativeEmitMemOp_Store
9768 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
9769 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
9770 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9771 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
9772 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
9773 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9774 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9775 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
9776 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9777#ifdef VBOX_STRICT
9778 if (iSegReg == UINT8_MAX)
9779 {
9780 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9781 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9782 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9783 switch (cbMem)
9784 {
9785 case 1:
9786 Assert( pfnFunction
9787 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
9788 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9789 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9790 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9791 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9792 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
9793 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
9794 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
9795 : UINT64_C(0xc000b000a0009000) ));
9796 break;
9797 case 2:
9798 Assert( pfnFunction
9799 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
9800 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
9801 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
9802 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
9803 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
9804 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
9805 : UINT64_C(0xc000b000a0009000) ));
9806 break;
9807 case 4:
9808 Assert( pfnFunction
9809 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
9810 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
9811 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
9812 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
9813 : UINT64_C(0xc000b000a0009000) ));
9814 break;
9815 case 8:
9816 Assert( pfnFunction
9817 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
9818 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
9819 : UINT64_C(0xc000b000a0009000) ));
9820 break;
9821 }
9822 }
9823 else
9824 {
9825 Assert(iSegReg < 6);
9826 switch (cbMem)
9827 {
9828 case 1:
9829 Assert( pfnFunction
9830 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
9831 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
9832 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
9833 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
9834 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
9835 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
9836 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
9837 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
9838 : UINT64_C(0xc000b000a0009000) ));
9839 break;
9840 case 2:
9841 Assert( pfnFunction
9842 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
9843 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
9844 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
9845 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
9846 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
9847 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
9848 : UINT64_C(0xc000b000a0009000) ));
9849 break;
9850 case 4:
9851 Assert( pfnFunction
9852 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
9853 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
9854 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
9855 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
9856 : UINT64_C(0xc000b000a0009000) ));
9857 break;
9858 case 8:
9859 Assert( pfnFunction
9860 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
9861 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
9862 : UINT64_C(0xc000b000a0009000) ));
9863 break;
9864 }
9865 }
9866#endif
9867
9868#ifdef VBOX_STRICT
9869 /*
9870 * Check that the fExec flags we've got make sense.
9871 */
9872 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9873#endif
9874
9875 /*
9876 * To keep things simple we have to commit any pending writes first as we
9877 * may end up making calls.
9878 */
9879 /** @todo we could postpone this till we make the call and reload the
9880 * registers after returning from the call. Not sure if that's sensible or
9881 * not, though. */
9882 off = iemNativeRegFlushPendingWrites(pReNative, off);
9883
9884 /*
9885 * Move/spill/flush stuff out of call-volatile registers.
9886 * This is the easy way out. We could contain this to the tlb-miss branch
9887 * by saving and restoring active stuff here.
9888 */
9889 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9890 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9891
9892 /*
9893 * Define labels and allocate the result register (trying for the return
9894 * register if we can).
9895 */
9896 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9897 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
9898 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9899 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX /* special case value storing below */
9900 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9901 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
9902 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
9903
9904 /*
9905 * First we try to go via the TLB.
9906 */
9907//pReNative->pInstrBuf[off++] = 0xcc;
9908 /** @todo later. */
9909 RT_NOREF(fAlignMask, cbMem);
9910
9911 /*
9912 * Call helper to do the fetching or storing.
9913 * We flush all guest register shadow copies here.
9914 */
9915 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
9916
9917#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9918 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9919#else
9920 RT_NOREF(idxInstr);
9921#endif
9922
9923 uint8_t idxRegArgValue;
9924 if (iSegReg == UINT8_MAX)
9925 idxRegArgValue = IEMNATIVE_CALL_ARG2_GREG;
9926 else
9927 {
9928 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
9929 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9930 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
9931
9932 idxRegArgValue = IEMNATIVE_CALL_ARG3_GREG;
9933 }
9934
9935 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
9936 if (enmOp == kIemNativeEmitMemOp_Store)
9937 {
9938 if (pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate)
9939 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArgValue, pReNative->Core.aVars[idxVarValue].u.uValue);
9940 else
9941 {
9942 uint8_t const idxRegVarValue = pReNative->Core.aVars[idxVarValue].idxReg;
9943 if (idxRegVarValue < RT_ELEMENTS(pReNative->Core.aHstRegs))
9944 {
9945 Assert(!(RT_BIT_32(idxRegVarValue) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
9946 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArgValue, idxRegVarValue);
9947 }
9948 else
9949 {
9950 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarValue].idxStackSlot;
9951 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9952 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArgValue, iemNativeStackCalcBpDisp(idxStackSlot));
9953 }
9954 }
9955 }
9956
9957 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
9958 if (pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate)
9959 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
9960 pReNative->Core.aVars[idxVarGCPtrMem].u.uValue + offDisp);
9961 else
9962 {
9963 uint8_t const idxRegVarGCPtrMem = pReNative->Core.aVars[idxVarGCPtrMem].idxReg;
9964 if (idxRegVarGCPtrMem < RT_ELEMENTS(pReNative->Core.aHstRegs))
9965 {
9966 Assert(!(RT_BIT_32(idxRegVarGCPtrMem) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
9967 if (!offDisp)
9968 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem);
9969 else
9970 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem, offDisp);
9971 }
9972 else
9973 {
9974 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarGCPtrMem].idxStackSlot;
9975 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9976 AssertFailed(); /** @todo This was probably caused by iemNativeRegMoveAndFreeAndFlushAtCall above. Improve... */
9977 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, iemNativeStackCalcBpDisp(idxStackSlot));
9978 if (offDisp)
9979 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offDisp);
9980 }
9981 }
9982
9983 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9984 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9985
9986 /* Done setting up parameters, make the call. */
9987 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9988
9989 /*
9990 * Put the result in the right register if this is a fetch.
9991 */
9992 if (enmOp != kIemNativeEmitMemOp_Store)
9993 {
9994 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
9995 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
9996 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
9997 iemNativeVarRegisterRelease(pReNative, idxVarValue);
9998 }
9999
10000 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10001
10002 return off;
10003}
10004
10005
10006
10007/*********************************************************************************************************************************
10008* Memory fetches (IEM_MEM_FETCH_XXX). *
10009*********************************************************************************************************************************/
10010
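/* (Illustrative note: these IEM_MC_XXX macros are expanded while recompiling a
   threaded call entry, so the a_uXxDst/a_GCPtrMem arguments are recompiler
   variable indices rather than C locals. A decoder MC block along the lines of
       IEM_MC_LOCAL(uint16_t, u16Value);
       IEM_MC_FETCH_MEM_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
   thus ends up as a single call to iemNativeEmitMemFetchStoreDataCommon here.) */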
10011/* 8-bit segmented: */
10012#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
10013 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
10014 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
10015 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10016
10017#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10018 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10019 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
10020 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10021
10022#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10023 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10024 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10025 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10026
10027#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10028 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10029 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10030 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10031
10032#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10033 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10034 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
10035 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
10036
10037#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10038 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10039 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10040 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
10041
10042#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10043 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10044 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10045 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
10046
10047/* 16-bit segmented: */
10048#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10049 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10050 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10051 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10052
10053#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
10054 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10055 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10056 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
10057
10058#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10059 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10060 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10061 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10062
10063#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10064 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10065 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10066 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10067
10068#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10069 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10070 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10071 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
10072
10073#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10074 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10075 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10076 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
10077
10078
10079/* 32-bit segmented: */
10080#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10081 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10082 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10083 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
10084
10085#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
10086 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10087 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10088 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
10089
10090#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10091 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10092 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10093 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
10094
10095#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10096 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10097 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10098 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
10099
10100
10101/* 64-bit segmented: */
10102#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10103 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10104 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
10105 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
10106
10107
10108
10109/* 8-bit flat: */
10110#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
10111 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
10112 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
10113 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10114
10115#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
10116 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10117 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
10118 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10119
10120#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
10121 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10122 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10123 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10124
10125#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
10126 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10127 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10128 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10129
10130#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
10131 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10132 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
10133 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
10134
10135#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
10136 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10137 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10138 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
10139
10140#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
10141 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10142 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10143 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
10144
10145
10146/* 16-bit flat: */
10147#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
10148 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10149 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10150 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10151
10152#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
10153 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10154 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10155 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
10156
10157#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
10158 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10159 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10160 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10161
10162#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
10163 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10164 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10165 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10166
10167#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
10168 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10169 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10170 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
10171
10172#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
10173 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10174 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10175 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
10176
10177/* 32-bit flat: */
10178#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
10179 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10180 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10181 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
10182
10183#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
10184 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10185 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10186 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
10187
10188#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
10189 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10190 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10191 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
10192
10193#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
10194 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10195 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10196 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
10197
10198/* 64-bit flat: */
10199#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
10200 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10201 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
10202 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
10203
10204
10205
10206/*********************************************************************************************************************************
10207* Memory stores (IEM_MEM_STORE_XXX). *
10208*********************************************************************************************************************************/
10209
10210#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
10211 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
10212 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
10213 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
10214
10215#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
10216 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
10217 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
10218 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
10219
10220#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
10221 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
10222 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
10223 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
10224
10225#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
10226 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
10227 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
10228 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
10229
10230
10231#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
10232 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
10233 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
10234 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
10235
10236#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
10237 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
10238 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
10239 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
10240
10241#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
10242 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
10243 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
10244 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
10245
10246#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
10247 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
10248 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
10249 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
10250
10251
10252#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
10253 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10254 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
10255
10256#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
10257 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10258 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
10259
10260#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
10261 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10262 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
10263
10264#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
10265 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10266 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
10267
10268
10269#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
10270 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10271 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
10272
10273#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
10274 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10275 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
10276
10277#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
10278 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10279 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
10280
10281#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
10282 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10283 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
10284
10285/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
10286 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
10287DECL_INLINE_THROW(uint32_t)
10288iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
10289 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
10290{
10291 /*
10292 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
10293 * to do the grunt work.
10294 */
10295 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
10296 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
10297 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
10298 pfnFunction, idxInstr);
10299 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
10300 return off;
10301}
10302
10303
10304
10305/*********************************************************************************************************************************
10306* Stack Accesses. *
10307*********************************************************************************************************************************/
10308/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
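/* (Packing used below: byte 0 = width of the value being pushed in bits, byte 1 =
   flat stack-pointer width in bits (0 = segmented / not flat), byte 2 = set when
   the value is a segment register; e.g. RT_MAKE_U32_FROM_U8(32, 32, 1, 0) is a
   32-bit segment-register push on a flat 32-bit stack.) */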
10309#define IEM_MC_PUSH_U16(a_u16Value) \
10310 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
10311 (uintptr_t)iemNativeHlpStackPushU16, pCallEntry->idxInstr)
10312#define IEM_MC_PUSH_U32(a_u32Value) \
10313 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
10314 (uintptr_t)iemNativeHlpStackPushU32, pCallEntry->idxInstr)
10315#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
10316 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
10317 (uintptr_t)iemNativeHlpStackPushU32SReg, pCallEntry->idxInstr)
10318#define IEM_MC_PUSH_U64(a_u64Value) \
10319 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
10320 (uintptr_t)iemNativeHlpStackPushU64, pCallEntry->idxInstr)
10321
10322#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
10323 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
10324 (uintptr_t)iemNativeHlpStackFlat32PushU16, pCallEntry->idxInstr)
10325#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
10326 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
10327 (uintptr_t)iemNativeHlpStackFlat32PushU32, pCallEntry->idxInstr)
10328#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
10329 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
10330 (uintptr_t)iemNativeHlpStackFlat32PushU32SReg, pCallEntry->idxInstr)
10331
10332#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
10333 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
10334 (uintptr_t)iemNativeHlpStackFlat64PushU16, pCallEntry->idxInstr)
10335#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
10336 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
10337 (uintptr_t)iemNativeHlpStackFlat64PushU64, pCallEntry->idxInstr)
10338
10339/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
10340DECL_INLINE_THROW(uint32_t)
10341iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
10342 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
10343{
10344 /*
10345 * Assert sanity.
10346 */
10347 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
10348#ifdef VBOX_STRICT
10349 if (RT_BYTE2(cBitsVarAndFlat) != 0)
10350 {
10351 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10352 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10353 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10354 Assert( pfnFunction
10355 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU16
10356 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32
10357 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32SReg
10358 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU16
10359 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU64
10360 : UINT64_C(0xc000b000a0009000) ));
10361 }
10362 else
10363 Assert( pfnFunction
10364 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU16
10365 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU32
10366 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackPushU32SReg
10367 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU64
10368 : UINT64_C(0xc000b000a0009000) ));
10369#endif
10370
10371#ifdef VBOX_STRICT
10372 /*
10373 * Check that the fExec flags we've got make sense.
10374 */
10375 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10376#endif
10377
10378 /*
10379 * To keep things simple we have to commit any pending writes first as we
10380 * may end up making calls.
10381 */
10382 /** @todo we could postpone this till we make the call and reload the
10383 * registers after returning from the call. Not sure if that's sensible or
10384 * not, though. */
10385 off = iemNativeRegFlushPendingWrites(pReNative, off);
10386
10387 /*
10388 * Move/spill/flush stuff out of call-volatile registers, keeping whatever
10389 * register idxVarValue might be occupying.
10390 *
10391 * This is the easy way out. We could contain this to the tlb-miss branch
10392 * by saving and restoring active stuff here.
10393 */
10394 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10395 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarValue));
10396
10397 /* For now, flush any shadow copy of the xSP register. */
10398 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
10399
10400 /*
10401 * Define the TLB miss and TLB done labels; a push has no result register
10402 * to allocate.
10403 */
10404 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10405 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10406 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10407
10408 /*
10409 * First we try to go via the TLB.
10410 */
10411//pReNative->pInstrBuf[off++] = 0xcc;
10412 /** @todo later. */
10413 RT_NOREF(cBitsVarAndFlat);
10414
10415 /*
10416 * Call helper to do the pushing.
10417 */
10418 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10419
10420#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10421 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10422#else
10423 RT_NOREF(idxInstr);
10424#endif
10425
10426 /* IEMNATIVE_CALL_ARG1_GREG = idxVarValue (first) */
10427 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarValue,
10428 0 /*offAddend*/, true /*fVarAllowInVolatileReg*/);
10429
10430 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10431 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10432
10433 /* Done setting up parameters, make the call. */
10434 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10435
10436 /* The value variable is implicitly flushed. */
10437 iemNativeVarFreeLocal(pReNative, idxVarValue);
10438
10439 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10440
10441 return off;
10442}
10443
10444
10445
10446/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
10447#define IEM_MC_POP_GREG_U16(a_iGReg) \
10448 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
10449 (uintptr_t)iemNativeHlpStackPopGRegU16, pCallEntry->idxInstr)
10450#define IEM_MC_POP_GREG_U32(a_iGReg) \
10451 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
10452 (uintptr_t)iemNativeHlpStackPopGRegU32, pCallEntry->idxInstr)
10453#define IEM_MC_POP_GREG_U64(a_iGReg) \
10454 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
10455 (uintptr_t)iemNativeHlpStackPopGRegU64, pCallEntry->idxInstr)
10456
10457#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
10458 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
10459 (uintptr_t)iemNativeHlpStackFlat32PopGRegU16, pCallEntry->idxInstr)
10460#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
10461 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
10462 (uintptr_t)iemNativeHlpStackFlat32PopGRegU32, pCallEntry->idxInstr)
10463
10464#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
10465 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
10466 (uintptr_t)iemNativeHlpStackFlat64PopGRegU16, pCallEntry->idxInstr)
10467#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
10468 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
10469 (uintptr_t)iemNativeHlpStackFlat64PopGRegU64, pCallEntry->idxInstr)
10470
10471/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
10472DECL_INLINE_THROW(uint32_t)
10473iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
10474 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
10475{
10476 /*
10477 * Assert sanity.
10478 */
10479 Assert(idxGReg < 16);
10480#ifdef VBOX_STRICT
10481 if (RT_BYTE2(cBitsVarAndFlat) != 0)
10482 {
10483 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10484 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10485 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10486 Assert( pfnFunction
10487 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU16
10488 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU32
10489 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU16
10490 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU64
10491 : UINT64_C(0xc000b000a0009000) ));
10492 }
10493 else
10494 Assert( pfnFunction
10495 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU16
10496 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU32
10497 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU64
10498 : UINT64_C(0xc000b000a0009000) ));
10499#endif
10500
10501#ifdef VBOX_STRICT
10502 /*
10503 * Check that the fExec flags we've got make sense.
10504 */
10505 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10506#endif
10507
10508 /*
10509 * To keep things simple we have to commit any pending writes first as we
10510 * may end up making calls.
10511 */
10512 /** @todo we could postpone this till we make the call and reload the
10513 * registers after returning from the call. Not sure if that's sensible or
10514 * not, though. */
10515 off = iemNativeRegFlushPendingWrites(pReNative, off);
10516
10517 /*
10518 * Move/spill/flush stuff out of call-volatile registers.
10519 * This is the easy way out. We could contain this to the tlb-miss branch
10520 * by saving and restoring active stuff here.
10521 */
10522 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10523 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
10524
10525 /* For now, flush any shadow copy of the guest register that is about
10526 to be popped as well as the xSP register. */
10527 iemNativeRegFlushGuestShadows(pReNative,
10528 RT_BIT_64(IEMNATIVEGSTREG_GPR(idxGReg)) | RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
10529
10530 /*
10531 * Define the TLB miss and TLB done labels; the popped value goes straight
10532 * to the guest register, so there is no result register to allocate.
10533 */
10534 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10535 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10536 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10537
10538 /*
10539 * First we try to go via the TLB.
10540 */
10541//pReNative->pInstrBuf[off++] = 0xcc;
10542 /** @todo later. */
10543 RT_NOREF(cBitsVarAndFlat);
10544
10545 /*
10546 * Call helper to do the popping.
10547 */
10548 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10549
10550#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10551 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10552#else
10553 RT_NOREF(idxInstr);
10554#endif
10555
10556 /* IEMNATIVE_CALL_ARG1_GREG = iGReg */
10557 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxGReg);
10558
10559 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10560 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10561
10562 /* Done setting up parameters, make the call. */
10563 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10564
10565 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10566
10567 return off;
10568}
10569
10570
10571
10572/*********************************************************************************************************************************
10573* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
10574*********************************************************************************************************************************/
10575
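/* (Common shape of the mapping statements below: a_pXxxMem receives the host
   pointer to the mapped guest memory, a_bUnmapInfo the token that the matching
   commit-and-unmap statement consumes later, and the remaining arguments give
   the segment, guest address, mapped size, access type, alignment mask and the
   C helper that performs the actual mapping.) */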
10576#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10577 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10578 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10579 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
10580
10581#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10582 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10583 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10584 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
10585
10586#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10587 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10588 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
10589 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
10590
10591
10592#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10593 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10594 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10595 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
10596
10597#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10598 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10599 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10600 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
10601
10602#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10603 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10604 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10605 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
10606
10607#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10608 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
10609 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10610 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
10611
10612
10613#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10614 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10615 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10616 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
10617
10618#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10619 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10620 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10621 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
10622
10623#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10624 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10625 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10626 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
10627
10628#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10629 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
10630 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10631 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
10632
10633
10634#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10635 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10636 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10637 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
10638
10639#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10640 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10641 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10642 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
10643
10644#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10645 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10646 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10647 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
10648
10649#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10650 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
10651 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10652 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
10653
10654
10655#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10656 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
10657 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10658 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
10659
10660#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10661 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
10662 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
10663 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
10664
10665
10666#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10667 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
10668 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10669 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
10670
10671#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10672 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
10673 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10674 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
10675
10676#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10677 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
10678 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10679 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
10680
10681
10682
10683#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
10684 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10685 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10686 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
10687
10688#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
10689 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10690 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10691 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
10692
10693#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
10694 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10695 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
10696 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
10697
10698
10699#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
10700 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10701 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10702 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
10703
10704#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
10705 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10706 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10707 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
10708
10709#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
10710 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10711 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10712 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
10713
10714#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
10715 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
10716 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10717 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
10718
10719
10720#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
10721 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10722 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10723 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
10724
10725#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
10726 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10727 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10728 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
10729
10730#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
10731 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10732 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10733 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
10734
10735#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
10736 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
10737 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10738 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
10739
10740
10741#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
10742 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10743 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10744 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
10745
10746#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
10747 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10748 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10749 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
10750
10751#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
10752 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10753 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10754 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
10755
10756#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
10757 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
10758 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10759 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
10760
10761
10762#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
10763 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
10764 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10765 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
10766
10767#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
10768 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
10769 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
10770 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
10771
10772
10773#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
10774 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
10775 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10776 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
10777
10778#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
10779 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
10780 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10781 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
10782
10783#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
10784 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
10785 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10786 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
10787
10788
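/**
 * Common emitter for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 * statements above.
 *
 * Emits code that maps guest memory for the current instruction, leaving the
 * host pointer in @a idxVarMem and the unmap cookie in @a idxVarUnmapInfo.
 * The inline TLB lookup sketch below is still disabled, so at present every
 * mapping goes via the @a pfnFunction helper (the TLB miss path).
 *
 * @returns New code buffer offset; throws a VBox status code via longjmp on
 *          failure.
 * @param   pReNative       The native recompile state.
 * @param   off             The current code buffer offset.
 * @param   idxVarMem       The variable receiving the host mapping pointer.
 * @param   idxVarUnmapInfo The variable receiving the unmap info (bUnmapInfo).
 * @param   iSegReg         The segment register, UINT8_MAX for flat accesses.
 * @param   idxVarGCPtrMem  The variable holding the guest address.
 * @param   cbMem           The size of the access in bytes.
 * @param   fAccess         The IEM_ACCESS_TYPE_XXX mask for the access.
 * @param   fAlignMask      The alignment mask (e.g. sizeof(uint32_t) - 1).
 * @param   pfnFunction     The iemNativeHlpMem[Flat]MapDataXxx helper to call.
 * @param   idxInstr        The instruction number within the TB.
 */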
10789DECL_INLINE_THROW(uint32_t)
10790iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
10791 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
10792 uintptr_t pfnFunction, uint8_t idxInstr)
10793{
10794 /*
10795 * Assert sanity.
10796 */
10797 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
10798 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
10799 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
10800 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10801
10802 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
10803 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
10804 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
10805 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10806
10807 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
10808 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
10809 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
10810 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10811
10812 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
10813
10814 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
10815
10816#ifdef VBOX_STRICT
10817# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
10818 ( ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
10819 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
10820 : ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_READ \
10821 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
10822
10823 if (iSegReg == UINT8_MAX)
10824 {
10825 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10826 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10827 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10828 switch (cbMem)
10829 {
10830 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
10831 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
10832 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
10833 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
10834 case 10:
10835 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
10836 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
10837 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
10838 break;
10839 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
10840# if 0
10841 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
10842 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
10843# endif
10844 default: AssertFailed(); break;
10845 }
10846 }
10847 else
10848 {
10849 Assert(iSegReg < 6);
10850 switch (cbMem)
10851 {
10852 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
10853 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
10854 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
10855 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
10856 case 10:
10857 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
10858 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
10859 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
10860 break;
10861 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
10862# if 0
10863 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU256)); break;
10864 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU512)); break;
10865# endif
10866 default: AssertFailed(); break;
10867 }
10868 }
10869# undef IEM_MAP_HLP_FN
10870#endif
10871
10872#ifdef VBOX_STRICT
10873 /*
10874 * Check that the fExec flags we've got make sense.
10875 */
10876 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10877#endif
10878
10879 /*
10880 * To keep things simple we have to commit any pending writes first as we
10881 * may end up making calls.
10882 */
10883 /** @todo we could postpone this till we make the call and reload the
10884 * registers after returning from the call. Not sure if that's sensible or
10885 * not, though. */
10886 off = iemNativeRegFlushPendingWrites(pReNative, off);
10887
10888 /*
10889 * Move/spill/flush stuff out of call-volatile registers.
10890 * This is the easy way out. We could contain this to the tlb-miss branch
10891 * by saving and restoring active stuff here.
10892 */
10893 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10894 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
10895
10896 /*
10897 * Define labels and allocate the result register (trying for the return
10898 * register if we can - which we of course can, given the above call).
10899 */
10900 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10901 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10902 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10903 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
10904 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
10905 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
10906
10907 /*
10908 * First we try to go via the TLB.
10909 */
10910#if defined(RT_ARCH_AMD64) && 0 /* untested code sketch */
10911 uint8_t const idxRegPtr = iemNativeVarRegisterAcquire(pReNative, idxVarGCPtrMem, &off,
10912 true /*fInitialized*/, IEMNATIVE_CALL_ARG2_GREG);
10913 uint8_t const idxRegSegBase = iSegReg == UINT8_MAX ? UINT8_MAX
10914 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(iSegReg));
10915    uint8_t const idxRegSegLimit  = iSegReg == UINT8_MAX || (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT
10916                                  ? UINT8_MAX
10917                                  : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(iSegReg));
10918    uint8_t const idxRegSegAttrib = iSegReg == UINT8_MAX || (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT
10919                                  ? UINT8_MAX
10920                                  : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(iSegReg));
10921 uint8_t const idxReg1 = iemNativeRegAllocTmp(pReNative, &off);
10922 uint8_t const idxReg2 = iemNativeRegAllocTmp(pReNative, &off);
10923 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 256);
10924 pbCodeBuf[off++] = 0xcc;
10925
10926 /*
10927 * 1. Segmentation.
10928 *
10929 * 1a. Check segment limit and attributes if non-flat 32-bit code. This is complicated.
10930 */
10931 if (iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
10932 {
10933 /* If we're accessing more than one byte, put the last address we'll be
10934 accessing in idxReg2 (64-bit). */
10935 if (cbMem > 1)
10936 {
10937 /* mov reg2, cbMem-1 */
10938 off = iemNativeEmitLoadGpr32ImmEx(pbCodeBuf, off, idxReg2, cbMem - 1);
10939 /* add reg2, regptr */
10940 off = iemNativeEmitAddTwoGprsEx(pbCodeBuf, off, idxReg2, idxRegPtr);
10941 }
10942
10943 /* Check that we've got a segment loaded and that it allows the access.
10944 For write access this means a writable data segment.
10945 For read-only accesses this means a readable code segment or any data segment. */
10946 if (fAccess & IEM_ACCESS_TYPE_WRITE)
10947 {
10948 uint32_t const fMustBe1 = X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_WRITE;
10949 uint32_t const fMustBe0 = X86DESCATTR_UNUSABLE | X86_SEL_TYPE_CODE;
10950 /* mov reg1, must1|must0 */
10951 off = iemNativeEmitLoadGpr32ImmEx(pbCodeBuf, off, idxReg1, fMustBe1 | fMustBe0);
10952 /* and reg1, segattrs */
10953 off = iemNativeEmitAndGpr32ByGpr32Ex(pbCodeBuf, off, idxReg1, idxRegSegAttrib);
10954 /* cmp reg1, must1 */
10955 off = iemNativeEmitCmpGpr32WithImmEx(pbCodeBuf, off, idxReg1, fMustBe1);
10956 /* jne tlbmiss */
10957 off = iemNativeEmitJccToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10958 }
10959 else
10960 {
10961 /* U | !P |!DT |!CD | RW |
10962 16 | 8 | 4 | 3 | 1 |
10963 -------------------------------
10964 0 | 0 | 0 | 0 | 0 | execute-only code segment. - must be excluded
10965 0 | 0 | 0 | 0 | 1 | execute-read code segment.
10966 0 | 0 | 0 | 1 | 0 | read-only data segment.
10967 0 | 0 | 0 | 1 | 1 | read-write data segment. - last valid combination
10968 */
10969 /* mov reg1, relevant attributes */
10970            off = iemNativeEmitLoadGpr32ImmEx(pbCodeBuf, off, idxReg1,
10971 X86DESCATTR_UNUSABLE | X86DESCATTR_P | X86DESCATTR_DT
10972 | X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE);
10973 /* and reg1, segattrs */
10974 off = iemNativeEmitAndGpr32ByGpr32Ex(pbCodeBuf, off, idxReg1, idxRegSegAttrib);
10975 /* xor reg1, X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE ; place C=1 RW=0 at the bottom & limit the range. */
10976 off = iemNativeEmitXorGpr32ByImmEx(pbCodeBuf, off, idxReg1, X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE);
10977            /* sub reg1, X86_SEL_TYPE_WRITE ; ER-code=0, EO-code=0xfffffffe, RO-data=6, RW-data=8 */
10978 off = iemNativeEmitSubGpr32ImmEx(pbCodeBuf, off, idxReg1, X86_SEL_TYPE_WRITE);
10979            /* cmp reg1, (X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE) - 1 */
10980 AssertCompile((X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE) - 1 == 9);
10981 off = iemNativeEmitCmpGpr32WithImmEx(pbCodeBuf, off, idxReg1, (X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE) - 1);
10982 /* ja tlbmiss */
10983 off = iemNativeEmitJccToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
10984 }
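        /* Worked example, assuming the usual attribute layout (P = bit 7,
           DT = bit 4, type in bits 0-3): a present read/write data segment
           gives reg1 = P|DT|RW = 0x92, then (0x92 ^ 0x98) - 2 = 8, which is
           <= 9 and thus passes, while an execute-only code segment gives
           (0x98 ^ 0x98) - 2 = 0xfffffffe and goes to the TLB miss path. */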
10985
10986 /*
10987 * Check the limit. If this is a write access, we know that it's a
10988 * data segment and includes the expand_down bit. For read-only accesses
10989 * we need to check that code/data=0 and expanddown=1 before continuing.
10990 */
10991 uint32_t offFixupCheckExpandDown;
10992 if (fAccess & IEM_ACCESS_TYPE_WRITE)
10993 {
10994 /* test segattrs, X86_SEL_TYPE_DOWN */
10995 AssertCompile(X86_SEL_TYPE_DOWN < 128);
10996 off = iemNativeEmitTestAnyBitsInGpr8Ex(pbCodeBuf, off, idxRegSegAttrib, X86_SEL_TYPE_DOWN);
10997 /* jnz check_expand_down */
10998 offFixupCheckExpandDown = off;
10999 off = iemNativeEmitJccToFixedEx(pbCodeBuf, off, off /*ASSUMES rel8 suffices*/, kIemNativeInstrCond_ne);
11000 }
11001 else
11002 {
11003 /* mov reg1, segattrs */
11004 off = iemNativeEmitLoadGprFromGpr32Ex(pbCodeBuf, off, idxReg1, idxRegSegAttrib);
11005 /* and reg1, code | down */
11006 off = iemNativeEmitAndGpr32ByImmEx(pbCodeBuf, off, idxReg1, X86_SEL_TYPE_CODE | X86_SEL_TYPE_DOWN);
11007 /* cmp reg1, down */
11008 off = iemNativeEmitCmpGpr32WithImmEx(pbCodeBuf, off, idxReg1, X86_SEL_TYPE_DOWN);
11009 /* je check_expand_down */
11010 offFixupCheckExpandDown = off;
11011 off = iemNativeEmitJccToFixedEx(pbCodeBuf, off, off /*ASSUMES rel8 suffices*/, kIemNativeInstrCond_e);
11012 }
11013
11014 /* expand_up:
11015 cmp regptr/reg2, seglim */
11016 off = iemNativeEmitCmpGprWithGprEx(pbCodeBuf, off, cbMem > 1 ? idxReg2 : idxRegPtr, idxRegSegLimit);
11017 /* ja tlbmiss */
11018 off = iemNativeEmitJccToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
11019 /* jmp limitdone */
11020 uint32_t const offFixupLimitDone = off;
11021 off = iemNativeEmitJmpToFixedEx(pbCodeBuf, off, off /*ASSUMES rel8 suffices*/);
11022
11023        /* check_expand_down: ; complicated! */
11024 iemNativeFixupFixedJump(pReNative, offFixupCheckExpandDown, off);
11025 /* cmp regptr, seglim */
11026 off = iemNativeEmitCmpGprWithGprEx(pbCodeBuf, off, idxRegPtr, idxRegSegLimit);
11027 /* jbe tlbmiss */
11028 off = iemNativeEmitJccToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
11029 /* mov reg1, X86DESCATTR_D (0x4000) */
11030        off = iemNativeEmitLoadGpr32ImmEx(pbCodeBuf, off, idxReg1, X86DESCATTR_D);
11031 /* and reg1, segattr */
11032 off = iemNativeEmitAndGpr32ByGpr32Ex(pbCodeBuf, off, idxReg1, idxRegSegAttrib);
11033 /* xor reg1, X86DESCATTR_D */
11034 off = iemNativeEmitXorGpr32ByImmEx(pbCodeBuf, off, idxReg1, X86DESCATTR_D);
11035 /* shl reg1, 2 (16 - 14) */
11036 AssertCompile((X86DESCATTR_D << 2) == UINT32_C(0x10000));
11037 off = iemNativeEmitShiftGpr32LeftEx(pbCodeBuf, off, idxReg1, 2);
11038 /* dec reg1 (=> 0xffff if D=0; 0xffffffff if D=1) */
11039 off = iemNativeEmitSubGpr32ImmEx(pbCodeBuf, off, idxReg1, 1);
11040        /* cmp regptr/reg2, reg1 (64-bit) */
11041        off = iemNativeEmitCmpGprWithGprEx(pbCodeBuf, off, cbMem > 1 ? idxReg2 : idxRegPtr, idxReg1);
11042 /* ja tlbmiss */
11043 off = iemNativeEmitJccToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
11044
11045 /* limitdone: */
11046 iemNativeFixupFixedJump(pReNative, offFixupLimitDone, off);
11047 }
11048
11049 /* 1b. Add the segment base. We use idxRegMemResult for the ptr register if this step is required. */
11050 uint8_t const idxRegFlatPtr = iSegReg != UINT8_MAX ? idxRegMemResult : idxRegPtr;
11051 if (iSegReg != UINT8_MAX)
11052 {
11053 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
11054 {
11055 Assert(iSegReg >= X86_SREG_FS);
11056 /* mov regflat, regptr */
11057 off = iemNativeEmitLoadGprFromGprEx(pbCodeBuf, off, idxRegFlatPtr, idxRegPtr);
11058 /* add regflat, seg.base */
11059 off = iemNativeEmitAddTwoGprsEx(pbCodeBuf, off, idxRegFlatPtr, idxRegSegBase);
11060 }
11061 else
11062 {
11063 /* mov regflat, regptr */
11064 off = iemNativeEmitLoadGprFromGpr32Ex(pbCodeBuf, off, idxRegFlatPtr, idxRegPtr);
11065 /* add regflat, seg.base */
11066 off = iemNativeEmitAddTwoGprs32Ex(pbCodeBuf, off, idxRegFlatPtr, idxRegSegBase);
11067 }
11068 }
11069
11070 /*
11071     * 2. Check that the address doesn't cross a page boundary and doesn't have alignment issues.
11072 *
11073 * 2a. Alignment check using fAlignMask.
11074 */
11075 Assert(RT_IS_POWER_OF_TWO(fAlignMask + 1));
11076 Assert(fAlignMask < 128);
11077 /* test regflat, fAlignMask */
11078 off = iemNativeEmitTestAnyBitsInGpr8Ex(pbCodeBuf, off, idxRegFlatPtr, fAlignMask);
11079 /* jnz tlbmiss */
11080 off = iemNativeEmitJccToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
11081
11082 /*
11083     * 2b. Check that it's not crossing a page boundary. This is implicit in
11084     * the previous test if the alignment is the same as or larger than the type.
11085 */
11086 if (cbMem > fAlignMask + 1)
11087 {
11088 /* mov reg1, 0xfff */
11089 off = iemNativeEmitLoadGpr32ImmEx(pbCodeBuf, off, idxReg1, GUEST_PAGE_OFFSET_MASK);
11090 /* and reg1, regflat */
11091 off = iemNativeEmitAndGpr32ByGpr32Ex(pbCodeBuf, off, idxReg1, idxRegFlatPtr);
11092 /* neg reg1 */
11093 off = iemNativeEmitNegGpr32Ex(pbCodeBuf, off, idxReg1);
11094 /* add reg1, 0x1000 */
11095 off = iemNativeEmitAddGpr32ImmEx(pbCodeBuf, off, idxReg1, GUEST_PAGE_SIZE);
11096        /* cmp reg1, cbMem - 1 ; reg1 = number of bytes left on the page */
11097        off = iemNativeEmitCmpGpr32WithImmEx(pbCodeBuf, off, idxReg1, cbMem - 1);
11098        /* jbe tlbmiss ; taken when fewer than cbMem bytes are left on the page */
11099        off = iemNativeEmitJccToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
11100 }
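    /* E.g. a 4 byte access at page offset 0xffd: reg1 = 0x1000 - 0xffd = 3,
       which is <= cbMem - 1, so the access crosses into the next page and we
       take the TLB miss path. */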
11101
11102 /*
11103 * 3. TLB lookup.
11104 *
11105 * 3a. Calculate the TLB tag value (IEMTLB_CALC_TAG).
11106 * In 64-bit mode we will also check for non-canonical addresses here.
11107 */
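    /* In the 64-bit branch below, rotating left by 16 puts the canonical bits
       (63:48) at the bottom, where a 16-bit increment maps 0x0000/0xffff to 1/0;
       anything ending up above 1 is non-canonical and goes to the TLB miss path.
       The shift that follows then yields the page number from the low 48 address
       bits, which is OR'ed with uTlbRevision to form the tag compared against
       IEMTLBENTRY::uTag (i.e. IEMTLB_CALC_TAG). */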
11108 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
11109 {
11110 /* mov reg1, regflat */
11111 off = iemNativeEmitLoadGprFromGprEx(pbCodeBuf, off, idxReg1, idxRegFlatPtr);
11112 /* rol reg1, 16 */
11113 off = iemNativeEmitRotateGprLeftEx(pbCodeBuf, off, idxReg1, 16);
11114 /** @todo Would 'movsx reg2, word reg1' and working on reg2 in dwords be faster? */
11115 /* inc word reg1 */
11116 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11117 if (idxReg1 >= 8)
11118 pbCodeBuf[off++] = X86_OP_REX_B;
11119 pbCodeBuf[off++] = 0xff;
11120 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg1 & 7);
11121 /* cmp word reg1, 1 */
11122 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11123 if (idxReg1 >= 8)
11124 pbCodeBuf[off++] = X86_OP_REX_B;
11125 pbCodeBuf[off++] = 0x83;
11126 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, idxReg1 & 7);
11127 pbCodeBuf[off++] = 1;
11128 /* ja tlbmiss */
11129 off = iemNativeEmitJccToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
11130 /* shr reg1, 16 + GUEST_PAGE_SHIFT */
11131 off = iemNativeEmitShiftGprRightEx(pbCodeBuf, off, idxReg1, 16 + GUEST_PAGE_SHIFT);
11132 }
11133 else
11134 {
11135 /* mov reg1, regflat */
11136 off = iemNativeEmitLoadGprFromGpr32Ex(pbCodeBuf, off, idxReg1, idxRegFlatPtr);
11137 /* shr reg1, GUEST_PAGE_SHIFT */
11138 off = iemNativeEmitShiftGpr32RightEx(pbCodeBuf, off, idxReg1, GUEST_PAGE_SHIFT);
11139 }
11140 /* or reg1, [qword pVCpu->iem.s.DataTlb.uTlbRevision] */
11141 pbCodeBuf[off++] = idxReg1 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
11142 pbCodeBuf[off++] = 0x0b; /* OR r64,r/m64 */
11143 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg1, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbRevision));
11144
11145 /*
11146 * 3b. Calc pTlbe.
11147 */
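    /* The entry index is simply the low byte of the tag, i.e. this sketch
       assumes a data TLB with 256 entries of 32 bytes each (the latter is
       backed by the AssertCompileSize below). */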
11148 /* movzx reg2, byte reg1 */
11149 off = iemNativeEmitLoadGprFromGpr8Ex(pbCodeBuf, off, idxReg2, idxReg1);
11150 /* shl reg2, 5 ; reg2 *= sizeof(IEMTLBENTRY) */
11151 AssertCompileSize(IEMTLBENTRY, 32);
11152 off = iemNativeEmitShiftGprLeftEx(pbCodeBuf, off, idxReg2, 5);
11153 /* lea reg2, [pVCpu->iem.s.DataTlb.aEntries + reg2] */
11154 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU < 8);
11155 pbCodeBuf[off++] = idxReg2 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_X | X86_OP_REX_R;
11156 pbCodeBuf[off++] = 0x8d;
11157 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxReg2 & 7, 4 /*SIB*/);
11158 pbCodeBuf[off++] = X86_SIB_MAKE(IEMNATIVE_REG_FIXED_PVMCPU & 7, idxReg2 & 7, 0);
11159 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
11160 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
11161 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
11162 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
11163
11164 /*
11165 * 3c. Compare the TLBE.uTag with the one from 2a (reg1).
11166 */
11167 /* cmp reg1, [reg2] */
11168 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg1 < 8 ? 0 : X86_OP_REX_R) | (idxReg2 < 8 ? 0 : X86_OP_REX_B);
11169 pbCodeBuf[off++] = 0x3b;
11170 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, idxReg1, idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
11171 /* jne tlbmiss */
11172 off = iemNativeEmitJccToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
11173
11174 /*
11175 * 4. Check TLB page table level access flags and physical page revision #.
11176 */
11177 /* mov reg1, mask */
11178 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
11179 uint64_t const fNoUser = (((pReNative->fExec >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK) + 1) & IEMTLBE_F_PT_NO_USER;
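    /* Note: (CPL + 1) & 4 is non-zero only for CPL 3, so IEMTLBE_F_PT_NO_USER
             is only included in the check mask when executing user-mode code. */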
11180 off = iemNativeEmitLoadGprImmEx(pbCodeBuf, off, idxReg1,
11181 IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
11182 | IEMTLBE_F_PG_UNASSIGNED | IEMTLBE_F_PG_NO_READ
11183 | IEMTLBE_F_PT_NO_ACCESSED | fNoUser);
11184 /* and reg1, [reg2->fFlagsAndPhysRev] */
11185 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg1 < 8 ? 0 : X86_OP_REX_R) | (idxReg2 < 8 ? 0 : X86_OP_REX_B);
11186 pbCodeBuf[off++] = 0x23;
11187 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, idxReg1, idxReg2, RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
11188
11189 /* cmp reg1, [pVCpu->iem.s.DataTlb.uTlbPhysRev] */
11190 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg1 < 8 ? 0 : X86_OP_REX_R);
11191 pbCodeBuf[off++] = 0x3b;
11192 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, idxReg1, IEMNATIVE_REG_FIXED_PVMCPU,
11193 RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbPhysRev));
11194 /* jne tlbmiss */
11195 off = iemNativeEmitJccToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
11196
11197 /*
11198 * 5. Check that pbMappingR3 isn't NULL (paranoia) and calculate the
11199 * resulting pointer.
11200 */
11201 /* mov reg1, [reg2->pbMappingR3] */
11202 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg1 < 8 ? 0 : X86_OP_REX_R) | (idxReg2 < 8 ? 0 : X86_OP_REX_B);
11203 pbCodeBuf[off++] = 0x8b;
11204    off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, idxReg1, idxReg2, RT_UOFFSETOF(IEMTLBENTRY, pbMappingR3));
11205
11206 /** @todo eliminate the need for this test? */
11207 /* test reg1, reg1 */
11208 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg1 < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
11209 pbCodeBuf[off++] = 0x85;
11210 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxReg1 & 7, idxReg1 & 7);
11211
11212 /* jz tlbmiss */
11213    off = iemNativeEmitJccToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_e);
11214
11215 if (idxRegFlatPtr == idxRegMemResult) /* See step 1b. */
11216 {
11217 /* and result, 0xfff */
11218 off = iemNativeEmitAndGpr32ByImmEx(pbCodeBuf, off, idxRegMemResult, GUEST_PAGE_OFFSET_MASK);
11219 }
11220 else
11221 {
11222 Assert(idxRegFlatPtr == idxRegPtr);
11223 /* mov result, 0xfff */
11224 off = iemNativeEmitLoadGpr32ImmEx(pbCodeBuf, off, idxRegMemResult, GUEST_PAGE_OFFSET_MASK);
11225 /* and result, regflat */
11226 off = iemNativeEmitAndGpr32ByGpr32Ex(pbCodeBuf, off, idxRegMemResult, idxRegFlatPtr);
11227 }
11228 /* add result, reg1 */
11229 off = iemNativeEmitAddTwoGprsEx(pbCodeBuf, off, idxRegMemResult, idxReg1);
11230
11231 /* jmp tlbdone */
11232 off = iemNativeEmitJmpToLabelEx(pReNative, pbCodeBuf, off, idxLabelTlbDone);
11233
11234    iemNativeVarRegisterRelease(pReNative, idxVarGCPtrMem);
11235    if (idxRegSegBase   != UINT8_MAX) iemNativeRegFree(pReNative, idxRegSegBase);
11236    if (idxRegSegLimit  != UINT8_MAX) iemNativeRegFree(pReNative, idxRegSegLimit);
11237    if (idxRegSegAttrib != UINT8_MAX) iemNativeRegFree(pReNative, idxRegSegAttrib);
11238    iemNativeRegFree(pReNative, idxReg2);
11239    iemNativeRegFree(pReNative, idxReg1);
11240
11241#else
11242 /** @todo arm64 TLB code */
11243 RT_NOREF(fAccess, fAlignMask, cbMem);
11244#endif
11245
11246 /*
11247     * Call helper to do the mapping.
11248 * We flush all guest register shadow copies here.
11249 */
11250 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
11251
11252#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11253 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11254#else
11255 RT_NOREF(idxInstr);
11256#endif
11257
11258 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
11259 if (iSegReg != UINT8_MAX)
11260 {
11261 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
11262 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
11263 }
11264
11265 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem */
11266 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem);
11267
11268 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo */
11269 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
11270 off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo, true /*fFlushShadows*/);
11271
11272 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11273 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11274
11275 /* Done setting up parameters, make the call. */
11276 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11277
11278 /*
11279     * Put the result in the right register.
11280 */
11281 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
11282 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
11283 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
11284 iemNativeVarRegisterRelease(pReNative, idxVarMem);
11285
11286 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11287
11288 return off;
11289}
11290
11291
11292#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
11293 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, \
11294 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
11295
11296#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
11297 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_WRITE, \
11298 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
11299
11300#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
11301 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ, \
11302 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
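/*
 * At the IEM_MC level these are always paired with one of the mapping
 * statements above, roughly like this sketch (variable names vary from one
 * instruction emitter to another):
 *
 *      IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
 *      ... access the memory via pu16Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 *
 * Both statements expand to emitter calls here, so the unmap emitter below can
 * rely on the bUnmapInfo variable having been set up by the map emitter.
 */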
11303
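/**
 * Common emitter for the IEM_MC_MEM_COMMIT_AND_UNMAP_RW/WO/RO statements.
 *
 * Emits code that tests the bUnmapInfo variable and, only when it is non-zero,
 * calls @a pfnFunction to commit and unmap the mapping.  The variable is freed
 * in either case.
 *
 * @returns New code buffer offset; throws a VBox status code via longjmp on
 *          failure.
 * @param   pReNative       The native recompile state.
 * @param   off             The current code buffer offset.
 * @param   idxVarUnmapInfo The variable holding the bUnmapInfo value from the
 *                          corresponding IEM_MC_MEM_MAP_XXX statement.
 * @param   fAccess         The IEM_ACCESS_TYPE_XXX mask (strict checks only).
 * @param   pfnFunction     The iemNativeHlpMemCommitAndUnmapXxx helper to call.
 * @param   idxInstr        The instruction number within the TB.
 */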
11304DECL_INLINE_THROW(uint32_t)
11305iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
11306 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
11307{
11308 /*
11309 * Assert sanity.
11310 */
11311 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
11312 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
11313 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
11314 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
11315#ifdef VBOX_STRICT
11316 switch (fAccess & IEM_ACCESS_TYPE_MASK)
11317 {
11318 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
11319 case IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
11320 case IEM_ACCESS_TYPE_READ: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
11321 default: AssertFailed();
11322 }
11323#else
11324 RT_NOREF(fAccess);
11325#endif
11326
11327 /*
11328 * To keep things simple we have to commit any pending writes first as we
11329 * may end up making calls (there shouldn't be any at this point, so this
11330 * is just for consistency).
11331 */
11332 /** @todo we could postpone this till we make the call and reload the
11333 * registers after returning from the call. Not sure if that's sensible or
11334 * not, though. */
11335 off = iemNativeRegFlushPendingWrites(pReNative, off);
11336
11337 /*
11338 * Move/spill/flush stuff out of call-volatile registers.
11339 *
11340 * We exclude any register holding the bUnmapInfo variable, as we'll be
11341 * checking it after returning from the call and will free it afterwards.
11342 */
11343 /** @todo save+restore active registers and maybe guest shadows in miss
11344 * scenario. */
11345 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
11346
11347 /*
11348 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
11349 * to call the unmap helper function.
11350 *
11351     * The likelihood of it being zero is higher than for the TLB hit when doing
11352     * the mapping, as a TLB miss for a well-aligned and unproblematic memory
11353     * access should also end up with a mapping that won't need special unmapping.
11354 */
11355 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
11356 * should speed up things for the pure interpreter as well when TLBs
11357 * are enabled. */
11358#ifdef RT_ARCH_AMD64
11359 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
11360 {
11361 /* test byte [rbp - xxx], 0ffh */
11362 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11363 pbCodeBuf[off++] = 0xf6;
11364 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
11365 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
11366 pbCodeBuf[off++] = 0xff;
11367 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11368 }
11369 else
11370#endif
11371 {
11372 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
11373 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
11374 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
11375 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
11376 }
11377 uint32_t const offJmpFixup = off;
11378 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
11379
11380 /*
11381 * Call the unmap helper function.
11382 */
11383#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
11384 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11385#else
11386 RT_NOREF(idxInstr);
11387#endif
11388
11389 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
11390 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
11391 0 /*offAddend*/, true /*fVarAllowInVolatileReg*/);
11392
11393 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11394 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11395
11396 /* Done setting up parameters, make the call. */
11397 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11398
11399    /* The bUnmapInfo variable is implicitly freed by these MCs. */
11400 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
11401
11402 /*
11403 * Done, just fixup the jump for the non-call case.
11404 */
11405 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
11406
11407 return off;
11408}
11409
11410
11411
11412/*********************************************************************************************************************************
11413* State and Exceptions *
11414*********************************************************************************************************************************/
11415
11416#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11417#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11418
11419#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11420#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11421#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11422
11423#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11424#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11425#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11426
11427
11428DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
11429{
11430 /** @todo this needs a lot more work later. */
11431 RT_NOREF(pReNative, fForChange);
11432 return off;
11433}
11434
11435
11436/*********************************************************************************************************************************
11437* The native code generator functions for each MC block. *
11438*********************************************************************************************************************************/
11439
11440
11441/*
11442 * Include g_apfnIemNativeRecompileFunctions and associated functions.
11443 *
11444 * This should probably live in its own file later, but let's see what the
11445 * compile times turn out to be first.
11446 */
11447#include "IEMNativeFunctions.cpp.h"
11448
11449
11450
11451/*********************************************************************************************************************************
11452* Recompiler Core. *
11453*********************************************************************************************************************************/
11454
11455
11456/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
11457static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
11458{
11459 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
11460 pDis->cbCachedInstr += cbMaxRead;
11461 RT_NOREF(cbMinRead);
11462 return VERR_NO_DATA;
11463}
11464
11465
11466/**
11467 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
11468 * @returns pszBuf.
11469 * @param fFlags The flags.
11470 * @param pszBuf The output buffer.
11471 * @param cbBuf The output buffer size. At least 32 bytes.
11472 */
11473DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
11474{
11475 Assert(cbBuf >= 32);
11476 static RTSTRTUPLE const s_aModes[] =
11477 {
11478 /* [00] = */ { RT_STR_TUPLE("16BIT") },
11479 /* [01] = */ { RT_STR_TUPLE("32BIT") },
11480 /* [02] = */ { RT_STR_TUPLE("!2!") },
11481 /* [03] = */ { RT_STR_TUPLE("!3!") },
11482 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
11483 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
11484 /* [06] = */ { RT_STR_TUPLE("!6!") },
11485 /* [07] = */ { RT_STR_TUPLE("!7!") },
11486 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
11487 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
11488 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
11489 /* [0b] = */ { RT_STR_TUPLE("!b!") },
11490 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
11491 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
11492 /* [0e] = */ { RT_STR_TUPLE("!e!") },
11493 /* [0f] = */ { RT_STR_TUPLE("!f!") },
11494 /* [10] = */ { RT_STR_TUPLE("!10!") },
11495 /* [11] = */ { RT_STR_TUPLE("!11!") },
11496 /* [12] = */ { RT_STR_TUPLE("!12!") },
11497 /* [13] = */ { RT_STR_TUPLE("!13!") },
11498 /* [14] = */ { RT_STR_TUPLE("!14!") },
11499 /* [15] = */ { RT_STR_TUPLE("!15!") },
11500 /* [16] = */ { RT_STR_TUPLE("!16!") },
11501 /* [17] = */ { RT_STR_TUPLE("!17!") },
11502 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
11503 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
11504 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
11505 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
11506 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
11507 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
11508 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
11509 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
11510 };
11511 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
11512 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
11513 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
11514
11515 pszBuf[off++] = ' ';
11516 pszBuf[off++] = 'C';
11517 pszBuf[off++] = 'P';
11518 pszBuf[off++] = 'L';
11519 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
11520 Assert(off < 32);
11521
11522 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
11523
11524 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
11525 {
11526 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
11527 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
11528 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
11529 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
11530 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
11531 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
11532 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
11533 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
11534 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
11535 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
11536 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
11537 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
11538 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
11539 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
11540 };
11541 if (fFlags)
11542 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
11543 if (s_aFlags[i].fFlag & fFlags)
11544 {
11545 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
11546 pszBuf[off++] = ' ';
11547 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
11548 off += s_aFlags[i].cchName;
11549 fFlags &= ~s_aFlags[i].fFlag;
11550 if (!fFlags)
11551 break;
11552 }
11553 pszBuf[off] = '\0';
11554
11555 return pszBuf;
11556}
11557
11558
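/**
 * Disassembles a native TB for logging/debugging purposes.
 *
 * When debug info is available (IEMNATIVE_WITH_TB_DEBUG_INFO) the native
 * disassembly is interleaved with the guest instructions, threaded call
 * details and label definitions recorded during recompilation; otherwise the
 * guest code ranges are dumped first, followed by the raw native instructions.
 */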
11559DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
11560{
11561 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
11562#if defined(RT_ARCH_AMD64)
11563 static const char * const a_apszMarkers[] =
11564 {
11565 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
11566 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
11567 };
11568#endif
11569
11570 char szDisBuf[512];
11571 DISSTATE Dis;
11572 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
11573 uint32_t const cNative = pTb->Native.cInstructions;
11574 uint32_t offNative = 0;
11575#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
11576 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
11577#endif
11578 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
11579 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
11580 : DISCPUMODE_64BIT;
11581#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
11582 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
11583#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
11584 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
11585#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
11586# error "Port me"
11587#else
11588 csh hDisasm = ~(size_t)0;
11589# if defined(RT_ARCH_AMD64)
11590 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
11591# elif defined(RT_ARCH_ARM64)
11592 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
11593# else
11594# error "Port me"
11595# endif
11596 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
11597#endif
11598
11599 /*
11600 * Print TB info.
11601 */
11602 pHlp->pfnPrintf(pHlp,
11603 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
11604 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
11605 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
11606 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
11607#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
11608 if (pDbgInfo && pDbgInfo->cEntries > 1)
11609 {
11610 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
11611
11612 /*
11613 * This disassembly is driven by the debug info which follows the native
11614             * code and indicates where the next guest instruction starts, where
11615             * labels are and such things.
11616 */
11617 uint32_t idxThreadedCall = 0;
11618 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
11619 uint8_t idxRange = UINT8_MAX;
11620 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
11621 uint32_t offRange = 0;
11622 uint32_t offOpcodes = 0;
11623 uint32_t const cbOpcodes = pTb->cbOpcodes;
11624 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
11625 uint32_t const cDbgEntries = pDbgInfo->cEntries;
11626 uint32_t iDbgEntry = 1;
11627 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
11628
11629 while (offNative < cNative)
11630 {
11631 /* If we're at or have passed the point where the next chunk of debug
11632 info starts, process it. */
11633 if (offDbgNativeNext <= offNative)
11634 {
11635 offDbgNativeNext = UINT32_MAX;
11636 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
11637 {
11638 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
11639 {
11640 case kIemTbDbgEntryType_GuestInstruction:
11641 {
11642 /* Did the exec flag change? */
11643 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
11644 {
11645 pHlp->pfnPrintf(pHlp,
11646 " fExec change %#08x -> %#08x %s\n",
11647 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
11648 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
11649 szDisBuf, sizeof(szDisBuf)));
11650 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
11651 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
11652 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
11653 : DISCPUMODE_64BIT;
11654 }
11655
11656                        /* New opcode range? We need to fend off a spurious debug info entry here for cases
11657 where the compilation was aborted before the opcode was recorded and the actual
11658 instruction was translated to a threaded call. This may happen when we run out
11659 of ranges, or when some complicated interrupts/FFs are found to be pending or
11660 similar. So, we just deal with it here rather than in the compiler code as it
11661 is a lot simpler to do here. */
11662 if ( idxRange == UINT8_MAX
11663 || idxRange >= cRanges
11664 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
11665 {
11666 idxRange += 1;
11667 if (idxRange < cRanges)
11668 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
11669 else
11670 continue;
11671 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
11672 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
11673 + (pTb->aRanges[idxRange].idxPhysPage == 0
11674 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
11675 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
11676 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
11677 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
11678 pTb->aRanges[idxRange].idxPhysPage);
11679 GCPhysPc += offRange;
11680 }
11681
11682 /* Disassemble the instruction. */
11683 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
11684 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
11685 uint32_t cbInstr = 1;
11686 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
11687 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
11688 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
11689 if (RT_SUCCESS(rc))
11690 {
11691 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
11692 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
11693 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11694 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11695
11696 static unsigned const s_offMarker = 55;
11697 static char const s_szMarker[] = " ; <--- guest";
11698 if (cch < s_offMarker)
11699 {
11700 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
11701 cch = s_offMarker;
11702 }
11703 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
11704 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
11705
11706 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
11707 }
11708 else
11709 {
11710 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
11711 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
11712 cbInstr = 1;
11713 }
11714 GCPhysPc += cbInstr;
11715 offOpcodes += cbInstr;
11716 offRange += cbInstr;
11717 continue;
11718 }
11719
11720 case kIemTbDbgEntryType_ThreadedCall:
11721 pHlp->pfnPrintf(pHlp,
11722 " Call #%u to %s (%u args) - %s\n",
11723 idxThreadedCall,
11724 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
11725 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
11726 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
11727 idxThreadedCall++;
11728 continue;
11729
11730 case kIemTbDbgEntryType_GuestRegShadowing:
11731 {
11732 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
11733 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
11734 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
11735 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
11736 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
11737 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
11738 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
11739 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
11740 else
11741 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
11742 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
11743 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
11744 continue;
11745 }
11746
11747 case kIemTbDbgEntryType_Label:
11748 {
11749 const char *pszName = "what_the_fudge";
11750 const char *pszComment = "";
11751 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
11752 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
11753 {
11754 case kIemNativeLabelType_Return:
11755 pszName = "Return";
11756 break;
11757 case kIemNativeLabelType_ReturnBreak:
11758 pszName = "ReturnBreak";
11759 break;
11760 case kIemNativeLabelType_ReturnWithFlags:
11761 pszName = "ReturnWithFlags";
11762 break;
11763 case kIemNativeLabelType_NonZeroRetOrPassUp:
11764 pszName = "NonZeroRetOrPassUp";
11765 break;
11766 case kIemNativeLabelType_RaiseGp0:
11767 pszName = "RaiseGp0";
11768 break;
11769 case kIemNativeLabelType_ObsoleteTb:
11770 pszName = "ObsoleteTb";
11771 break;
11772 case kIemNativeLabelType_NeedCsLimChecking:
11773 pszName = "NeedCsLimChecking";
11774 break;
11775 case kIemNativeLabelType_CheckBranchMiss:
11776 pszName = "CheckBranchMiss";
11777 break;
11778 case kIemNativeLabelType_If:
11779 pszName = "If";
11780 fNumbered = true;
11781 break;
11782 case kIemNativeLabelType_Else:
11783 pszName = "Else";
11784 fNumbered = true;
11785 pszComment = " ; regs state restored pre-if-block";
11786 break;
11787 case kIemNativeLabelType_Endif:
11788 pszName = "Endif";
11789 fNumbered = true;
11790 break;
11791 case kIemNativeLabelType_CheckIrq:
11792 pszName = "CheckIrq_CheckVM";
11793 fNumbered = true;
11794 break;
11795 case kIemNativeLabelType_TlbMiss:
11796 pszName = "TlbMiss";
11797 fNumbered = true;
11798 break;
11799 case kIemNativeLabelType_TlbDone:
11800 pszName = "TlbDone";
11801 fNumbered = true;
11802 break;
11803 case kIemNativeLabelType_Invalid:
11804 case kIemNativeLabelType_End:
11805 break;
11806 }
11807 if (fNumbered)
11808 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
11809 else
11810 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
11811 continue;
11812 }
11813
11814 case kIemTbDbgEntryType_NativeOffset:
11815 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
11816 Assert(offDbgNativeNext > offNative);
11817 break;
11818
11819 default:
11820 AssertFailed();
11821 }
11822 iDbgEntry++;
11823 break;
11824 }
11825 }
11826
11827 /*
11828 * Disassemble the next native instruction.
11829 */
11830 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
11831# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
11832 uint32_t cbInstr = sizeof(paNative[0]);
11833 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
11834 if (RT_SUCCESS(rc))
11835 {
11836# if defined(RT_ARCH_AMD64)
11837 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
11838 {
11839 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
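/* Marker payload (see the RT_MAKE_U32() in iemNativeRecompile): the low word holds the call
   number, with bit 15 set when the call was recompiled natively; the high word holds the
   IEMTHREADEDFUNCS index, and values outside that range are treated as plain marker strings
   (bit 31 masked off below).  E.g. uInfo=0x00230004 decodes as call #4 of threaded function
   0x23, not yet recompiled ("todo"). */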
11840 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
11841 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
11842 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
11843 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
11844 uInfo & 0x8000 ? "recompiled" : "todo");
11845 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
11846 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
11847 else
11848 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
11849 }
11850 else
11851# endif
11852 {
11853# ifdef RT_ARCH_AMD64
11854 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
11855 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
11856 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11857 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11858# elif defined(RT_ARCH_ARM64)
11859 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
11860 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11861 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11862# else
11863# error "Port me"
11864# endif
11865 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
11866 }
11867 }
11868 else
11869 {
11870# if defined(RT_ARCH_AMD64)
11871 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
11872 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
11873# elif defined(RT_ARCH_ARM64)
11874 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
11875# else
11876# error "Port me"
11877# endif
11878 cbInstr = sizeof(paNative[0]);
11879 }
11880 offNative += cbInstr / sizeof(paNative[0]);
11881
11882# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
11883 cs_insn *pInstr;
11884 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
11885 (uintptr_t)pNativeCur, 1, &pInstr);
11886 if (cInstrs > 0)
11887 {
11888 Assert(cInstrs == 1);
11889# if defined(RT_ARCH_AMD64)
11890 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
11891 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
11892# else
11893 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
11894 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
11895# endif
11896 offNative += pInstr->size / sizeof(*pNativeCur);
11897 cs_free(pInstr, cInstrs);
11898 }
11899 else
11900 {
11901# if defined(RT_ARCH_AMD64)
11902 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
11903 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
11904# else
11905 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
11906# endif
11907 offNative++;
11908 }
11909# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
11910 }
11911 }
11912 else
11913#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
11914 {
11915 /*
11916 * No debug info, just disassemble the x86 code and then the native code.
11917 *
11918 * First the guest code:
11919 */
11920 for (unsigned i = 0; i < pTb->cRanges; i++)
11921 {
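/* A range with idxPhysPage == 0 reads its opcodes from the page the TB was created for;
   any other value indexes aGCPhysPages, shifted down by one. */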
11922 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
11923 + (pTb->aRanges[i].idxPhysPage == 0
11924 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
11925 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
11926 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
11927 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
11928 unsigned off = pTb->aRanges[i].offOpcodes;
11929 /** @todo this ain't working when crossing pages! */
11930 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
11931 while (off < cbOpcodes)
11932 {
11933 uint32_t cbInstr = 1;
11934 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
11935 &pTb->pabOpcodes[off], cbOpcodes - off,
11936 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
11937 if (RT_SUCCESS(rc))
11938 {
11939 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
11940 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
11941 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11942 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11943 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
11944 GCPhysPc += cbInstr;
11945 off += cbInstr;
11946 }
11947 else
11948 {
11949 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
11950 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
11951 break;
11952 }
11953 }
11954 }
11955
11956 /*
11957 * Then the native code:
11958 */
11959 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
11960 while (offNative < cNative)
11961 {
11962 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
11963# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
11964 uint32_t cbInstr = sizeof(paNative[0]);
11965 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
11966 if (RT_SUCCESS(rc))
11967 {
11968# if defined(RT_ARCH_AMD64)
11969 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
11970 {
11971 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
11972 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
11973 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
11974 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
11975 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
11976 uInfo & 0x8000 ? "recompiled" : "todo");
11977 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
11978 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
11979 else
11980 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
11981 }
11982 else
11983# endif
11984 {
11985# ifdef RT_ARCH_AMD64
11986 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
11987 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
11988 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11989 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11990# elif defined(RT_ARCH_ARM64)
11991 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
11992 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11993 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11994# else
11995# error "Port me"
11996# endif
11997 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
11998 }
11999 }
12000 else
12001 {
12002# if defined(RT_ARCH_AMD64)
12003 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
12004 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
12005# else
12006 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
12007# endif
12008 cbInstr = sizeof(paNative[0]);
12009 }
12010 offNative += cbInstr / sizeof(paNative[0]);
12011
12012# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12013 cs_insn *pInstr;
12014 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
12015 (uintptr_t)pNativeCur, 1, &pInstr);
12016 if (cInstrs > 0)
12017 {
12018 Assert(cInstrs == 1);
12019# if defined(RT_ARCH_AMD64)
12020 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
12021 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
12022# else
12023 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
12024 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
12025# endif
12026 offNative += pInstr->size / sizeof(*pNativeCur);
12027 cs_free(pInstr, cInstrs);
12028 }
12029 else
12030 {
12031# if defined(RT_ARCH_AMD64)
12032 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
12033 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
12034# else
12035 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
12036# endif
12037 offNative++;
12038 }
12039# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12040 }
12041 }
12042
12043#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
12044 /* Cleanup. */
12045 cs_close(&hDisasm);
12046#endif
12047}
12048
12049
12050/**
12051 * Recompiles the given threaded TB into a native one.
12052 *
12053 * In case of failure the translation block will be returned as-is.
12054 *
12055 * @returns pTb.
12056 * @param pVCpu The cross context virtual CPU structure of the calling
12057 * thread.
12058 * @param pTb The threaded translation block to recompile to native.
12059 */
12060DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
12061{
12062 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
12063
12064 /*
12065 * The first time thru, we allocate the recompiler state, the other times
12066 * we just need to reset it before using it again.
12067 */
12068 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
12069 if (RT_LIKELY(pReNative))
12070 iemNativeReInit(pReNative, pTb);
12071 else
12072 {
12073 pReNative = iemNativeInit(pVCpu, pTb);
12074 AssertReturn(pReNative, pTb);
12075 }
12076
12077 /*
12078 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
12079 * for aborting if an error happens.
12080 */
12081 uint32_t cCallsLeft = pTb->Thrd.cCalls;
12082#ifdef LOG_ENABLED
12083 uint32_t const cCallsOrg = cCallsLeft;
12084#endif
12085 uint32_t off = 0;
12086 int rc = VINF_SUCCESS;
12087 IEMNATIVE_TRY_SETJMP(pReNative, rc)
12088 {
12089 /*
12090 * Emit prolog code (fixed).
12091 */
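/* The prolog is the same for every TB: it sets up the stack frame and the fixed host
   register holding pVCpu that the emitters below rely on. */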
12092 off = iemNativeEmitProlog(pReNative, off);
12093
12094 /*
12095 * Convert the calls to native code.
12096 */
12097#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12098 int32_t iGstInstr = -1;
12099#endif
12100#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
12101 uint32_t cThreadedCalls = 0;
12102 uint32_t cRecompiledCalls = 0;
12103#endif
12104 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
12105 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
12106 while (cCallsLeft-- > 0)
12107 {
12108 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
12109
12110 /*
12111 * Debug info and assembly markup.
12112 */
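/* BltIn_CheckMode carries the expected IEM execution mode in its first parameter; mirror it
   into pReNative->fExec so subsequent emitters see the mode in effect at runtime. */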
12113 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
12114 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
12115#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
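/* Record the native offset this call starts at and the guest instruction it belongs to, so
   the TB disassembler above can interleave guest code, threaded call info and native code. */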
12116 iemNativeDbgInfoAddNativeOffset(pReNative, off);
12117 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
12118 {
12119 if (iGstInstr < (int32_t)pTb->cInstructions)
12120 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
12121 else
12122 Assert(iGstInstr == pTb->cInstructions);
12123 iGstInstr = pCallEntry->idxInstr;
12124 }
12125 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
12126#endif
12127#if defined(VBOX_STRICT)
12128 off = iemNativeEmitMarker(pReNative, off,
12129 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
12130 pCallEntry->enmFunction));
12131#endif
12132#if defined(VBOX_STRICT)
12133 iemNativeRegAssertSanity(pReNative);
12134#endif
12135
12136 /*
12137 * Actual work.
12138 */
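/* pfnRecom is NULL for threaded functions that have no native emitter yet; those are glued
   in as a call to the threaded C implementation instead. */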
12139 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
12140 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)"));
12141 if (pfnRecom) /** @todo stats on this. */
12142 {
12143 off = pfnRecom(pReNative, off, pCallEntry);
12144 STAM_REL_STATS({cRecompiledCalls++;});
12145 }
12146 else
12147 {
12148 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
12149 STAM_REL_STATS({cThreadedCalls++;});
12150 }
12151 Assert(off <= pReNative->cInstrBufAlloc);
12152 Assert(pReNative->cCondDepth == 0);
12153
12154 /*
12155 * Advance.
12156 */
12157 pCallEntry++;
12158 }
12159
12160 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
12161 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
12162 if (!cThreadedCalls)
12163 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
12164
12165 /*
12166 * Emit the epilog code.
12167 */
12168 uint32_t idxReturnLabel;
12169 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
12170
12171 /*
12172 * Generate special jump labels.
12173 */
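/* These tail sections are only emitted when something in the TB actually requested the
   corresponding label, which is what the bmLabelTypes bitmap tracks. */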
12174 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
12175 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
12176 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
12177 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
12178 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
12179 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
12180 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
12181 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
12182 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
12183 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
12184 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
12185 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
12186 }
12187 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
12188 {
12189 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
12190 return pTb;
12191 }
12192 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
12193 Assert(off <= pReNative->cInstrBufAlloc);
12194
12195 /*
12196 * Make sure all labels have been defined.
12197 */
12198 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
12199#ifdef VBOX_STRICT
12200 uint32_t const cLabels = pReNative->cLabels;
12201 for (uint32_t i = 0; i < cLabels; i++)
12202 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
12203#endif
12204
12205 /*
12206 * Allocate executable memory, copy over the code we've generated.
12207 */
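/* Flush any delayed TB frees first so that executable memory they hold can be reused by the
   allocation below. */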
12208 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
12209 if (pTbAllocator->pDelayedFreeHead)
12210 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
12211
12212 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
12213 AssertReturn(paFinalInstrBuf, pTb);
12214 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
12215
12216 /*
12217 * Apply fixups.
12218 */
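/* Label and fixup offsets are in IEMNATIVEINSTR units (bytes on AMD64, 32-bit instruction
   words on ARM64).  The ARM64 fixups patch branch immediates: imm26 at bit 0 (B/BL), imm19
   at bit 5 (B.cond/CBZ/CBNZ) and imm14 at bit 5 (TBZ/TBNZ); e.g. a branch to a label five
   instructions ahead yields offDisp = +5 and an imm26 field of 5. */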
12219 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
12220 uint32_t const cFixups = pReNative->cFixups;
12221 for (uint32_t i = 0; i < cFixups; i++)
12222 {
12223 Assert(paFixups[i].off < off);
12224 Assert(paFixups[i].idxLabel < cLabels);
12225 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
12226 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
12227 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
12228 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
12229 switch (paFixups[i].enmType)
12230 {
12231#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
12232 case kIemNativeFixupType_Rel32:
12233 Assert(paFixups[i].off + 4 <= off);
12234 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12235 continue;
12236
12237#elif defined(RT_ARCH_ARM64)
12238 case kIemNativeFixupType_RelImm26At0:
12239 {
12240 Assert(paFixups[i].off < off);
12241 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12242 Assert(offDisp >= -33554432 && offDisp < 33554432); /* imm26 covers +/-2^25 instruction units */
12243 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
12244 continue;
12245 }
12246
12247 case kIemNativeFixupType_RelImm19At5:
12248 {
12249 Assert(paFixups[i].off < off);
12250 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12251 Assert(offDisp >= -262144 && offDisp < 262144);
12252 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
12253 continue;
12254 }
12255
12256 case kIemNativeFixupType_RelImm14At5:
12257 {
12258 Assert(paFixups[i].off < off);
12259 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12260 Assert(offDisp >= -8192 && offDisp < 8192);
12261 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
12262 continue;
12263 }
12264
12265#endif
12266 case kIemNativeFixupType_Invalid:
12267 case kIemNativeFixupType_End:
12268 break;
12269 }
12270 AssertFailed();
12271 }
12272
12273 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
12274 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
12275
12276 /*
12277 * Convert the translation block.
12278 */
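/* From here on the TB is a native one: the threaded call table is no longer needed and the
   type bits in fFlags are switched so the executor treats it as native code. */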
12279 RTMemFree(pTb->Thrd.paCalls);
12280 pTb->Native.paInstructions = paFinalInstrBuf;
12281 pTb->Native.cInstructions = off;
12282 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
12283#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12284 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
12285 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
12286#endif
12287
12288 Assert(pTbAllocator->cThreadedTbs > 0);
12289 pTbAllocator->cThreadedTbs -= 1;
12290 pTbAllocator->cNativeTbs += 1;
12291 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
12292
12293#ifdef LOG_ENABLED
12294 /*
12295 * Disassemble to the log if enabled.
12296 */
12297 if (LogIs3Enabled())
12298 {
12299 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
12300 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
12301# ifdef DEBUG_bird
12302 RTLogFlush(NULL);
12303# endif
12304 }
12305#endif
12306 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
12307
12308 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
12309 return pTb;
12310}
12311