VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@102727

Last change on this file since 102727 was 102724, checked in by vboxsync, 14 months ago

VMM/IEM: Moved the TLB lookup code emitting into a separate function and straighten the code path a little by starting with a jump and moving the check_expand_down stuff in after tlbmiss. bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 102724 2023-12-28 21:15:52Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128/*********************************************************************************************************************************
129* Internal Functions *
130*********************************************************************************************************************************/
131#ifdef VBOX_STRICT
132static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
133 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
134static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
135#endif
136#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
137static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
138static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
139#endif
140DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
142 IEMNATIVEGSTREG enmGstReg, uint32_t off);
143
144
145/*********************************************************************************************************************************
146* Executable Memory Allocator *
147*********************************************************************************************************************************/
148/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
149 * Use an alternative chunk sub-allocator that does not store internal data
150 * in the chunk.
151 *
152 * Using RTHeapSimple is not practical on newer darwin systems where
153 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
154 * memory. We would have to change the protection of the whole chunk for
155 * every call to RTHeapSimple, which would be rather expensive.
156 *
157 * This alternative implementation restricts page protection modifications
158 * to the pages backing the executable memory we just allocated.
159 */
160#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
161/** The chunk sub-allocation unit size in bytes. */
162#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
163/** The chunk sub-allocation unit size as a shift factor. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
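/* Illustrative arithmetic for the sub-allocation unit (not part of the original
 * source): a 200 byte request rounds up to 2 units (256 bytes) via
 * (cbReq + 127) >> 7, and a 64 MiB chunk holds 64M / 128 = 524288 units,
 * i.e. 524288 / 64 = 8192 uint64_t words (64 KiB) of allocation bitmap. */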
165
166#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
167# ifdef IEMNATIVE_USE_GDB_JIT
168# define IEMNATIVE_USE_GDB_JIT_ET_DYN
169
170/** GDB JIT: Code entry. */
171typedef struct GDBJITCODEENTRY
172{
173 struct GDBJITCODEENTRY *pNext;
174 struct GDBJITCODEENTRY *pPrev;
175 uint8_t *pbSymFile;
176 uint64_t cbSymFile;
177} GDBJITCODEENTRY;
178
179/** GDB JIT: Actions. */
180typedef enum GDBJITACTIONS : uint32_t
181{
182 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
183} GDBJITACTIONS;
184
185/** GDB JIT: Descriptor. */
186typedef struct GDBJITDESCRIPTOR
187{
188 uint32_t uVersion;
189 GDBJITACTIONS enmAction;
190 GDBJITCODEENTRY *pRelevant;
191 GDBJITCODEENTRY *pHead;
192 /** Our addition: */
193 GDBJITCODEENTRY *pTail;
194} GDBJITDESCRIPTOR;
195
196/** GDB JIT: Our simple symbol file data. */
197typedef struct GDBJITSYMFILE
198{
199 Elf64_Ehdr EHdr;
200# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
201 Elf64_Shdr aShdrs[5];
202# else
203 Elf64_Shdr aShdrs[7];
204 Elf64_Phdr aPhdrs[2];
205# endif
206 /** The dwarf ehframe data for the chunk. */
207 uint8_t abEhFrame[512];
208 char szzStrTab[128];
209 Elf64_Sym aSymbols[3];
210# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
211 Elf64_Sym aDynSyms[2];
212 Elf64_Dyn aDyn[6];
213# endif
214} GDBJITSYMFILE;
215
216extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
217extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
218
219/** Init once for g_IemNativeGdbJitLock. */
220static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
221/** Init once for the critical section. */
222static RTCRITSECT g_IemNativeGdbJitLock;
223
224/** GDB reads the info here. */
225GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
226
227/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
228DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
229{
230 ASMNopPause();
231}
232
233/** @callback_method_impl{FNRTONCE} */
234static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
235{
236 RT_NOREF(pvUser);
237 return RTCritSectInit(&g_IemNativeGdbJitLock);
238}
239
240
241# endif /* IEMNATIVE_USE_GDB_JIT */
242
243/**
244 * Per-chunk unwind info for non-windows hosts.
245 */
246typedef struct IEMEXECMEMCHUNKEHFRAME
247{
248# ifdef IEMNATIVE_USE_LIBUNWIND
249 /** The offset of the FDA into abEhFrame. */
250 uintptr_t offFda;
251# else
252 /** 'struct object' storage area. */
253 uint8_t abObject[1024];
254# endif
255# ifdef IEMNATIVE_USE_GDB_JIT
256# if 0
257 /** The GDB JIT 'symbol file' data. */
258 GDBJITSYMFILE GdbJitSymFile;
259# endif
260 /** The GDB JIT list entry. */
261 GDBJITCODEENTRY GdbJitEntry;
262# endif
263 /** The dwarf ehframe data for the chunk. */
264 uint8_t abEhFrame[512];
265} IEMEXECMEMCHUNKEHFRAME;
266/** Pointer to per-chunk unwind info for non-windows hosts. */
267typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
268#endif
269
270
271/**
272 * A chunk of executable memory.
273 */
274typedef struct IEMEXECMEMCHUNK
275{
276#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
277 /** Number of free items in this chunk. */
278 uint32_t cFreeUnits;
279 /** Hint where to start searching for free space in the allocation bitmap. */
280 uint32_t idxFreeHint;
281#else
282 /** The heap handle. */
283 RTHEAPSIMPLE hHeap;
284#endif
285 /** Pointer to the chunk. */
286 void *pvChunk;
287#ifdef IN_RING3
288 /**
289 * Pointer to the unwind information.
290 *
291 * This is used during C++ throw and longjmp (windows and probably most other
292 * platforms). Some debuggers (windbg) make use of it as well.
293 *
294 * Windows: This is allocated from hHeap on windows because (at least for
295 * AMD64) the UNWIND_INFO structure address in the
296 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
297 *
298 * Others: Allocated from the regular heap to avoid unnecessary executable data
299 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
300 void *pvUnwindInfo;
301#elif defined(IN_RING0)
302 /** Allocation handle. */
303 RTR0MEMOBJ hMemObj;
304#endif
305} IEMEXECMEMCHUNK;
306/** Pointer to a memory chunk. */
307typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
308
309
310/**
311 * Executable memory allocator for the native recompiler.
312 */
313typedef struct IEMEXECMEMALLOCATOR
314{
315 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
316 uint32_t uMagic;
317
318 /** The chunk size. */
319 uint32_t cbChunk;
320 /** The maximum number of chunks. */
321 uint32_t cMaxChunks;
322 /** The current number of chunks. */
323 uint32_t cChunks;
324 /** Hint where to start looking for available memory. */
325 uint32_t idxChunkHint;
326 /** Statistics: Current number of allocations. */
327 uint32_t cAllocations;
328
329 /** The total amount of memory available. */
330 uint64_t cbTotal;
331 /** Total amount of free memory. */
332 uint64_t cbFree;
333 /** Total amount of memory allocated. */
334 uint64_t cbAllocated;
335
336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
337 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
338 *
339 * Since the chunk size is a power of two and the minimum chunk size is a lot
340 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
341 * require a whole number of uint64_t elements in the allocation bitmap. So,
342 * for the sake of simplicity, they are allocated as one contiguous block
343 * covering all chunks. */
344 uint64_t *pbmAlloc;
345 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
346 uint32_t cUnitsPerChunk;
347 /** Number of bitmap elements per chunk (for quickly locating the bitmap
348 * portion corresponding to a chunk). */
349 uint32_t cBitmapElementsPerChunk;
350#else
351 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
352 * @{ */
353 /** The size of the heap internal block header. This is used to adjust the
354 * requested memory size to make sure there is exactly enough room for a header at
355 * the end of the blocks we allocate before the next 64 byte alignment line. */
356 uint32_t cbHeapBlockHdr;
357 /** The size of the initial heap allocation required to make sure the first
358 * allocation is correctly aligned. */
359 uint32_t cbHeapAlignTweak;
360 /** The alignment tweak allocation address. */
361 void *pvAlignTweak;
362 /** @} */
363#endif
364
365#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
366 /** Pointer to the array of unwind info running parallel to aChunks (same
367 * allocation as this structure, located after the bitmaps).
368 * (For Windows, the structures must reside in 32-bit RVA distance to the
369 * actual chunk, so they are allocated off the chunk.) */
370 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
371#endif
372
373 /** The allocation chunks. */
374 RT_FLEXIBLE_ARRAY_EXTENSION
375 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
376} IEMEXECMEMALLOCATOR;
377/** Pointer to an executable memory allocator. */
378typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
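/* Rough layout of the single RTMemAllocZ block backing this structure, as set
 * up by iemExecMemAllocatorInit below (a sketch, not authoritative):
 *
 *   [IEMEXECMEMALLOCATOR header + aChunks[cMaxChunks]]
 *   [allocation bitmaps, cBitmapElementsPerChunk * 8 bytes per chunk]   (alt sub-allocator only)
 *   [IEMEXECMEMCHUNKEHFRAME array, one entry per chunk]                 (ring-3, non-Windows only)
 *
 * The executable chunks themselves are separate RTMemPageAllocEx allocations. */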
379
380/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
381#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
382
383
384static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
385
386
387/**
388 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
389 * the heap statistics.
390 */
391static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
392 uint32_t cbReq, uint32_t idxChunk)
393{
394 pExecMemAllocator->cAllocations += 1;
395 pExecMemAllocator->cbAllocated += cbReq;
396#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
397 pExecMemAllocator->cbFree -= cbReq;
398#else
399 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
400#endif
401 pExecMemAllocator->idxChunkHint = idxChunk;
402
403#ifdef RT_OS_DARWIN
404 /*
405 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
406 * on darwin. So, we mark the pages returned as read+write after alloc and
407 * expect the caller to call iemExecMemAllocatorReadyForUse when done
408 * writing to the allocation.
409 *
410 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
411 * for details.
412 */
413 /** @todo detect if this is necessary... it wasn't required on 10.15 or
414 * whatever older version it was. */
415 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
416 AssertRC(rc);
417#endif
418
419 return pvRet;
420}
421
422
423#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
424static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
425 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
426{
427 /*
428 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
429 */
430 Assert(!(cToScan & 63));
431 Assert(!(idxFirst & 63));
432 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
433 pbmAlloc += idxFirst / 64;
434
435 /*
436 * Scan the bitmap for cReqUnits consecutive clear bits
437 */
438 /** @todo This can probably be done more efficiently for non-x86 systems. */
439 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
440 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
441 {
442 uint32_t idxAddBit = 1;
443 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
444 idxAddBit++;
445 if (idxAddBit >= cReqUnits)
446 {
447 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
448
449 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
450 pChunk->cFreeUnits -= cReqUnits;
451 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
452
453 void * const pvRet = (uint8_t *)pChunk->pvChunk
454 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
455
456 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
457 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
458 }
459
460 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
461 }
462 return NULL;
463}
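/* Illustrative walk-through of the scan above (not from the original source):
 * with cReqUnits = 3 and the low bitmap bits reading 1,1,0,0,1,0,0,0,...,
 * ASMBitFirstClear returns bit 2; the probe loop then hits the set bit 4 after
 * only two clear bits, so ASMBitNextClear resumes after bit 3 and lands on
 * bit 5, where bits 5..7 are all clear.  Bits 5..7 get set and the returned
 * address is pvChunk + ((idxFirst + 5) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT). */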
464#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
465
466
467static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
468{
469#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
470 /*
471 * Figure out how much to allocate.
472 */
473 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
474 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
475 {
476 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
477 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
478 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
479 {
480 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
481 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
482 if (pvRet)
483 return pvRet;
484 }
485 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
486 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
487 cReqUnits, idxChunk);
488 }
489#else
490 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
491 if (pvRet)
492 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
493#endif
494 return NULL;
495
496}
497
498
499/**
500 * Allocates @a cbReq bytes of executable memory.
501 *
502 * @returns Pointer to the memory, NULL if out of memory or other problem
503 * encountered.
504 * @param pVCpu The cross context virtual CPU structure of the calling
505 * thread.
506 * @param cbReq How many bytes are required.
507 */
508static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
509{
510 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
511 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
512 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
513
514
515 for (unsigned iIteration = 0;; iIteration++)
516 {
517 /*
518 * Adjust the request size so it'll fit the allocator alignment/whatnot.
519 *
520 * For the RTHeapSimple allocator this means to follow the logic described
521 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
522 * existing chunks if we think we've got sufficient free memory around.
523 *
524 * While for the alternative one we just align it up to a whole unit size.
525 */
526#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
527 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
528#else
529 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
530#endif
531 if (cbReq <= pExecMemAllocator->cbFree)
532 {
533 uint32_t const cChunks = pExecMemAllocator->cChunks;
534 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
535 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
536 {
537 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
538 if (pvRet)
539 return pvRet;
540 }
541 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 }
548
549 /*
550 * Can we grow it with another chunk?
551 */
552 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
553 {
554 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
555 AssertLogRelRCReturn(rc, NULL);
556
557 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
558 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
559 if (pvRet)
560 return pvRet;
561 AssertFailed();
562 }
563
564 /*
565 * Try prune native TBs once.
566 */
567 if (iIteration == 0)
568 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
569 else
570 {
571 /** @todo stats... */
572 return NULL;
573 }
574 }
575
576}
577
578
579/** This is a hook that we may need later for changing memory protection back
580 * to readonly+exec */
581static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
582{
583#ifdef RT_OS_DARWIN
584 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
585 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
586 AssertRC(rc); RT_NOREF(pVCpu);
587
588 /*
589 * Flush the instruction cache:
590 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
591 */
592 /* sys_dcache_flush(pv, cb); - not necessary */
593 sys_icache_invalidate(pv, cb);
594#else
595 RT_NOREF(pVCpu, pv, cb);
596#endif
597}
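/* A minimal sketch of the intended calling sequence (hypothetical caller code,
 * not taken from this file): allocate, emit/copy the instructions while the
 * pages are read+write on darwin, then hand them back for execution, which
 * flips the protection to read+exec and flushes the instruction cache. */
#if 0 /* illustration only; cbEstimate, pabEmittedCode and cbEmitted are made-up names */
    PIEMNATIVEINSTR const paNative = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, cbEstimate);
    if (paNative)
    {
        memcpy(paNative, pabEmittedCode, cbEmitted);                /* or emit directly into the buffer */
        iemExecMemAllocatorReadyForUse(pVCpu, paNative, cbEmitted); /* RW -> RX + icache flush on darwin */
    }
#endif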
598
599
600/**
601 * Frees executable memory.
602 */
603void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
604{
605 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
606 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
607 Assert(pv);
608#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
609 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
610#else
611 Assert(!((uintptr_t)pv & 63));
612#endif
613
614 /* Align the size as we did when allocating the block. */
615#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
616 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
617#else
618 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
619#endif
620
621 /* Free it / assert sanity. */
622#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
623 uint32_t const cChunks = pExecMemAllocator->cChunks;
624 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
625 bool fFound = false;
626 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
627 {
628 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
629 fFound = offChunk < cbChunk;
630 if (fFound)
631 {
632#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
633 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
634 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
635
636 /* Check that it's valid and free it. */
637 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
638 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
639 for (uint32_t i = 1; i < cReqUnits; i++)
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
641 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
642
643 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
644 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
645
646 /* Update the stats. */
647 pExecMemAllocator->cbAllocated -= cb;
648 pExecMemAllocator->cbFree += cb;
649 pExecMemAllocator->cAllocations -= 1;
650 return;
651#else
652 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
653 break;
654#endif
655 }
656 }
657# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
658 AssertFailed();
659# else
660 Assert(fFound);
661# endif
662#endif
663
664#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
665 /* Update stats while cb is freshly calculated. */
666 pExecMemAllocator->cbAllocated -= cb;
667 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
668 pExecMemAllocator->cAllocations -= 1;
669
670 /* Free it. */
671 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
672#endif
673}
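/* Illustrative free (not from the original source): a 640 byte block living at
 * offset 0x1200 into its chunk maps to idxFirst = 0x1200 >> 7 = 36 and
 * cReqUnits = 640 >> 7 = 5, so the function above clears bitmap bits 36..40
 * and bumps cFreeUnits by 5. */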
674
675
676
677#ifdef IN_RING3
678# ifdef RT_OS_WINDOWS
679
680/**
681 * Initializes the unwind info structures for windows hosts.
682 */
683static int
684iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
685 void *pvChunk, uint32_t idxChunk)
686{
687 RT_NOREF(pVCpu);
688
689 /*
690 * The AMD64 unwind opcodes.
691 *
692 * This is a program that starts with RSP after a RET instruction that
693 * ends up in recompiled code, and the operations we describe here will
694 * restore all non-volatile registers and bring RSP back to where our
695 * RET address is. This means it's reverse order from what happens in
696 * the prologue.
697 *
698 * Note! Using a frame register approach here, both because we have one
699 * and mainly because the UWOP_ALLOC_LARGE argument values
700 * would be a pain to write initializers for. On the positive
701 * side, we're impervious to changes in the stack variable
702 * area and can deal with dynamic stack allocations if necessary.
703 */
704 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
705 {
706 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 0x10 (0x60) */
707 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
708 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
709 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
710 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
711 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
712 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
713 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
714 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
715 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
716 };
717 union
718 {
719 IMAGE_UNWIND_INFO Info;
720 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
721 } s_UnwindInfo =
722 {
723 {
724 /* .Version = */ 1,
725 /* .Flags = */ 0,
726 /* .SizeOfProlog = */ 16, /* whatever */
727 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
728 /* .FrameRegister = */ X86_GREG_xBP,
729 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
730 }
731 };
732 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
733 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
734
735 /*
736 * Calc how much space we need and allocate it off the exec heap.
737 */
738 unsigned const cFunctionEntries = 1;
739 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
740 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
741# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
743 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
744 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
745# else
746 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
747 - pExecMemAllocator->cbHeapBlockHdr;
748 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
749 32 /*cbAlignment*/);
750# endif
751 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
752 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
753
754 /*
755 * Initialize the structures.
756 */
757 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
758
759 paFunctions[0].BeginAddress = 0;
760 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
761 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
762
763 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
764 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
765
766 /*
767 * Register it.
768 */
769 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
770 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
771
772 return VINF_SUCCESS;
773}
774
775
776# else /* !RT_OS_WINDOWS */
777
778/**
779 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
780 */
781DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
782{
783 if (iValue >= 64)
784 {
785 Assert(iValue < 0x2000);
786 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
787 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
788 }
789 else if (iValue >= 0)
790 *Ptr.pb++ = (uint8_t)iValue;
791 else if (iValue > -64)
792 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
793 else
794 {
795 Assert(iValue > -0x2000);
796 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
797 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
798 }
799 return Ptr;
800}
801
802
803/**
804 * Emits an ULEB128 encoded value (up to 64-bit wide).
805 */
806DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
807{
808 while (uValue >= 0x80)
809 {
810 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
811 uValue >>= 7;
812 }
813 *Ptr.pb++ = (uint8_t)uValue;
814 return Ptr;
815}
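/* Encoding examples for the two helpers above (illustrative, not from the
 * original source): iemDwarfPutLeb128(Ptr, -8) emits the single byte 0x78,
 * which is how the -8 data alignment factor below gets encoded, while
 * iemDwarfPutUleb128(Ptr, 300) emits 0xAC 0x02 (low 7 bits first, with the
 * continuation bit set on all but the last byte). */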
816
817
818/**
819 * Emits a CFA rule as register @a uReg + offset @a off.
820 */
821DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
822{
823 *Ptr.pb++ = DW_CFA_def_cfa;
824 Ptr = iemDwarfPutUleb128(Ptr, uReg);
825 Ptr = iemDwarfPutUleb128(Ptr, off);
826 return Ptr;
827}
828
829
830/**
831 * Emits a register (@a uReg) save location:
832 * CFA + @a off * data_alignment_factor
833 */
834DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
835{
836 if (uReg < 0x40)
837 *Ptr.pb++ = DW_CFA_offset | uReg;
838 else
839 {
840 *Ptr.pb++ = DW_CFA_offset_extended;
841 Ptr = iemDwarfPutUleb128(Ptr, uReg);
842 }
843 Ptr = iemDwarfPutUleb128(Ptr, off);
844 return Ptr;
845}
846
847
848# if 0 /* unused */
849/**
850 * Emits a register (@a uReg) save location, using signed offset:
851 * CFA + @a offSigned * data_alignment_factor
852 */
853DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
854{
855 *Ptr.pb++ = DW_CFA_offset_extended_sf;
856 Ptr = iemDwarfPutUleb128(Ptr, uReg);
857 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
858 return Ptr;
859}
860# endif
861
862
863/**
864 * Initializes the unwind info section for non-windows hosts.
865 */
866static int
867iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
868 void *pvChunk, uint32_t idxChunk)
869{
870 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
871 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
872
873 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
874
875 /*
876 * Generate the CIE first.
877 */
878# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
879 uint8_t const iDwarfVer = 3;
880# else
881 uint8_t const iDwarfVer = 4;
882# endif
883 RTPTRUNION const PtrCie = Ptr;
884 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
885 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
886 *Ptr.pb++ = iDwarfVer; /* DWARF version */
887 *Ptr.pb++ = 0; /* Augmentation. */
888 if (iDwarfVer >= 4)
889 {
890 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
891 *Ptr.pb++ = 0; /* Segment selector size. */
892 }
893# ifdef RT_ARCH_AMD64
894 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
895# else
896 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
897# endif
898 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
901# elif defined(RT_ARCH_ARM64)
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
903# else
904# error "port me"
905# endif
906 /* Initial instructions: */
907# ifdef RT_ARCH_AMD64
908 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
916# elif defined(RT_ARCH_ARM64)
917# if 1
918 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
919# else
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
921# endif
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
934 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
935 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
936# else
937# error "port me"
938# endif
939 while ((Ptr.u - PtrCie.u) & 3)
940 *Ptr.pb++ = DW_CFA_nop;
941 /* Finalize the CIE size. */
942 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
943
944 /*
945 * Generate an FDE for the whole chunk area.
946 */
947# ifdef IEMNATIVE_USE_LIBUNWIND
948 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
949# endif
950 RTPTRUNION const PtrFde = Ptr;
951 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
952 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
953 Ptr.pu32++;
954 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
955 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
956# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
957 *Ptr.pb++ = DW_CFA_nop;
958# endif
959 while ((Ptr.u - PtrFde.u) & 3)
960 *Ptr.pb++ = DW_CFA_nop;
961 /* Finalize the FDE size. */
962 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
963
964 /* Terminator entry. */
965 *Ptr.pu32++ = 0;
966 *Ptr.pu32++ = 0; /* just to be sure... */
967 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
968
969 /*
970 * Register it.
971 */
972# ifdef IEMNATIVE_USE_LIBUNWIND
973 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
974# else
975 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
976 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
977# endif
978
979# ifdef IEMNATIVE_USE_GDB_JIT
980 /*
981 * Now for telling GDB about this (experimental).
982 *
983 * This seems to work best with ET_DYN.
984 */
985 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
986# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
987 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
989# else
990 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
991 - pExecMemAllocator->cbHeapBlockHdr;
992 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
993# endif
994 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
995 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
996
997 RT_ZERO(*pSymFile);
998
999 /*
1000 * The ELF header:
1001 */
1002 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1003 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1004 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1005 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1006 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1007 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1008 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1009 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1010# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1011 pSymFile->EHdr.e_type = ET_DYN;
1012# else
1013 pSymFile->EHdr.e_type = ET_REL;
1014# endif
1015# ifdef RT_ARCH_AMD64
1016 pSymFile->EHdr.e_machine = EM_AMD64;
1017# elif defined(RT_ARCH_ARM64)
1018 pSymFile->EHdr.e_machine = EM_AARCH64;
1019# else
1020# error "port me"
1021# endif
1022 pSymFile->EHdr.e_version = 1; /*?*/
1023 pSymFile->EHdr.e_entry = 0;
1024# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1025 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1026# else
1027 pSymFile->EHdr.e_phoff = 0;
1028# endif
1029 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1030 pSymFile->EHdr.e_flags = 0;
1031 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1032# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1033 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1034 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1035# else
1036 pSymFile->EHdr.e_phentsize = 0;
1037 pSymFile->EHdr.e_phnum = 0;
1038# endif
1039 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1040 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1041 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1042
1043 uint32_t offStrTab = 0;
1044#define APPEND_STR(a_szStr) do { \
1045 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1046 offStrTab += sizeof(a_szStr); \
1047 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1048 } while (0)
1049#define APPEND_STR_FMT(a_szStr, ...) do { \
1050 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1051 offStrTab++; \
1052 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1053 } while (0)
1054
1055 /*
1056 * Section headers.
1057 */
1058 /* Section header #0: NULL */
1059 unsigned i = 0;
1060 APPEND_STR("");
1061 RT_ZERO(pSymFile->aShdrs[i]);
1062 i++;
1063
1064 /* Section header: .eh_frame */
1065 pSymFile->aShdrs[i].sh_name = offStrTab;
1066 APPEND_STR(".eh_frame");
1067 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1068 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1069# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1070 pSymFile->aShdrs[i].sh_offset
1071 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1072# else
1073 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1074 pSymFile->aShdrs[i].sh_offset = 0;
1075# endif
1076
1077 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1078 pSymFile->aShdrs[i].sh_link = 0;
1079 pSymFile->aShdrs[i].sh_info = 0;
1080 pSymFile->aShdrs[i].sh_addralign = 1;
1081 pSymFile->aShdrs[i].sh_entsize = 0;
1082 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1083 i++;
1084
1085 /* Section header: .shstrtab */
1086 unsigned const iShStrTab = i;
1087 pSymFile->EHdr.e_shstrndx = iShStrTab;
1088 pSymFile->aShdrs[i].sh_name = offStrTab;
1089 APPEND_STR(".shstrtab");
1090 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1091 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1092# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1093 pSymFile->aShdrs[i].sh_offset
1094 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1095# else
1096 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1097 pSymFile->aShdrs[i].sh_offset = 0;
1098# endif
1099 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1100 pSymFile->aShdrs[i].sh_link = 0;
1101 pSymFile->aShdrs[i].sh_info = 0;
1102 pSymFile->aShdrs[i].sh_addralign = 1;
1103 pSymFile->aShdrs[i].sh_entsize = 0;
1104 i++;
1105
1106 /* Section header: .symtab */
1107 pSymFile->aShdrs[i].sh_name = offStrTab;
1108 APPEND_STR(".symtab");
1109 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1110 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1111 pSymFile->aShdrs[i].sh_offset
1112 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1113 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1114 pSymFile->aShdrs[i].sh_link = iShStrTab;
1115 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1117 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1118 i++;
1119
1120# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1121 /* Section header: .dynsym */
1122 pSymFile->aShdrs[i].sh_name = offStrTab;
1123 APPEND_STR(".dynsym");
1124 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1125 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1126 pSymFile->aShdrs[i].sh_offset
1127 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1128 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1129 pSymFile->aShdrs[i].sh_link = iShStrTab;
1130 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1132 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1133 i++;
1134# endif
1135
1136# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1137 /* Section header: .dynamic */
1138 pSymFile->aShdrs[i].sh_name = offStrTab;
1139 APPEND_STR(".dynamic");
1140 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1141 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1142 pSymFile->aShdrs[i].sh_offset
1143 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1144 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1145 pSymFile->aShdrs[i].sh_link = iShStrTab;
1146 pSymFile->aShdrs[i].sh_info = 0;
1147 pSymFile->aShdrs[i].sh_addralign = 1;
1148 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1149 i++;
1150# endif
1151
1152 /* Section header: .text */
1153 unsigned const iShText = i;
1154 pSymFile->aShdrs[i].sh_name = offStrTab;
1155 APPEND_STR(".text");
1156 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1157 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1158# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1159 pSymFile->aShdrs[i].sh_offset
1160 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1161# else
1162 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1163 pSymFile->aShdrs[i].sh_offset = 0;
1164# endif
1165 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1166 pSymFile->aShdrs[i].sh_link = 0;
1167 pSymFile->aShdrs[i].sh_info = 0;
1168 pSymFile->aShdrs[i].sh_addralign = 1;
1169 pSymFile->aShdrs[i].sh_entsize = 0;
1170 i++;
1171
1172 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1173
1174# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1175 /*
1176 * The program headers:
1177 */
1178 /* Everything in a single LOAD segment: */
1179 i = 0;
1180 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1181 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1182 pSymFile->aPhdrs[i].p_offset
1183 = pSymFile->aPhdrs[i].p_vaddr
1184 = pSymFile->aPhdrs[i].p_paddr = 0;
1185 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1186 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1187 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1188 i++;
1189 /* The .dynamic segment. */
1190 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1191 pSymFile->aPhdrs[i].p_flags = PF_R;
1192 pSymFile->aPhdrs[i].p_offset
1193 = pSymFile->aPhdrs[i].p_vaddr
1194 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1195 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1196 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1197 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1198 i++;
1199
1200 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1201
1202 /*
1203 * The dynamic section:
1204 */
1205 i = 0;
1206 pSymFile->aDyn[i].d_tag = DT_SONAME;
1207 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1208 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1211 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1212 i++;
1213 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1214 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_NULL;
1223 i++;
1224 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1225# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1226
1227 /*
1228 * Symbol tables:
1229 */
1230 /** @todo gdb doesn't seem to really like this ... */
1231 i = 0;
1232 pSymFile->aSymbols[i].st_name = 0;
1233 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1234 pSymFile->aSymbols[i].st_value = 0;
1235 pSymFile->aSymbols[i].st_size = 0;
1236 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1237 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1238# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1239 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1240# endif
1241 i++;
1242
1243 pSymFile->aSymbols[i].st_name = 0;
1244 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1245 pSymFile->aSymbols[i].st_value = 0;
1246 pSymFile->aSymbols[i].st_size = 0;
1247 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1248 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1249 i++;
1250
1251 pSymFile->aSymbols[i].st_name = offStrTab;
1252 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1253# if 0
1254 pSymFile->aSymbols[i].st_shndx = iShText;
1255 pSymFile->aSymbols[i].st_value = 0;
1256# else
1257 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1258 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1259# endif
1260 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1261 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1262 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1263# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1264 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1265 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1266# endif
1267 i++;
1268
1269 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1270 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1271
1272 /*
1273 * The GDB JIT entry and informing GDB.
1274 */
1275 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1276# if 1
1277 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1278# else
1279 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1280# endif
1281
1282 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1283 RTCritSectEnter(&g_IemNativeGdbJitLock);
1284 pEhFrame->GdbJitEntry.pNext = NULL;
1285 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1286 if (__jit_debug_descriptor.pTail)
1287 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1288 else
1289 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1290 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1291 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1292
1293 /* Notify GDB: */
1294 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1295 __jit_debug_register_code();
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1297 RTCritSectLeave(&g_IemNativeGdbJitLock);
1298
1299# else /* !IEMNATIVE_USE_GDB_JIT */
1300 RT_NOREF(pVCpu);
1301# endif /* !IEMNATIVE_USE_GDB_JIT */
1302
1303 return VINF_SUCCESS;
1304}
1305
1306# endif /* !RT_OS_WINDOWS */
1307#endif /* IN_RING3 */
1308
1309
1310/**
1311 * Adds another chunk to the executable memory allocator.
1312 *
1313 * This is used by the init code for the initial allocation and later by the
1314 * regular allocator function when it's out of memory.
1315 */
1316static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1317{
1318 /* Check that we've room for growth. */
1319 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1320 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1321
1322 /* Allocate a chunk. */
1323#ifdef RT_OS_DARWIN
1324 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1325#else
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1327#endif
1328 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1329
1330#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1331 int rc = VINF_SUCCESS;
1332#else
1333 /* Initialize the heap for the chunk. */
1334 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1335 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1336 AssertRC(rc);
1337 if (RT_SUCCESS(rc))
1338 {
1339 /*
1340 * We want the memory to be aligned on 64 bytes, so the first time thru
1341 * here we do some exploratory allocations to see how we can achieve this.
1342 * On subsequent runs we only make an initial adjustment allocation, if
1343 * necessary.
1344 *
1345 * Since we own the heap implementation, we know that the internal block
1346 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1347 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1348 * to the size, align up by 64 bytes, and subtract 32 bytes.
1349 *
1350 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1351 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1352 * allocation to force subsequent allocations to return 64 byte aligned
1353 * user areas.
1354 */
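 /* Worked example of the adjustment described above (illustrative): a 256 byte
  * request becomes RT_ALIGN_32(256 + 32, 64) - 32 = 288 bytes, so the user
  * area ends 32 bytes short of the next 64 byte line and the following block
  * header fills exactly that gap, keeping every user area 64 byte aligned. */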
1355 if (!pExecMemAllocator->cbHeapBlockHdr)
1356 {
1357 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1358 pExecMemAllocator->cbHeapAlignTweak = 64;
1359 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1360 32 /*cbAlignment*/);
1361 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1362
1363 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1364 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1365 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1366 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1367 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1368
1369 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 RTHeapSimpleFree(hHeap, pvTest2);
1376 RTHeapSimpleFree(hHeap, pvTest1);
1377 }
1378 else
1379 {
1380 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1381 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1382 }
1383 if (RT_SUCCESS(rc))
1384#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1385 {
1386 /*
1387 * Add the chunk.
1388 *
1389 * This must be done before the unwind init so windows can allocate
1390 * memory from the chunk when using the alternative sub-allocator.
1391 */
1392 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1393#ifdef IN_RING3
1394 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1395#endif
1396#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1397 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1398#else
1399 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1400 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1401 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1402 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1403#endif
1404
1405 pExecMemAllocator->cChunks = idxChunk + 1;
1406 pExecMemAllocator->idxChunkHint = idxChunk;
1407
1408#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1409 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1410 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1411#else
1412 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1413 pExecMemAllocator->cbTotal += cbFree;
1414 pExecMemAllocator->cbFree += cbFree;
1415#endif
1416
1417#ifdef IN_RING3
1418 /*
1419 * Initialize the unwind information (this cannot really fail atm).
1420 * (This sets pvUnwindInfo.)
1421 */
1422 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1423 if (RT_SUCCESS(rc))
1424#endif
1425 {
1426 return VINF_SUCCESS;
1427 }
1428
1429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1430 /* Just in case the impossible happens, undo the above: */
1431 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1432 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1433 pExecMemAllocator->cChunks = idxChunk;
1434 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1435 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1436 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1437 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1438#endif
1439 }
1440#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1441 }
1442#endif
1443 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1444 RT_NOREF(pVCpu);
1445 return rc;
1446}
1447
1448
1449/**
1450 * Initializes the executable memory allocator for native recompilation on the
1451 * calling EMT.
1452 *
1453 * @returns VBox status code.
1454 * @param pVCpu The cross context virtual CPU structure of the calling
1455 * thread.
1456 * @param cbMax The max size of the allocator.
1457 * @param cbInitial The initial allocator size.
1458 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1459 * dependent).
1460 */
1461int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1462{
1463 /*
1464 * Validate input.
1465 */
1466 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1467 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1468 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1469 || cbChunk == 0
1470 || ( RT_IS_POWER_OF_TWO(cbChunk)
1471 && cbChunk >= _1M
1472 && cbChunk <= _256M
1473 && cbChunk <= cbMax),
1474 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1475 VERR_OUT_OF_RANGE);
1476
1477 /*
1478 * Adjust/figure out the chunk size.
1479 */
1480 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1481 {
1482 if (cbMax >= _256M)
1483 cbChunk = _64M;
1484 else
1485 {
1486 if (cbMax < _16M)
1487 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1488 else
1489 cbChunk = (uint32_t)cbMax / 4;
1490 if (!RT_IS_POWER_OF_TWO(cbChunk))
1491 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1492 }
1493 }
1494
1495 if (cbChunk > cbMax)
1496 cbMax = cbChunk;
1497 else
1498 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1499 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1500 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
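 /* Illustrative numbers for the sizing logic above (not from the original
  * source): cbMax = 64 MiB yields cbChunk = 64M / 4 = 16 MiB (already a power
  * of two) and cMaxChunks = 4, while anything >= 256 MiB uses 64 MiB chunks,
  * e.g. cbMax = 512 MiB gives cMaxChunks = 8. */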
1501
1502 /*
1503 * Allocate and initialize the allocator instance.
1504 */
1505 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1506#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1507 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1508 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1509 cbNeeded += cbBitmap * cMaxChunks;
1510 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1511 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1512#endif
1513#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1514 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1515 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1516#endif
1517 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1518 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1519 VERR_NO_MEMORY);
1520 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1521 pExecMemAllocator->cbChunk = cbChunk;
1522 pExecMemAllocator->cMaxChunks = cMaxChunks;
1523 pExecMemAllocator->cChunks = 0;
1524 pExecMemAllocator->idxChunkHint = 0;
1525 pExecMemAllocator->cAllocations = 0;
1526 pExecMemAllocator->cbTotal = 0;
1527 pExecMemAllocator->cbFree = 0;
1528 pExecMemAllocator->cbAllocated = 0;
1529#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1530 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1531 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1532 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1533 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1534#endif
1535#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1536 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1537#endif
1538 for (uint32_t i = 0; i < cMaxChunks; i++)
1539 {
1540#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1541 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1542 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1543#else
1544 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1545#endif
1546 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1547#ifdef IN_RING0
1548 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1549#else
1550 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1551#endif
1552 }
1553 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1554
1555 /*
1556 * Do the initial allocations.
1557 */
1558 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1559 {
1560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1561 AssertLogRelRCReturn(rc, rc);
1562 }
1563
1564 pExecMemAllocator->idxChunkHint = 0;
1565
1566 return VINF_SUCCESS;
1567}
1568
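/* Illustrative usage sketch of the function above (the sizes are hypothetical and
   not defaults defined in this file):

       int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0); // cbMax, cbInitial, cbChunk=default
       AssertRCReturn(rc, rc);
*/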
1569
1570/*********************************************************************************************************************************
1571* Native Recompilation *
1572*********************************************************************************************************************************/
1573
1574
1575/**
1576 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1577 */
1578IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1579{
1580 pVCpu->iem.s.cInstructions += idxInstr;
1581 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1582}
1583
1584
1585/**
1586 * Used by TB code when it wants to raise a \#GP(0).
1587 */
1588IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1589{
1590 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1591#ifndef _MSC_VER
1592 return VINF_IEM_RAISED_XCPT; /* not reached */
1593#endif
1594}
1595
1596
1597/**
1598 * Used by TB code when detecting opcode changes.
1599 * @see iemThreadeFuncWorkerObsoleteTb
1600 */
1601IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1602{
1603 /* We set fSafeToFree to false because we're being called in the context
1604 of a TB callback function, which for native TBs means we cannot release
1605 the executable memory till we've returned our way back to iemTbExec, as
1606 that return path goes via the native code generated for the TB. */
1607 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1608 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1609 return VINF_IEM_REEXEC_BREAK;
1610}
1611
1612
1613/**
1614 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1615 */
1616IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1617{
1618 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1619 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1620 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1621 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1622 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1623 return VINF_IEM_REEXEC_BREAK;
1624}
1625
1626
1627/**
1628 * Used by TB code when we missed a PC check after a branch.
1629 */
1630IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1631{
1632 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1633 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1634 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1635 pVCpu->iem.s.pbInstrBuf));
1636 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1637 return VINF_IEM_REEXEC_BREAK;
1638}
1639
1640
1641
1642/*********************************************************************************************************************************
1643* Helpers: Segmented memory fetches and stores. *
1644*********************************************************************************************************************************/
1645
1646/**
1647 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1648 */
1649IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1650{
1651 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1652}
1653
1654
1655/**
1656 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1657 * to 16 bits.
1658 */
1659IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1660{
1661 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1662}
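/* Illustrative example of the cast chain used by the _Sx_ helpers (hypothetical
   value): fetching the byte 0x80 yields (int8_t)-128, which (int16_t) sign-extends
   to 0xFF80, (uint16_t) keeps those bits, and the final (uint64_t) zero-extends to
   0x000000000000FF80, so only the requested width appears sign-extended in the
   64-bit return register. */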
1663
1664
1665/**
1666 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1667 * to 32 bits.
1668 */
1669IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1670{
1671 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1672}
1673
1674/**
1675 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1676 * to 64 bits.
1677 */
1678IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1679{
1680 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1681}
1682
1683
1684/**
1685 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1686 */
1687IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1688{
1689 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1690}
1691
1692
1693/**
1694 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1695 * to 32 bits.
1696 */
1697IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1698{
1699 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1700}
1701
1702
1703/**
1704 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1705 * to 64 bits.
1706 */
1707IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1708{
1709 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1710}
1711
1712
1713/**
1714 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1715 */
1716IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1717{
1718 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1719}
1720
1721
1722/**
1723 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1724 * to 64 bits.
1725 */
1726IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1727{
1728 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1729}
1730
1731
1732/**
1733 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1734 */
1735IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1736{
1737 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU64SafeJmp */
1738}
1739
1740
1741/**
1742 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1745{
1746 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1747}
1748
1749
1750/**
1751 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1752 */
1753IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1754{
1755 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1756}
1757
1758
1759/**
1760 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1763{
1764 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1765}
1766
1767
1768/**
1769 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1772{
1773 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1774}
1775
1776
1777
1778/**
1779 * Used by TB code to push unsigned 16-bit value onto a generic stack.
1780 */
1781IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
1782{
1783 iemMemStackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemStackPushU16SafeJmp */
1784}
1785
1786
1787/**
1788 * Used by TB code to push unsigned 32-bit value onto a generic stack.
1789 */
1790IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
1791{
1792 iemMemStackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SafeJmp */
1793}
1794
1795
1796/**
1797 * Used by TB code to push 32-bit selector value onto a generic stack.
1798 *
1799 * Intel CPUs don't write a whole dword, hence the special function.
1800 */
1801IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
1802{
1803 iemMemStackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SRegSafeJmp */
1804}
1805
1806
1807/**
1808 * Used by TB code to push unsigned 64-bit value onto a generic stack.
1809 */
1810IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
1811{
1812 iemMemStackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemStackPushU64SafeJmp */
1813}
1814
1815
1816/**
1817 * Used by TB code to pop a 16-bit general purpose register off a generic stack.
1818 */
1819IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
1820{
1821 iemMemStackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU16SafeJmp */
1822}
1823
1824
1825/**
1826 * Used by TB code to pop a 32-bit general purpose register off a generic stack.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
1829{
1830 iemMemStackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU32SafeJmp */
1831}
1832
1833
1834/**
1835 * Used by TB code to pop a 64-bit general purpose register off a generic stack.
1836 */
1837IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
1838{
1839 iemMemStackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU64SafeJmp */
1840}
1841
1842
1843
1844/*********************************************************************************************************************************
1845* Helpers: Flat memory fetches and stores. *
1846*********************************************************************************************************************************/
1847
1848/**
1849 * Used by TB code to load unsigned 8-bit data w/ flat address.
1850 * @note Zero extending the value to 64-bit to simplify assembly.
1851 */
1852IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1853{
1854 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1855}
1856
1857
1858/**
1859 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1860 * to 16 bits.
1861 * @note Zero extending the value to 64-bit to simplify assembly.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1864{
1865 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1866}
1867
1868
1869/**
1870 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1871 * to 32 bits.
1872 * @note Zero extending the value to 64-bit to simplify assembly.
1873 */
1874IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1875{
1876 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1877}
1878
1879
1880/**
1881 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1882 * to 64 bits.
1883 */
1884IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1885{
1886 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1887}
1888
1889
1890/**
1891 * Used by TB code to load unsigned 16-bit data w/ flat address.
1892 * @note Zero extending the value to 64-bit to simplify assembly.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1895{
1896 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1897}
1898
1899
1900/**
1901 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1902 * to 32 bits.
1903 * @note Zero extending the value to 64-bit to simplify assembly.
1904 */
1905IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1906{
1907 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1908}
1909
1910
1911/**
1912 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1913 * to 64 bits.
1914 * @note Zero extending the value to 64-bit to simplify assembly.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1917{
1918 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1919}
1920
1921
1922/**
1923 * Used by TB code to load unsigned 32-bit data w/ flat address.
1924 * @note Zero extending the value to 64-bit to simplify assembly.
1925 */
1926IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1927{
1928 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1929}
1930
1931
1932/**
1933 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1934 * to 64 bits.
1935 * @note Zero extending the value to 64-bit to simplify assembly.
1936 */
1937IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1938{
1939 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1940}
1941
1942
1943/**
1944 * Used by TB code to load unsigned 64-bit data w/ flat address.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1947{
1948 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU64SafeJmp */
1949}
1950
1951
1952/**
1953 * Used by TB code to store unsigned 8-bit data w/ flat address.
1954 */
1955IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1956{
1957 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1958}
1959
1960
1961/**
1962 * Used by TB code to store unsigned 16-bit data w/ flat address.
1963 */
1964IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1965{
1966 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1967}
1968
1969
1970/**
1971 * Used by TB code to store unsigned 32-bit data w/ flat address.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1974{
1975 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1976}
1977
1978
1979/**
1980 * Used by TB code to store unsigned 64-bit data w/ flat address.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1983{
1984 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1985}
1986
1987
1988
1989/**
1990 * Used by TB code to push unsigned 16-bit value onto a flat 32-bit stack.
1991 */
1992IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
1993{
1994 iemMemFlat32StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat32StackPushU16SafeJmp */
1995}
1996
1997
1998/**
1999 * Used by TB code to push unsigned 32-bit value onto a flat 32-bit stack.
2000 */
2001IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
2002{
2003 iemMemFlat32StackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SafeJmp */
2004}
2005
2006
2007/**
2008 * Used by TB code to push segment selector value onto a flat 32-bit stack.
2009 *
2010 * Intel CPUs don't write a whole dword, hence the special function.
2011 */
2012IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
2013{
2014 iemMemFlat32StackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SRegSafeJmp */
2015}
2016
2017
2018/**
2019 * Used by TB code to pop a 16-bit general purpose register off a flat 32-bit stack.
2020 */
2021IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
2022{
2023 iemMemFlat32StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU16SafeJmp */
2024}
2025
2026
2027/**
2028 * Used by TB code to pop a 32-bit general purpose register off a flat 32-bit stack.
2029 */
2030IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
2031{
2032 iemMemFlat32StackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU32SafeJmp */
2033}
2034
2035
2036
2037/**
2038 * Used by TB code to push unsigned 16-bit value onto a flat 64-bit stack.
2039 */
2040IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
2041{
2042 iemMemFlat64StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat64StackPushU16SafeJmp */
2043}
2044
2045
2046/**
2047 * Used by TB code to push unsigned 64-bit value onto a flat 64-bit stack.
2048 */
2049IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
2050{
2051 iemMemFlat64StackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemFlat64StackPushU64SafeJmp */
2052}
2053
2054
2055/**
2056 * Used by TB code to pop a 16-bit general purpose register off a flat 64-bit stack.
2057 */
2058IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
2059{
2060 iemMemFlat64StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU16SafeJmp */
2061}
2062
2063
2064/**
2065 * Used by TB code to pop a 64-bit general purpose register off a flat 64-bit stack.
2066 */
2067IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
2068{
2069 iemMemFlat64StackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU64SafeJmp */
2070}
2071
2072
2073
2074/*********************************************************************************************************************************
2075* Helpers: Segmented memory mapping. *
2076*********************************************************************************************************************************/
2077
2078/**
2079 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2080 */
2081IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2082 RTGCPTR GCPtrMem, uint8_t iSegReg))
2083{
2084 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RwSafeJmp */
2085}
2086
2087
2088/**
2089 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2092 RTGCPTR GCPtrMem, uint8_t iSegReg))
2093{
2094 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8WoSafeJmp */
2095}
2096
2097
2098/**
2099 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2100 */
2101IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2102 RTGCPTR GCPtrMem, uint8_t iSegReg))
2103{
2104 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RoSafeJmp */
2105}
2106
2107
2108/**
2109 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2110 */
2111IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2112 RTGCPTR GCPtrMem, uint8_t iSegReg))
2113{
2114 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RwSafeJmp */
2115}
2116
2117
2118/**
2119 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2120 */
2121IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2122 RTGCPTR GCPtrMem, uint8_t iSegReg))
2123{
2124 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16WoSafeJmp */
2125}
2126
2127
2128/**
2129 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2130 */
2131IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2132 RTGCPTR GCPtrMem, uint8_t iSegReg))
2133{
2134 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RoSafeJmp */
2135}
2136
2137
2138/**
2139 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2140 */
2141IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2142 RTGCPTR GCPtrMem, uint8_t iSegReg))
2143{
2144 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RwSafeJmp */
2145}
2146
2147
2148/**
2149 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2150 */
2151IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2152 RTGCPTR GCPtrMem, uint8_t iSegReg))
2153{
2154 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32WoSafeJmp */
2155}
2156
2157
2158/**
2159 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2160 */
2161IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2162 RTGCPTR GCPtrMem, uint8_t iSegReg))
2163{
2164 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RoSafeJmp */
2165}
2166
2167
2168/**
2169 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2172 RTGCPTR GCPtrMem, uint8_t iSegReg))
2173{
2174 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RwSafeJmp */
2175}
2176
2177
2178/**
2179 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2180 */
2181IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2182 RTGCPTR GCPtrMem, uint8_t iSegReg))
2183{
2184 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64WoSafeJmp */
2185}
2186
2187
2188/**
2189 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2190 */
2191IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2192 RTGCPTR GCPtrMem, uint8_t iSegReg))
2193{
2194 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RoSafeJmp */
2195}
2196
2197
2198/**
2199 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2202 RTGCPTR GCPtrMem, uint8_t iSegReg))
2203{
2204 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataR80WoSafeJmp */
2205}
2206
2207
2208/**
2209 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2210 */
2211IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2212 RTGCPTR GCPtrMem, uint8_t iSegReg))
2213{
2214 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataD80WoSafeJmp */
2215}
2216
2217
2218/**
2219 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2220 */
2221IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2222 RTGCPTR GCPtrMem, uint8_t iSegReg))
2223{
2224 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RwSafeJmp */
2225}
2226
2227
2228/**
2229 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2230 */
2231IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2232 RTGCPTR GCPtrMem, uint8_t iSegReg))
2233{
2234 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128WoSafeJmp */
2235}
2236
2237
2238/**
2239 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2240 */
2241IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2242 RTGCPTR GCPtrMem, uint8_t iSegReg))
2243{
2244 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RoSafeJmp */
2245}
2246
2247
2248/*********************************************************************************************************************************
2249* Helpers: Flat memory mapping. *
2250*********************************************************************************************************************************/
2251
2252/**
2253 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2254 */
2255IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2256{
2257 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RwSafeJmp */
2258}
2259
2260
2261/**
2262 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2263 */
2264IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2265{
2266 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8WoSafeJmp */
2267}
2268
2269
2270/**
2271 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2272 */
2273IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2274{
2275 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RoSafeJmp */
2276}
2277
2278
2279/**
2280 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2281 */
2282IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2283{
2284 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RwSafeJmp */
2285}
2286
2287
2288/**
2289 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2290 */
2291IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2292{
2293 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16WoSafeJmp */
2294}
2295
2296
2297/**
2298 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2299 */
2300IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2301{
2302 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RoSafeJmp */
2303}
2304
2305
2306/**
2307 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2308 */
2309IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2310{
2311 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RwSafeJmp */
2312}
2313
2314
2315/**
2316 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2317 */
2318IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2319{
2320 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32WoSafeJmp */
2321}
2322
2323
2324/**
2325 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2326 */
2327IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2328{
2329 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RoSafeJmp */
2330}
2331
2332
2333/**
2334 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2335 */
2336IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2337{
2338 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RwSafeJmp */
2339}
2340
2341
2342/**
2343 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2344 */
2345IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2346{
2347 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64WoSafeJmp */
2348}
2349
2350
2351/**
2352 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2353 */
2354IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2355{
2356 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RoSafeJmp */
2357}
2358
2359
2360/**
2361 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2362 */
2363IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2364{
2365 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataR80WoSafeJmp */
2366}
2367
2368
2369/**
2370 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2371 */
2372IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2373{
2374 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataD80WoSafeJmp */
2375}
2376
2377
2378/**
2379 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2380 */
2381IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2382{
2383 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RwSafeJmp */
2384}
2385
2386
2387/**
2388 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2389 */
2390IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2391{
2392 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128WoSafeJmp */
2393}
2394
2395
2396/**
2397 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2400{
2401 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RoSafeJmp */
2402}
2403
2404
2405/*********************************************************************************************************************************
2406* Helpers: Commit, rollback & unmap *
2407*********************************************************************************************************************************/
2408
2409/**
2410 * Used by TB code to commit and unmap a read-write memory mapping.
2411 */
2412IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2413{
2414 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2415}
2416
2417
2418/**
2419 * Used by TB code to commit and unmap a write-only memory mapping.
2420 */
2421IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2422{
2423 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2424}
2425
2426
2427/**
2428 * Used by TB code to commit and unmap a read-only memory mapping.
2429 */
2430IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2431{
2432 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2433}
2434
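/* Illustrative sketch of how generated TB code is expected to pair the mapping
   helpers above with these commit-and-unmap helpers (the local variables are
   made up for the example):

       uint8_t   bUnmapInfo;
       uint32_t *pu32Dst = iemNativeHlpMemFlatMapDataU32Wo(pVCpu, &bUnmapInfo, GCPtrMem);
       *pu32Dst = u32Value;                                // write through the mapping
       iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo); // commit the write and release the mapping
*/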
2435
2436/**
2437 * Reinitializes the native recompiler state.
2438 *
2439 * Called before starting a new recompile job.
2440 */
2441static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2442{
2443 pReNative->cLabels = 0;
2444 pReNative->bmLabelTypes = 0;
2445 pReNative->cFixups = 0;
2446#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2447 pReNative->pDbgInfo->cEntries = 0;
2448#endif
2449 pReNative->pTbOrg = pTb;
2450 pReNative->cCondDepth = 0;
2451 pReNative->uCondSeqNo = 0;
2452 pReNative->uCheckIrqSeqNo = 0;
2453 pReNative->uTlbSeqNo = 0;
2454
2455 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2456#if IEMNATIVE_HST_GREG_COUNT < 32
2457 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2458#endif
2459 ;
2460 pReNative->Core.bmHstRegsWithGstShadow = 0;
2461 pReNative->Core.bmGstRegShadows = 0;
2462 pReNative->Core.bmVars = 0;
2463 pReNative->Core.bmStack = 0;
2464 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2465 pReNative->Core.u64ArgVars = UINT64_MAX;
2466
2467 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 9);
2468 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2469 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2470 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2471 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2472 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2473 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2474 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2475 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2476 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2477
2478 /* Full host register reinit: */
2479 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2480 {
2481 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2482 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2483 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2484 }
2485
2486 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2487 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2488#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2489 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2490#endif
2491#ifdef IEMNATIVE_REG_FIXED_TMP0
2492 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2493#endif
2494 );
2495 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2496 {
2497 fRegs &= ~RT_BIT_32(idxReg);
2498 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2499 }
2500
2501 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2502#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2503 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2504#endif
2505#ifdef IEMNATIVE_REG_FIXED_TMP0
2506 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2507#endif
2508 return pReNative;
2509}
2510
2511
2512/**
2513 * Allocates and initializes the native recompiler state.
2514 *
2515 * This is called the first time an EMT wants to recompile something.
2516 *
2517 * @returns Pointer to the new recompiler state.
2518 * @param pVCpu The cross context virtual CPU structure of the calling
2519 * thread.
2520 * @param pTb The TB that's about to be recompiled.
2521 * @thread EMT(pVCpu)
2522 */
2523static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2524{
2525 VMCPU_ASSERT_EMT(pVCpu);
2526
2527 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2528 AssertReturn(pReNative, NULL);
2529
2530 /*
2531 * Try allocate all the buffers and stuff we need.
2532 */
2533 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2534 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2535 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2536#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2537 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2538#endif
2539 if (RT_LIKELY( pReNative->pInstrBuf
2540 && pReNative->paLabels
2541 && pReNative->paFixups)
2542#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2543 && pReNative->pDbgInfo
2544#endif
2545 )
2546 {
2547 /*
2548 * Set the buffer & array sizes on success.
2549 */
2550 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2551 pReNative->cLabelsAlloc = _8K;
2552 pReNative->cFixupsAlloc = _16K;
2553#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2554 pReNative->cDbgInfoAlloc = _16K;
2555#endif
2556
2557 /*
2558 * Done, just need to save it and reinit it.
2559 */
2560 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
2561 return iemNativeReInit(pReNative, pTb);
2562 }
2563
2564 /*
2565 * Failed. Cleanup and return.
2566 */
2567 AssertFailed();
2568 RTMemFree(pReNative->pInstrBuf);
2569 RTMemFree(pReNative->paLabels);
2570 RTMemFree(pReNative->paFixups);
2571#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2572 RTMemFree(pReNative->pDbgInfo);
2573#endif
2574 RTMemFree(pReNative);
2575 return NULL;
2576}
2577
2578
2579/**
2580 * Creates a label.
2581 *
2582 * If the label does not yet have a defined position,
2583 * call iemNativeLabelDefine() later to set it.
2584 *
2585 * @returns Label ID. Throws VBox status code on failure, so no need to check
2586 * the return value.
2587 * @param pReNative The native recompile state.
2588 * @param enmType The label type.
2589 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2590 * label is not yet defined (default).
2591 * @param uData Data associated with the label. Only applicable to
2592 * certain types of labels. Default is zero.
2593 */
2594DECL_HIDDEN_THROW(uint32_t)
2595iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2596 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2597{
2598 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2599
2600 /*
2601 * Locate existing label definition.
2602 *
2603 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2604 * and uData is zero.
2605 */
2606 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2607 uint32_t const cLabels = pReNative->cLabels;
2608 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2609#ifndef VBOX_STRICT
2610 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2611 && offWhere == UINT32_MAX
2612 && uData == 0
2613#endif
2614 )
2615 {
2616#ifndef VBOX_STRICT
2617 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2618 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2619 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2620 if (idxLabel < pReNative->cLabels)
2621 return idxLabel;
2622#else
2623 for (uint32_t i = 0; i < cLabels; i++)
2624 if ( paLabels[i].enmType == enmType
2625 && paLabels[i].uData == uData)
2626 {
2627 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2628 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2629 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2630 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2631 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2632 return i;
2633 }
2634 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2635 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2636#endif
2637 }
2638
2639 /*
2640 * Make sure we've got room for another label.
2641 */
2642 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2643 { /* likely */ }
2644 else
2645 {
2646 uint32_t cNew = pReNative->cLabelsAlloc;
2647 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2648 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2649 cNew *= 2;
2650 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* the IEMNATIVEFIXUP::idxLabel type restricts this */
2651 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2652 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2653 pReNative->paLabels = paLabels;
2654 pReNative->cLabelsAlloc = cNew;
2655 }
2656
2657 /*
2658 * Define a new label.
2659 */
2660 paLabels[cLabels].off = offWhere;
2661 paLabels[cLabels].enmType = enmType;
2662 paLabels[cLabels].uData = uData;
2663 pReNative->cLabels = cLabels + 1;
2664
2665 Assert((unsigned)enmType < 64);
2666 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2667
2668 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2669 {
2670 Assert(uData == 0);
2671 pReNative->aidxUniqueLabels[enmType] = cLabels;
2672 }
2673
2674 if (offWhere != UINT32_MAX)
2675 {
2676#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2677 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2678 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2679#endif
2680 }
2681 return cLabels;
2682}
2683
2684
2685/**
2686 * Defines the location of an existing label.
2687 *
2688 * @param pReNative The native recompile state.
2689 * @param idxLabel The label to define.
2690 * @param offWhere The position.
2691 */
2692DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2693{
2694 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2695 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2696 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2697 pLabel->off = offWhere;
2698#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2699 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2700 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2701#endif
2702}
2703
2704
2705/**
2706 * Looks up a label.
2707 *
2708 * @returns Label ID if found, UINT32_MAX if not.
2709 */
2710static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2711 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2712{
2713 Assert((unsigned)enmType < 64);
2714 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2715 {
2716 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2717 return pReNative->aidxUniqueLabels[enmType];
2718
2719 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2720 uint32_t const cLabels = pReNative->cLabels;
2721 for (uint32_t i = 0; i < cLabels; i++)
2722 if ( paLabels[i].enmType == enmType
2723 && paLabels[i].uData == uData
2724 && ( paLabels[i].off == offWhere
2725 || offWhere == UINT32_MAX
2726 || paLabels[i].off == UINT32_MAX))
2727 return i;
2728 }
2729 return UINT32_MAX;
2730}
2731
2732
2733/**
2734 * Adds a fixup.
2735 *
2736 * @throws VBox status code (int) on failure.
2737 * @param pReNative The native recompile state.
2738 * @param offWhere The instruction offset of the fixup location.
2739 * @param idxLabel The target label ID for the fixup.
2740 * @param enmType The fixup type.
2741 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2742 */
2743DECL_HIDDEN_THROW(void)
2744iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2745 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2746{
2747 Assert(idxLabel <= UINT16_MAX);
2748 Assert((unsigned)enmType <= UINT8_MAX);
2749
2750 /*
2751 * Make sure we've room.
2752 */
2753 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2754 uint32_t const cFixups = pReNative->cFixups;
2755 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2756 { /* likely */ }
2757 else
2758 {
2759 uint32_t cNew = pReNative->cFixupsAlloc;
2760 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2761 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2762 cNew *= 2;
2763 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2764 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2765 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2766 pReNative->paFixups = paFixups;
2767 pReNative->cFixupsAlloc = cNew;
2768 }
2769
2770 /*
2771 * Add the fixup.
2772 */
2773 paFixups[cFixups].off = offWhere;
2774 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2775 paFixups[cFixups].enmType = enmType;
2776 paFixups[cFixups].offAddend = offAddend;
2777 pReNative->cFixups = cFixups + 1;
2778}
2779
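/* Illustrative sketch of the typical create/fixup/define flow for the label and
   fixup helpers above (the label and fixup type names are placeholders, and
   'off' / 'offTarget' are made-up instruction offsets):

       // Forward-declare a label whose position is not known yet, and record a
       // fixup for the branch being emitted at 'off':
       uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);
       iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32);
       // ... once the target position is known, resolve the forward declaration:
       iemNativeLabelDefine(pReNative, idxLabel, offTarget);
*/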
2780
2781/**
2782 * Slow code path for iemNativeInstrBufEnsure.
2783 */
2784DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2785{
2786 /* Double the buffer size till we meet the request. */
2787 uint32_t cNew = pReNative->cInstrBufAlloc;
2788 AssertReturn(cNew > 0, NULL);
2789 do
2790 cNew *= 2;
2791 while (cNew < off + cInstrReq);
2792
2793 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2794#ifdef RT_ARCH_ARM64
2795 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2796#else
2797 uint32_t const cbMaxInstrBuf = _2M;
2798#endif
2799 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2800
2801 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2802 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2803
2804 pReNative->cInstrBufAlloc = cNew;
2805 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2806}
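/* Illustrative numbers for the doubling above (hypothetical): with
   cInstrBufAlloc = 16384 and a request where off + cInstrReq = 30100, a single
   doubling to 32768 instructions covers the request, and the resulting byte
   size must still pass the 1MB (arm64) / 2MB cap asserted above. */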
2807
2808#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2809
2810/**
2811 * Grows the static debug info array used during recompilation.
2812 *
2813 * @returns Pointer to the new debug info block; throws VBox status code on
2814 * failure, so no need to check the return value.
2815 */
2816DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2817{
2818 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2819 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2820 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2821 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2822 pReNative->pDbgInfo = pDbgInfo;
2823 pReNative->cDbgInfoAlloc = cNew;
2824 return pDbgInfo;
2825}
2826
2827
2828/**
2829 * Adds a new, uninitialized debug info entry, returning a pointer to it.
2830 */
2831DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2832{
2833 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2834 { /* likely */ }
2835 else
2836 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2837 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2838}
2839
2840
2841/**
2842 * Debug Info: Adds a native offset record, if necessary.
2843 */
2844static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2845{
2846 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2847
2848 /*
2849 * Search backwards to see if we've got a similar record already.
2850 */
2851 uint32_t idx = pDbgInfo->cEntries;
2852 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
2853 while (idx-- > idxStop)
2854 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
2855 {
2856 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
2857 return;
2858 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
2859 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2860 break;
2861 }
2862
2863 /*
2864 * Add it.
2865 */
2866 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2867 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2868 pEntry->NativeOffset.offNative = off;
2869}
2870
2871
2872/**
2873 * Debug Info: Record info about a label.
2874 */
2875static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2876{
2877 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2878 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2879 pEntry->Label.uUnused = 0;
2880 pEntry->Label.enmLabel = (uint8_t)enmType;
2881 pEntry->Label.uData = uData;
2882}
2883
2884
2885/**
2886 * Debug Info: Record info about a threaded call.
2887 */
2888static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2889{
2890 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2891 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2892 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2893 pEntry->ThreadedCall.uUnused = 0;
2894 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2895}
2896
2897
2898/**
2899 * Debug Info: Record info about a new guest instruction.
2900 */
2901static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2902{
2903 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2904 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2905 pEntry->GuestInstruction.uUnused = 0;
2906 pEntry->GuestInstruction.fExec = fExec;
2907}
2908
2909
2910/**
2911 * Debug Info: Record info about guest register shadowing.
2912 */
2913static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2914 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2915{
2916 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2917 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2918 pEntry->GuestRegShadowing.uUnused = 0;
2919 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2920 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2921 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2922}
2923
2924#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2925
2926
2927/*********************************************************************************************************************************
2928* Register Allocator *
2929*********************************************************************************************************************************/
2930
2931/**
2932 * Register parameter indexes (indexed by argument number).
2933 */
2934DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2935{
2936 IEMNATIVE_CALL_ARG0_GREG,
2937 IEMNATIVE_CALL_ARG1_GREG,
2938 IEMNATIVE_CALL_ARG2_GREG,
2939 IEMNATIVE_CALL_ARG3_GREG,
2940#if defined(IEMNATIVE_CALL_ARG4_GREG)
2941 IEMNATIVE_CALL_ARG4_GREG,
2942# if defined(IEMNATIVE_CALL_ARG5_GREG)
2943 IEMNATIVE_CALL_ARG5_GREG,
2944# if defined(IEMNATIVE_CALL_ARG6_GREG)
2945 IEMNATIVE_CALL_ARG6_GREG,
2946# if defined(IEMNATIVE_CALL_ARG7_GREG)
2947 IEMNATIVE_CALL_ARG7_GREG,
2948# endif
2949# endif
2950# endif
2951#endif
2952};
2953
2954/**
2955 * Call register masks indexed by argument count.
2956 */
2957DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2958{
2959 0,
2960 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2961 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2962 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2963 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2964 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2965#if defined(IEMNATIVE_CALL_ARG4_GREG)
2966 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2967 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2968# if defined(IEMNATIVE_CALL_ARG5_GREG)
2969 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2970 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2971# if defined(IEMNATIVE_CALL_ARG6_GREG)
2972 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2973 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2974 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2975# if defined(IEMNATIVE_CALL_ARG7_GREG)
2976 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2977 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2978 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2979# endif
2980# endif
2981# endif
2982#endif
2983};
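/*
 * Illustrative sketch of how this table is consumed (hypothetical caller):
 * entry [cArgs] ORs together the first cArgs argument registers, so all of
 * them can be tested and claimed with single mask operations, roughly as
 * iemNativeRegAllocArgs does further down:
 *
 *      if (!(  (pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow)
 *            & g_afIemNativeCallRegs[cArgs]))
 *      {
 *          // All the required argument registers are free and shadow nothing.
 *      }
 *      pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
 */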
2984
2985#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2986/**
2987 * BP offset of the stack argument slots.
2988 *
2989 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2990 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2991 */
2992DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2993{
2994 IEMNATIVE_FP_OFF_STACK_ARG0,
2995# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2996 IEMNATIVE_FP_OFF_STACK_ARG1,
2997# endif
2998# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2999 IEMNATIVE_FP_OFF_STACK_ARG2,
3000# endif
3001# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3002 IEMNATIVE_FP_OFF_STACK_ARG3,
3003# endif
3004};
3005AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3006#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3007
3008/**
3009 * Info about shadowed guest register values.
3010 * @see IEMNATIVEGSTREG
3011 */
3012static struct
3013{
3014 /** Offset in VMCPU. */
3015 uint32_t off;
3016 /** The field size. */
3017 uint8_t cb;
3018 /** Name (for logging). */
3019 const char *pszName;
3020} const g_aGstShadowInfo[] =
3021{
3022#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3023 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3024 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3025 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3026 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3027 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3028 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3029 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3030 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3031 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3032 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3033 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3034 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3035 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3036 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3037 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3038 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3039 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3040 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3041 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3042 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3043 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3044 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3045 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3046 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3047 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3048 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3049 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3050 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3051 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3052 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3053 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3054 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3055 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3056 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3057 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3058 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3059 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3060 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3061 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3062 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3063 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3064 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3065#undef CPUMCTX_OFF_AND_SIZE
3066};
3067AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
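/*
 * Illustrative sketch of how emitters consume this table (hypothetical code,
 * the load emitter comment is a placeholder): the VMCPU offset, field size
 * and name are used roughly like iemNativeEmitLoadGprWithGstShadowReg does
 * further down:
 *
 *      uint32_t const offCpum = g_aGstShadowInfo[enmGstReg].off;
 *      switch (g_aGstShadowInfo[enmGstReg].cb)
 *      {
 *          case sizeof(uint64_t):
 *              // emit a 64-bit load of the field at offCpum (relative to pVCpu) into the host register
 *              break;
 *          // smaller fields (selectors, limits, attributes) use narrower loads
 *      }
 *      Log12(("loading guest %s\n", g_aGstShadowInfo[enmGstReg].pszName));
 */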
3068
3069
3070/** Host CPU general purpose register names. */
3071DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3072{
3073#ifdef RT_ARCH_AMD64
3074 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3075#elif defined(RT_ARCH_ARM64)
3076 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3077 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3078#else
3079# error "port me"
3080#endif
3081};
3082
3083
3084DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3085 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3086{
3087 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3088
3089 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3090 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3091 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3092 return (uint8_t)idxReg;
3093}
3094
3095
3096/**
3097 * Tries to locate a suitable register in the given register mask.
3098 *
3099 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3100 * failed.
3101 *
3102 * @returns Host register number on success, returns UINT8_MAX on failure.
3103 */
3104static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3105{
3106 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3107 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3108 if (fRegs)
3109 {
3110 /** @todo pick better here: */
3111 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3112
3113 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3114 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3115 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3116 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3117
3118 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3119 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3120 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3121 return idxReg;
3122 }
3123 return UINT8_MAX;
3124}
3125
3126
3127/**
3128 * Locate a register, possibly freeing one up.
3129 *
3130 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3131 * failed.
3132 *
3133 * @returns Host register number on success. Returns UINT8_MAX if no register
3134 * was found; the caller is supposed to deal with this and raise an
3135 * allocation type specific status code (if desired).
3136 *
3137 * @throws VBox status code if we run into trouble spilling a variable or
3138 * recording debug info. Does NOT throw anything if we're out of
3139 * registers, though.
3140 */
3141static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3142 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3143{
3144 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3145 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3146
3147 /*
3148 * Try a free register that is only shadowing a guest register.
3149 */
3150 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3151 if (fRegs)
3152 {
3153 unsigned const idxReg = (fPreferVolatile
3154 ? ASMBitFirstSetU32(fRegs)
3155 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3156 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
3157 - 1;
3158
3159 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3160 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3161 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3162 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3163
3164 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3165 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3166 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3167 return idxReg;
3168 }
3169
3170 /*
3171 * Try to free up a variable that's in a register.
3172 *
3173 * We do two rounds here: first we evacuate variables that don't need to be
3174 * saved on the stack, then in the second round we move things to the stack.
3175 */
3176 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3177 {
3178 uint32_t fVars = pReNative->Core.bmVars;
3179 while (fVars)
3180 {
3181 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3182 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3183 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3184 && (RT_BIT_32(idxReg) & fRegMask)
3185 && ( iLoop == 0
3186 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3187 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3188 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3189 {
3190 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3191 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3192 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3193 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3194 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3195 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3196
3197 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3198 {
3199 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3200 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3201 }
3202
3203 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3204 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3205
3206 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3207 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3208 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3209 return idxReg;
3210 }
3211 fVars &= ~RT_BIT_32(idxVar);
3212 }
3213 }
3214
3215 return UINT8_MAX;
3216}
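/*
 * Illustrative caller pattern (sketch): since this helper returns UINT8_MAX
 * instead of throwing when nothing can be freed, callers convert that into a
 * status code themselves, as iemNativeRegAllocTmp below does:
 *
 *      idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
 *      AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
 */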
3217
3218
3219/**
3220 * Reassigns a variable to a different register specified by the caller.
3221 *
3222 * @returns The new code buffer position.
3223 * @param pReNative The native recompile state.
3224 * @param off The current code buffer position.
3225 * @param idxVar The variable index.
3226 * @param idxRegOld The old host register number.
3227 * @param idxRegNew The new host register number.
3228 * @param pszCaller The caller for logging.
3229 */
3230static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3231 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3232{
3233 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3234 RT_NOREF(pszCaller);
3235
3236 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3237
3238 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3239 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3240 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3241 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3242
3243 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3244 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3245 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3246 if (fGstRegShadows)
3247 {
3248 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3249 | RT_BIT_32(idxRegNew);
3250 while (fGstRegShadows)
3251 {
3252 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3253 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3254
3255 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3256 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3257 }
3258 }
3259
3260 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3261 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3262 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3263 return off;
3264}
3265
3266
3267/**
3268 * Moves a variable to a different register or spills it onto the stack.
3269 *
3270 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3271 * kinds can easily be recreated if needed later.
3272 *
3273 * @returns The new code buffer position.
3274 * @param pReNative The native recompile state.
3275 * @param off The current code buffer position.
3276 * @param idxVar The variable index.
3277 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3278 * call-volatile registers.
3279 */
3280static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3281 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3282{
3283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3284 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3285 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3286
3287 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3288 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3289 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3290 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3291 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3292 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3293 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3294 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3295 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3296
3297
3298 /** @todo Add statistics on this. */
3299 /** @todo Implement basic variable liveness analysis (python) so variables
3300 * can be freed immediately once they are no longer used. Without it we
3301 * risk trashing registers and stack slots for dead variables. */
3302
3303 /*
3304 * First try move it to a different register, as that's cheaper.
3305 */
3306 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3307 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3308 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3309 if (fRegs)
3310 {
3311 /* Avoid using shadow registers, if possible. */
3312 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3313 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3314 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3315 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3316 }
3317
3318 /*
3319 * Otherwise we must spill the register onto the stack.
3320 */
3321 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3322 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3323 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3324 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3325
3326 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3327 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3328 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3329 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3330 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3331 return off;
3332}
3333
3334
3335/**
3336 * Allocates a temporary host general purpose register.
3337 *
3338 * This may emit code to save register content onto the stack in order to free
3339 * up a register.
3340 *
3341 * @returns The host register number; throws VBox status code on failure,
3342 * so no need to check the return value.
3343 * @param pReNative The native recompile state.
3344 * @param poff Pointer to the variable with the code buffer position.
3345 * This will be updated if we need to move a variable from
3346 * register to stack in order to satisfy the request.
3347 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3348 * registers (@c true, default) or the other way around
3349 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3350 */
3351DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3352{
3353 /*
3354 * Try to find a completely unused register, preferably a call-volatile one.
3355 */
3356 uint8_t idxReg;
3357 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3358 & ~pReNative->Core.bmHstRegsWithGstShadow
3359 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3360 if (fRegs)
3361 {
3362 if (fPreferVolatile)
3363 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3364 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3365 else
3366 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3367 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3368 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3369 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3370 }
3371 else
3372 {
3373 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3374 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3375 }
3376 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3377}
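/*
 * Illustrative usage sketch (hypothetical caller): a typical emitter grabs a
 * temporary register, emits code using it and frees it again once done:
 *
 *      uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp, IEMNATIVE_CALL_ARG0_GREG);
 *      // ... more code emitted with idxRegTmp ...
 *      iemNativeRegFreeTmp(pReNative, idxRegTmp);
 */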
3378
3379
3380/**
3381 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3382 * registers.
3383 *
3384 * @returns The host register number; throws VBox status code on failure,
3385 * so no need to check the return value.
3386 * @param pReNative The native recompile state.
3387 * @param poff Pointer to the variable with the code buffer position.
3388 * This will be updated if we need to move a variable from
3389 * register to stack in order to satisfy the request.
3390 * @param fRegMask Mask of acceptable registers.
3391 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3392 * registers (@c true, default) or the other way around
3393 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3394 */
3395DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3396 bool fPreferVolatile /*= true*/)
3397{
3398 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3399 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3400
3401 /*
3402 * Try to find a completely unused register, preferably a call-volatile one.
3403 */
3404 uint8_t idxReg;
3405 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3406 & ~pReNative->Core.bmHstRegsWithGstShadow
3407 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3408 & fRegMask;
3409 if (fRegs)
3410 {
3411 if (fPreferVolatile)
3412 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3413 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3414 else
3415 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3416 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3417 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3418 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3419 }
3420 else
3421 {
3422 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3423 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3424 }
3425 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3426}
3427
3428
3429/**
3430 * Allocates a temporary register for loading an immediate value into.
3431 *
3432 * This will emit code to load the immediate, unless there happens to be an
3433 * unused register with the value already loaded.
3434 *
3435 * The caller must not modify the returned register; it is to be considered
3436 * read-only. Free it using iemNativeRegFreeTmpImm.
3437 *
3438 * @returns The host register number; throws VBox status code on failure, so no
3439 * need to check the return value.
3440 * @param pReNative The native recompile state.
3441 * @param poff Pointer to the variable with the code buffer position.
3442 * @param uImm The immediate value that the register must hold upon
3443 * return.
3444 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3445 * registers (@c true, default) or the other way around
3446 * (@c false).
3447 *
3448 * @note Reusing immediate values has not been implemented yet.
3449 */
3450DECL_HIDDEN_THROW(uint8_t)
3451iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3452{
3453 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3454 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3455 return idxReg;
3456}
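/*
 * Illustrative usage sketch (hypothetical caller, the immediate value is an
 * arbitrary example): the register is read-only to the caller and must be
 * released with the matching free function:
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1000));
 *      // ... emit code that only reads idxRegImm ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */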
3457
3458
3459/**
3460 * Marks host register @a idxHstReg as containing a shadow copy of guest
3461 * register @a enmGstReg.
3462 *
3463 * ASSUMES that the caller has made sure @a enmGstReg is not associated with any
3464 * host register before calling.
3465 */
3466DECL_FORCE_INLINE(void)
3467iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3468{
3469 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
3470 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3471 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3472
3473 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
3474 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
3475 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
3476 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3477#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3478 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3479 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
3480#else
3481 RT_NOREF(off);
3482#endif
3483}
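/*
 * Illustrative invariant sketch: after the call the four shadow tracking
 * structures agree with one another, which is also what iemNativeRegAssertSanity
 * near the end of this section verifies:
 *
 *      Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstReg);
 *      Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
 *      Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
 *      Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
 */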
3484
3485
3486/**
3487 * Clear any guest register shadow claims from @a idxHstReg.
3488 *
3489 * The register does not need to be shadowing any guest registers.
3490 */
3491DECL_FORCE_INLINE(void)
3492iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
3493{
3494 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3495 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3496 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3497 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3498 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3499
3500#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3501 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3502 if (fGstRegs)
3503 {
3504 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
3505 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3506 while (fGstRegs)
3507 {
3508 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3509 fGstRegs &= ~RT_BIT_64(iGstReg);
3510 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
3511 }
3512 }
3513#else
3514 RT_NOREF(off);
3515#endif
3516
3517 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3518 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3519 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3520}
3521
3522
3523/**
3524 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
3525 * and global overview flags.
3526 */
3527DECL_FORCE_INLINE(void)
3528iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3529{
3530 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3531 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3532 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3533 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3534 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
3535 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3536 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3537
3538#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3539 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3540 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
3541#else
3542 RT_NOREF(off);
3543#endif
3544
3545 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3546 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
3547 if (!fGstRegShadowsNew)
3548 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3549 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
3550}
3551
3552
3553/**
3554 * Clear any guest register shadow claim for @a enmGstReg.
3555 */
3556DECL_FORCE_INLINE(void)
3557iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3558{
3559 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3560 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3561 {
3562 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3563 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3564 }
3565}
3566
3567
3568/**
3569 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
3570 * as the new shadow of it.
3571 */
3572DECL_FORCE_INLINE(void)
3573iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
3574 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3575{
3576 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3577 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3578 {
3579 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3580 if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
3581 return;
3582 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3583 }
3584 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
3585}
3586
3587
3588/**
3589 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
3590 * to @a idxRegTo.
3591 */
3592DECL_FORCE_INLINE(void)
3593iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
3594 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3595{
3596 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
3597 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
3598 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
3599 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
3600 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3601 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
3602 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
3603 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
3604 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
3605
3606 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3607 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
3608 if (!fGstRegShadowsFrom)
3609 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
3610 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
3611 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
3612 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
3613#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3614 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3615 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
3616#else
3617 RT_NOREF(off);
3618#endif
3619}
3620
3621
3622/**
3623 * Allocates a temporary host general purpose register for keeping a guest
3624 * register value.
3625 *
3626 * Since we may already have a register holding the guest register value,
3627 * loading code is only emitted if that is not the case. Code may also be
3628 * emitted if we have to free up a register to satisfy the request.
3629 *
3630 * @returns The host register number; throws VBox status code on failure, so no
3631 * need to check the return value.
3632 * @param pReNative The native recompile state.
3633 * @param poff Pointer to the variable with the code buffer
3634 * position. This will be updated if we need to move a
3635 * variable from register to stack in order to satisfy
3636 * the request.
3637 * @param enmGstReg The guest register that is to be updated.
3638 * @param enmIntendedUse How the caller will be using the host register.
3639 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3640 * register is okay (default). The ASSUMPTION here is
3641 * that the caller has already flushed all volatile
3642 * registers, so this is only applied if we allocate a
3643 * new register.
3644 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3645 */
3646DECL_HIDDEN_THROW(uint8_t)
3647iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3648 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3649 bool fNoVolatileRegs /*= false*/)
3650{
3651 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3652#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3653 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3654#endif
3655 uint32_t const fRegMask = !fNoVolatileRegs
3656 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3657 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3658
3659 /*
3660 * First check if the guest register value is already in a host register.
3661 */
3662 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3663 {
3664 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3665 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3666 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3667 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3668
3669 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3670 {
3671 /*
3672 * If the register will trash the guest shadow copy, try to find a
3673 * completely unused register we can use instead. If that fails,
3674 * we need to disassociate the host reg from the guest reg.
3675 */
3676 /** @todo would be nice to know if preserving the register is in any way helpful. */
3677 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3678 && ( ~pReNative->Core.bmHstRegs
3679 & ~pReNative->Core.bmHstRegsWithGstShadow
3680 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3681 {
3682 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3683
3684 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3685
3686 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3687 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3688 g_apszIemNativeHstRegNames[idxRegNew]));
3689 idxReg = idxRegNew;
3690 }
3691 else
3692 {
3693 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3694 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3695 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3696 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3697 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3698 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3699 else
3700 {
3701 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3702 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3703 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3704 }
3705 }
3706 }
3707 else
3708 {
3709 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3710 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3711 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3712 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3713
3714 /*
3715 * Allocate a new register, copy the value and, if updating, the
3716 * guest shadow copy assignment to the new register.
3717 */
3718 /** @todo share register for readonly access. */
3719 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3720 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3721
3722 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3723 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3724
3725 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3726 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3727 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3728 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3729 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3730 else
3731 {
3732 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3733 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3734 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3735 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3736 }
3737 idxReg = idxRegNew;
3738 }
3739 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3740
3741#ifdef VBOX_STRICT
3742 /* Strict builds: Check that the value is correct. */
3743 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3744#endif
3745
3746 return idxReg;
3747 }
3748
3749 /*
3750 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3751 */
3752 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3753
3754 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3755 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3756
3757 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3758 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3759 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3760 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3761
3762 return idxRegNew;
3763}
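/*
 * Illustrative usage sketch (hypothetical caller): fetching RAX for an
 * update; what gets emitted in between is up to the caller.
 *
 *      uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code updating idxRegRax; the shadow association is kept ...
 *      iemNativeRegFreeTmp(pReNative, idxRegRax);
 */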
3764
3765
3766/**
3767 * Allocates a temporary host general purpose register that already holds the
3768 * given guest register value.
3769 *
3770 * The use case for this function is in places where the shadowing state cannot be
3771 * modified due to branching and such. This will fail if we don't have a
3772 * current shadow copy handy or if it's incompatible. The only code that will
3773 * be emitted here is value checking code in strict builds.
3774 *
3775 * The intended use can only be readonly!
3776 *
3777 * @returns The host register number, UINT8_MAX if not present.
3778 * @param pReNative The native recompile state.
3779 * @param poff Pointer to the instruction buffer offset.
3780 * Will be updated in strict builds if a register is
3781 * found.
3782 * @param enmGstReg The guest register that is to be read.
3783 * @note In strict builds, this may throw instruction buffer growth failures.
3784 * Non-strict builds will not throw anything.
3785 * @sa iemNativeRegAllocTmpForGuestReg
3786 */
3787DECL_HIDDEN_THROW(uint8_t)
3788iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3789{
3790 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3791
3792 /*
3793 * First check if the guest register value is already in a host register.
3794 */
3795 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3796 {
3797 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3798 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3799 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3800 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3801
3802 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3803 {
3804 /*
3805 * We only do readonly use here, so easy compared to the other
3806 * variant of this code.
3807 */
3808 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3809 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3810 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3811 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3812 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3813
3814#ifdef VBOX_STRICT
3815 /* Strict builds: Check that the value is correct. */
3816 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3817#else
3818 RT_NOREF(poff);
3819#endif
3820 return idxReg;
3821 }
3822 }
3823
3824 return UINT8_MAX;
3825}
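/*
 * Illustrative usage sketch (hypothetical caller): the caller must cope with
 * UINT8_MAX itself, since no loading code will be emitted here.
 *
 *      uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxRegPc != UINT8_MAX)
 *      {
 *          // ... read-only use of idxRegPc ...
 *          iemNativeRegFreeTmp(pReNative, idxRegPc);
 *      }
 *      else
 *      {
 *          // ... fall back to loading the value or taking another code path ...
 *      }
 */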
3826
3827
3828DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
3829
3830
3831/**
3832 * Allocates argument registers for a function call.
3833 *
3834 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3835 * need to check the return value.
3836 * @param pReNative The native recompile state.
3837 * @param off The current code buffer offset.
3838 * @param cArgs The number of arguments the function call takes.
3839 */
3840DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3841{
3842 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3843 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3844 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3845 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3846
3847 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3848 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3849 else if (cArgs == 0)
3850 return off;
3851
3852 /*
3853 * Are we lucky and all the registers are free and not shadowing anything?
3854 */
3855 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3856 for (uint32_t i = 0; i < cArgs; i++)
3857 {
3858 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3859 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3860 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3861 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3862 }
3863 /*
3864 * Okay, not lucky, so we have to free up the registers.
3865 */
3866 else
3867 for (uint32_t i = 0; i < cArgs; i++)
3868 {
3869 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3870 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3871 {
3872 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3873 {
3874 case kIemNativeWhat_Var:
3875 {
3876 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3877 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
3878 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3879 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3880 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3881
3882 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3883 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3884 else
3885 {
3886 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3887 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3888 }
3889 break;
3890 }
3891
3892 case kIemNativeWhat_Tmp:
3893 case kIemNativeWhat_Arg:
3894 case kIemNativeWhat_rc:
3895 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3896 default:
3897 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3898 }
3899
3900 }
3901 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3902 {
3903 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3904 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3905 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3906 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3907 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3908 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3909 }
3910 else
3911 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3912 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3913 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3914 }
3915 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3916 return off;
3917}
3918
3919
3920DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3921
3922
3923#if 0
3924/**
3925 * Frees a register assignment of any type.
3926 *
3927 * @param pReNative The native recompile state.
3928 * @param idxHstReg The register to free.
3929 *
3930 * @note Does not update variables.
3931 */
3932DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3933{
3934 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3935 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3936 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3937 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3938 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3939 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3940 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3941 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3942 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3943 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3944 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3945 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3946 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3947 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3948
3949 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3950 /* no flushing, right:
3951 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3952 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3953 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3954 */
3955}
3956#endif
3957
3958
3959/**
3960 * Frees a temporary register.
3961 *
3962 * Any shadow copies of guest registers assigned to the host register will not
3963 * be flushed by this operation.
3964 */
3965DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3966{
3967 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3968 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3969 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3970 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3971 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3972}
3973
3974
3975/**
3976 * Frees a temporary immediate register.
3977 *
3978 * It is assumed that the caller has not modified the register, so it still holds
3979 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3980 */
3981DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3982{
3983 iemNativeRegFreeTmp(pReNative, idxHstReg);
3984}
3985
3986
3987/**
3988 * Frees a register assigned to a variable.
3989 *
3990 * The register will be disassociated from the variable.
3991 */
3992DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3993{
3994 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3995 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3996 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3997 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3998 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
3999
4000 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4001 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4002 if (!fFlushShadows)
4003 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
4004 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4005 else
4006 {
4007 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4008 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4009 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4010 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4011 uint64_t fGstRegShadows = fGstRegShadowsOld;
4012 while (fGstRegShadows)
4013 {
4014 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4015 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4016
4017 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4018 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4019 }
4020 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
4021 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4022 }
4023}
4024
4025
4026/**
4027 * Called right before emitting a call instruction to move anything important
4028 * out of call-volatile registers, free and flush the call-volatile registers,
4029 * optionally freeing argument variables.
4030 *
4031 * @returns New code buffer offset; throws VBox status code on failure.
4032 * @param pReNative The native recompile state.
4033 * @param off The code buffer offset.
4034 * @param cArgs The number of arguments the function call takes.
4035 * It is presumed that the host register part of these has
4036 * already been allocated as such and won't need moving,
4037 * just freeing.
4038 * @param fKeepVars Mask of variables that should keep their register
4039 * assignments. Caller must take care to handle these.
4040 */
4041DECL_HIDDEN_THROW(uint32_t)
4042iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4043{
4044 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4045
4046 /* fKeepVars will reduce this mask. */
4047 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4048
4049 /*
4050 * Move anything important out of volatile registers.
4051 */
4052 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4053 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4054 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4055#ifdef IEMNATIVE_REG_FIXED_TMP0
4056 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4057#endif
4058 & ~g_afIemNativeCallRegs[cArgs];
4059
4060 fRegsToMove &= pReNative->Core.bmHstRegs;
4061 if (!fRegsToMove)
4062 { /* likely */ }
4063 else
4064 {
4065 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4066 while (fRegsToMove != 0)
4067 {
4068 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4069 fRegsToMove &= ~RT_BIT_32(idxReg);
4070
4071 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4072 {
4073 case kIemNativeWhat_Var:
4074 {
4075 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4076 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4077 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4078 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4079 if (!(RT_BIT_32(idxVar) & fKeepVars))
4080 {
4081 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4082 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4083 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4084 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4085 else
4086 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4087 }
4088 else
4089 fRegsToFree &= ~RT_BIT_32(idxReg);
4090 continue;
4091 }
4092
4093 case kIemNativeWhat_Arg:
4094 AssertMsgFailed(("What?!?: %u\n", idxReg));
4095 continue;
4096
4097 case kIemNativeWhat_rc:
4098 case kIemNativeWhat_Tmp:
4099 AssertMsgFailed(("Missing free: %u\n", idxReg));
4100 continue;
4101
4102 case kIemNativeWhat_FixedTmp:
4103 case kIemNativeWhat_pVCpuFixed:
4104 case kIemNativeWhat_pCtxFixed:
4105 case kIemNativeWhat_FixedReserved:
4106 case kIemNativeWhat_Invalid:
4107 case kIemNativeWhat_End:
4108 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4109 }
4110 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4111 }
4112 }
4113
4114 /*
4115 * Do the actual freeing.
4116 */
4117 if (pReNative->Core.bmHstRegs & fRegsToFree)
4118 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4119 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4120 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4121
4122 /* If there are guest register shadows in any call-volatile register, we
4123 have to clear the corresponding guest register masks for each register. */
4124 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4125 if (fHstRegsWithGstShadow)
4126 {
4127 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4128 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4129 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4130 do
4131 {
4132 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4133 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4134
4135 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4136 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4137 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4138 } while (fHstRegsWithGstShadow != 0);
4139 }
4140
4141 return off;
4142}
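/*
 * Illustrative call sequence sketch (hypothetical caller): this helper is
 * used right before emitting the actual call instruction, after the argument
 * registers have been loaded; iemNativeEmitCallImm and pfnHelper are
 * placeholders for the real call emitter and callee.
 *
 *      // ARG0..ARGn already hold the call arguments at this point.
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);  // placeholder emitter + helper
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);              // if the callee may change guest state
 */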
4143
4144
4145/**
4146 * Flushes a set of guest register shadow copies.
4147 *
4148 * This is usually done after calling a threaded function or a C-implementation
4149 * of an instruction.
4150 *
4151 * @param pReNative The native recompile state.
4152 * @param fGstRegs Set of guest registers to flush.
4153 */
4154DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4155{
4156 /*
4157 * Reduce the mask by what's currently shadowed
4158 */
4159 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4160 fGstRegs &= bmGstRegShadowsOld;
4161 if (fGstRegs)
4162 {
4163 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4164 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4165 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4166 if (bmGstRegShadowsNew)
4167 {
4168 /*
4169 * Partial.
4170 */
4171 do
4172 {
4173 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4174 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4175 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4176 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4177 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4178
4179 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4180 fGstRegs &= ~fInThisHstReg;
4181 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4182 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4183 if (!fGstRegShadowsNew)
4184 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4185 } while (fGstRegs != 0);
4186 }
4187 else
4188 {
4189 /*
4190 * Clear all.
4191 */
4192 do
4193 {
4194 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4195 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4196 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4197 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4198 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4199
4200 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4201 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4202 } while (fGstRegs != 0);
4203 pReNative->Core.bmHstRegsWithGstShadow = 0;
4204 }
4205 }
4206}
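/*
 * Illustrative usage sketch (hypothetical caller): flushing only the shadows
 * the just-called helper may have modified, for instance RIP and EFLAGS:
 *
 *      iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
 */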
4207
4208
4209/**
4210 * Flushes delayed write of a specific guest register.
4211 *
4212 * This must be called prior to calling CImpl functions and any helpers that use
4213 * the guest state (like raising exceptions) and such.
4214 *
4215 * This optimization has not yet been implemented. The first target would be
4216 * RIP updates, since these are the most common ones.
4217 */
4218DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4219 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
4220{
4221 RT_NOREF(pReNative, enmClass, idxReg);
4222 return off;
4223}
4224
4225
4226/**
4227 * Flushes any delayed guest register writes.
4228 *
4229 * This must be called prior to calling CImpl functions and any helpers that use
4230 * the guest state (like raising exceptions) and such.
4231 *
4232 * This optimization has not yet been implemented. The first target would be
4233 * RIP updates, since these are the most common ones.
4234 */
4235DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4236{
4237 RT_NOREF(pReNative, off);
4238 return off;
4239}
4240
4241
4242#ifdef VBOX_STRICT
4243/**
4244 * Does internal register allocator sanity checks.
4245 */
4246static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
4247{
4248 /*
4249 * Iterate host registers building a guest shadowing set.
4250 */
4251 uint64_t bmGstRegShadows = 0;
4252 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
4253 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
4254 while (bmHstRegsWithGstShadow)
4255 {
4256 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
4257 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4258 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4259
4260 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4261 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
4262 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
4263 bmGstRegShadows |= fThisGstRegShadows;
4264 while (fThisGstRegShadows)
4265 {
4266 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
4267 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
4268 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
4269 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
4270 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
4271 }
4272 }
4273 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
4274 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
4275 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
4276
4277 /*
4278 * Now the other way around, checking the guest to host index array.
4279 */
4280 bmHstRegsWithGstShadow = 0;
4281 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
4282 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4283 while (bmGstRegShadows)
4284 {
4285 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
4286 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4287 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
4288
4289 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4290 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
4291 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
4292 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
4293 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4294 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4295 }
4296 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
4297 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
4298 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
4299}
4300#endif
4301
4302
4303/*********************************************************************************************************************************
4304* Code Emitters (larger snippets) *
4305*********************************************************************************************************************************/
4306
4307/**
4308 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
4309 * extending to 64-bit width.
4310 *
4311 * @returns New code buffer offset on success, UINT32_MAX on failure.
4312 * @param   pReNative   The native recompile state.
4313 * @param off The current code buffer position.
4314 * @param idxHstReg The host register to load the guest register value into.
4315 * @param enmGstReg The guest register to load.
4316 *
4317 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
4318 * that is something the caller needs to do if applicable.
4319 */
4320DECL_HIDDEN_THROW(uint32_t)
4321iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
4322{
4323 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
4324 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4325
4326 switch (g_aGstShadowInfo[enmGstReg].cb)
4327 {
4328 case sizeof(uint64_t):
4329 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4330 case sizeof(uint32_t):
4331 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4332 case sizeof(uint16_t):
4333 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4334#if 0 /* not present in the table. */
4335 case sizeof(uint8_t):
4336 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4337#endif
4338 default:
4339 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4340 }
4341}
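
/*
 * Minimal usage sketch (illustrative only, kept out of the build with #if 0):
 * load the guest RIP shadow into the fixed temporary register without
 * registering a shadow copy.  The wrapper name iemNativeExampleLoadPc is
 * hypothetical; pReNative/off, kIemNativeGstReg_Pc and IEMNATIVE_REG_FIXED_TMP0
 * are the ones used throughout this file.
 */
#if 0
static uint32_t iemNativeExampleLoadPc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Emit a zero-extending load of CPUMCTX.rip into the temporary register. */
    off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
    return off;
}
#endif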
4342
4343
4344#ifdef VBOX_STRICT
4345/**
4346 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
4347 *
4348 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4349 * Trashes EFLAGS on AMD64.
4350 */
4351static uint32_t
4352iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
4353{
4354# ifdef RT_ARCH_AMD64
4355 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4356
4357 /* rol reg64, 32 */
4358 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4359 pbCodeBuf[off++] = 0xc1;
4360 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4361 pbCodeBuf[off++] = 32;
4362
4363 /* test reg32, ffffffffh */
4364 if (idxReg >= 8)
4365 pbCodeBuf[off++] = X86_OP_REX_B;
4366 pbCodeBuf[off++] = 0xf7;
4367 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4368 pbCodeBuf[off++] = 0xff;
4369 pbCodeBuf[off++] = 0xff;
4370 pbCodeBuf[off++] = 0xff;
4371 pbCodeBuf[off++] = 0xff;
4372
4373 /* je/jz +1 */
4374 pbCodeBuf[off++] = 0x74;
4375 pbCodeBuf[off++] = 0x01;
4376
4377 /* int3 */
4378 pbCodeBuf[off++] = 0xcc;
4379
4380 /* rol reg64, 32 */
4381 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4382 pbCodeBuf[off++] = 0xc1;
4383 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4384 pbCodeBuf[off++] = 32;
4385
4386# elif defined(RT_ARCH_ARM64)
4387 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4388 /* lsr tmp0, reg64, #32 */
4389 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
4390 /* cbz tmp0, +1 */
4391 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4392 /* brk #0x1100 */
4393 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
4394
4395# else
4396# error "Port me!"
4397# endif
4398 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4399 return off;
4400}
4401#endif /* VBOX_STRICT */
4402
4403
4404#ifdef VBOX_STRICT
4405/**
4406 * Emits code that checks that the content of register @a idxReg is the same
4407 * as what's in the guest register @a enmGstReg, emitting a breakpoint
4408 * instruction if that's not the case.
4409 *
4410 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4411 * Trashes EFLAGS on AMD64.
4412 */
4413static uint32_t
4414iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
4415{
4416# ifdef RT_ARCH_AMD64
4417 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4418
4419 /* cmp reg, [mem] */
4420 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
4421 {
4422 if (idxReg >= 8)
4423 pbCodeBuf[off++] = X86_OP_REX_R;
4424 pbCodeBuf[off++] = 0x38;
4425 }
4426 else
4427 {
4428 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
4429 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
4430 else
4431 {
4432 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
4433 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4434 else
4435 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
4436 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
4437 if (idxReg >= 8)
4438 pbCodeBuf[off++] = X86_OP_REX_R;
4439 }
4440 pbCodeBuf[off++] = 0x39;
4441 }
4442 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
4443
4444 /* je/jz +1 */
4445 pbCodeBuf[off++] = 0x74;
4446 pbCodeBuf[off++] = 0x01;
4447
4448 /* int3 */
4449 pbCodeBuf[off++] = 0xcc;
4450
4451 /* For values smaller than the register size, we must check that the rest
4452 of the register is all zeros. */
4453 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
4454 {
4455 /* test reg64, imm32 */
4456 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4457 pbCodeBuf[off++] = 0xf7;
4458 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4459 pbCodeBuf[off++] = 0;
4460 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
4461 pbCodeBuf[off++] = 0xff;
4462 pbCodeBuf[off++] = 0xff;
4463
4464 /* je/jz +1 */
4465 pbCodeBuf[off++] = 0x74;
4466 pbCodeBuf[off++] = 0x01;
4467
4468 /* int3 */
4469 pbCodeBuf[off++] = 0xcc;
4470 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4471 }
4472 else
4473 {
4474 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4475 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
4476 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
4477 }
4478
4479# elif defined(RT_ARCH_ARM64)
4480 /* mov TMP0, [gstreg] */
4481 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
4482
4483 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4484 /* sub tmp0, tmp0, idxReg */
4485 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
4486 /* cbz tmp0, +1 */
4487 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4488 /* brk #0x1000+enmGstReg */
4489 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
4490 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4491
4492# else
4493# error "Port me!"
4494# endif
4495 return off;
4496}
4497#endif /* VBOX_STRICT */
4498
4499
4500#ifdef VBOX_STRICT
4501/**
4502 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
4503 * important bits.
4504 *
4505 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4506 * Trashes EFLAGS on AMD64.
4507 */
4508static uint32_t
4509iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
4510{
4511 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4512 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
4513 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
4514 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
4515
4516#ifdef RT_ARCH_AMD64
4517 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4518
4519 /* je/jz +1 */
4520 pbCodeBuf[off++] = 0x74;
4521 pbCodeBuf[off++] = 0x01;
4522
4523 /* int3 */
4524 pbCodeBuf[off++] = 0xcc;
4525
4526# elif defined(RT_ARCH_ARM64)
4527 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4528
4529 /* b.eq +1 */
4530 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
4531 /* brk #0x2000 */
4532 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
4533
4534# else
4535# error "Port me!"
4536# endif
4537 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4538
4539 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4540 return off;
4541}
4542#endif /* VBOX_STRICT */
4543
4544
4545/**
4546 * Emits code for checking the return code of a call and rcPassUp, returning
4547 * from the generated code if either is non-zero.
4548 */
4549DECL_HIDDEN_THROW(uint32_t)
4550iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
4551{
4552#ifdef RT_ARCH_AMD64
4553 /*
4554 * AMD64: eax = call status code.
4555 */
4556
4557 /* edx = rcPassUp */
4558 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4559# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4560 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
4561# endif
4562
4563 /* edx = eax | rcPassUp */
4564 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4565 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
4566 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
4567 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4568
4569 /* Jump to non-zero status return path. */
4570 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
4571
4572 /* done. */
4573
4574#elif RT_ARCH_ARM64
4575 /*
4576 * ARM64: w0 = call status code.
4577 */
4578# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4579 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
4580# endif
4581 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4582
4583 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4584
4585 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
4586
4587 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4588 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
4589 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
4590
4591#else
4592# error "port me"
4593#endif
4594 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4595 return off;
4596}
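
/*
 * Rough shape of what the above emits (status code in eax / w0):
 *     tmp = status | pVCpu->iem.s.rcPassUp
 *     if (tmp != 0) goto NonZeroRetOrPassUp
 * With instruction counting enabled the instruction index is additionally
 * loaded into ecx / x2 for the status fiddling helper.
 */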
4597
4598
4599/**
4600 * Emits code to check if the content of @a idxAddrReg is a canonical address,
4601 * raising a \#GP(0) if it isn't.
4602 *
4603 * @returns New code buffer offset, UINT32_MAX on failure.
4604 * @param pReNative The native recompile state.
4605 * @param off The code buffer offset.
4606 * @param idxAddrReg The host register with the address to check.
4607 * @param idxInstr The current instruction.
4608 */
4609DECL_HIDDEN_THROW(uint32_t)
4610iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
4611{
4612 /*
4613 * Make sure we don't have any outstanding guest register writes as we may
4614     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
4615 */
4616 off = iemNativeRegFlushPendingWrites(pReNative, off);
4617
4618#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4619 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4620#else
4621 RT_NOREF(idxInstr);
4622#endif
4623
4624#ifdef RT_ARCH_AMD64
4625 /*
4626 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
4627 * return raisexcpt();
4628     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
4629 */
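    /*
     * Worked example of the check (illustrative values, not from the source):
     *   0x00007fffffffffff (canonical)      -> (0x00007fff + 0x8000) >> 16 = 0, no exception
     *   0x0000800000000000 (non-canonical)  -> (0x00008000 + 0x8000) >> 16 = 1, raises #GP(0)
     *   0xffff800000000000 (canonical)      -> (0xffff8000 + 0x8000) wraps to 0 as uint32_t, no exception
     */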
4630 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4631
4632 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
4633 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
4634 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
4635 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
4636 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4637
4638 iemNativeRegFreeTmp(pReNative, iTmpReg);
4639
4640#elif defined(RT_ARCH_ARM64)
4641 /*
4642 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
4643 * return raisexcpt();
4644 * ----
4645 * mov x1, 0x800000000000
4646 * add x1, x0, x1
4647 * cmp xzr, x1, lsr 48
4648 * b.ne .Lraisexcpt
4649 */
4650 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4651
4652 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
4653 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
4654 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
4655 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4656
4657 iemNativeRegFreeTmp(pReNative, iTmpReg);
4658
4659#else
4660# error "Port me"
4661#endif
4662 return off;
4663}
4664
4665
4666/**
4667 * Emits code to check if the content of @a idxAddrReg is within the limit of
4668 * idxSegReg, raising a \#GP(0) if it isn't.
4669 *
4670 * @returns New code buffer offset; throws VBox status code on error.
4671 * @param pReNative The native recompile state.
4672 * @param off The code buffer offset.
4673 * @param idxAddrReg The host register (32-bit) with the address to
4674 * check.
4675 * @param idxSegReg The segment register (X86_SREG_XXX) to check
4676 * against.
4677 * @param idxInstr The current instruction.
4678 */
4679DECL_HIDDEN_THROW(uint32_t)
4680iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4681 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
4682{
4683 /*
4684 * Make sure we don't have any outstanding guest register writes as we may
4685     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
4686 */
4687 off = iemNativeRegFlushPendingWrites(pReNative, off);
4688
4689#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4690 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4691#else
4692 RT_NOREF(idxInstr);
4693#endif
4694
4695 /** @todo implement expand down/whatnot checking */
4696 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
4697
4698 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4699 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
4700 kIemNativeGstRegUse_ForUpdate);
4701
4702 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
4703 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4704
4705 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
4706 return off;
4707}
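
/*
 * In short, the code emitted above fetches the CS limit shadow copy, compares
 * the 32-bit address against it and branches to the RaiseGp0 label if the
 * address is above the limit; expand-down segments are not handled yet (see
 * the @todo above).
 */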
4708
4709
4710/**
4711 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
4712 *
4713 * @returns The flush mask.
4714 * @param fCImpl The IEM_CIMPL_F_XXX flags.
4715 * @param fGstShwFlush The starting flush mask.
4716 */
4717DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
4718{
4719 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
4720 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
4721 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
4722 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
4723 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
4724 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
4725 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
4726 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
4727 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
4728 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
4729 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
4730 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
4731 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4732 return fGstShwFlush;
4733}
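
/*
 * Illustrative sketch (kept out of the build with #if 0): a far branch adds
 * the CS selector, base and limit shadows on top of whatever the caller
 * started with.  The starting mask below is just an example.
 */
#if 0
uint64_t const fExampleFlush = iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR,
                                                                         RT_BIT_64(kIemNativeGstReg_Pc));
/* fExampleFlush now covers the PC plus the CS selector, base and limit shadow copies. */
#endif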
4734
4735
4736/**
4737 * Emits a call to a CImpl function or something similar.
4738 */
4739DECL_HIDDEN_THROW(uint32_t)
4740iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
4741 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
4742{
4743 /*
4744     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
4745 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
4746 */
4747 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
4748 fGstShwFlush
4749 | RT_BIT_64(kIemNativeGstReg_Pc)
4750 | RT_BIT_64(kIemNativeGstReg_EFlags));
4751 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4752
4753 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4754
4755 /*
4756 * Load the parameters.
4757 */
4758#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
4759     /* Special case: the hidden VBOXSTRICTRC pointer. */
4760 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4761 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4762 if (cAddParams > 0)
4763 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
4764 if (cAddParams > 1)
4765 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
4766 if (cAddParams > 2)
4767 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
4768 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4769
4770#else
4771 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
4772 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4773 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4774 if (cAddParams > 0)
4775 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
4776 if (cAddParams > 1)
4777 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
4778 if (cAddParams > 2)
4779# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
4780 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
4781# else
4782 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
4783# endif
4784#endif
4785
4786 /*
4787 * Make the call.
4788 */
4789 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
4790
4791#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4792 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4793#endif
4794
4795 /*
4796 * Check the status code.
4797 */
4798 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4799}
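
/*
 * Summary of the sequence above: flush the affected guest shadow copies, free
 * and spill the host registers needed for the call, load pVCpu, cbInstr and
 * up to three extra parameters into the calling convention registers (shifted
 * by one on Windows when the hidden VBOXSTRICTRC return pointer is needed,
 * with overflow going to the stack), call the CImpl worker and route any
 * non-zero status through iemNativeEmitCheckCallRetAndPassUp().
 */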
4800
4801
4802/**
4803 * Emits a call to a threaded worker function.
4804 */
4805DECL_HIDDEN_THROW(uint32_t)
4806iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
4807{
4808 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
4809 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4810
4811#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4812 /* The threaded function may throw / long jmp, so set current instruction
4813 number if we're counting. */
4814 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4815#endif
4816
4817 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
4818
4819#ifdef RT_ARCH_AMD64
4820 /* Load the parameters and emit the call. */
4821# ifdef RT_OS_WINDOWS
4822# ifndef VBOXSTRICTRC_STRICT_ENABLED
4823 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4824 if (cParams > 0)
4825 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
4826 if (cParams > 1)
4827 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
4828 if (cParams > 2)
4829 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
4830# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
4831 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
4832 if (cParams > 0)
4833 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
4834 if (cParams > 1)
4835 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
4836 if (cParams > 2)
4837 {
4838 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
4839 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
4840 }
4841 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4842# endif /* VBOXSTRICTRC_STRICT_ENABLED */
4843# else
4844 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4845 if (cParams > 0)
4846 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
4847 if (cParams > 1)
4848 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
4849 if (cParams > 2)
4850 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
4851# endif
4852
4853 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4854
4855# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4856 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4857# endif
4858
4859#elif RT_ARCH_ARM64
4860 /*
4861 * ARM64:
4862 */
4863 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4864 if (cParams > 0)
4865 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
4866 if (cParams > 1)
4867 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
4868 if (cParams > 2)
4869 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
4870
4871 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4872
4873#else
4874# error "port me"
4875#endif
4876
4877 /*
4878 * Check the status code.
4879 */
4880 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
4881
4882 return off;
4883}
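
/*
 * Summary: pVCpu and up to three auParams are loaded into the calling
 * convention registers (everything shifts one register on Windows when the
 * hidden VBOXSTRICTRC return pointer is in play, with the last parameter
 * spilled to the stack), then the threaded function is called and its status
 * routed through iemNativeEmitCheckCallRetAndPassUp().
 */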
4884
4885
4886/**
4887 * Emits the code at the CheckBranchMiss label.
4888 */
4889static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4890{
4891 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
4892 if (idxLabel != UINT32_MAX)
4893 {
4894 iemNativeLabelDefine(pReNative, idxLabel, off);
4895
4896 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
4897 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4898 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
4899
4900 /* jump back to the return sequence. */
4901 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4902 }
4903 return off;
4904}
4905
4906
4907/**
4908 * Emits the code at the NeedCsLimChecking label.
4909 */
4910static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4911{
4912 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
4913 if (idxLabel != UINT32_MAX)
4914 {
4915 iemNativeLabelDefine(pReNative, idxLabel, off);
4916
4917 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
4918 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4919 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
4920
4921 /* jump back to the return sequence. */
4922 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4923 }
4924 return off;
4925}
4926
4927
4928/**
4929 * Emits the code at the ObsoleteTb label.
4930 */
4931static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4932{
4933 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
4934 if (idxLabel != UINT32_MAX)
4935 {
4936 iemNativeLabelDefine(pReNative, idxLabel, off);
4937
4938 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
4939 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4940 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
4941
4942 /* jump back to the return sequence. */
4943 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4944 }
4945 return off;
4946}
4947
4948
4949/**
4950 * Emits the code at the RaiseGP0 label.
4951 */
4952static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4953{
4954 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
4955 if (idxLabel != UINT32_MAX)
4956 {
4957 iemNativeLabelDefine(pReNative, idxLabel, off);
4958
4959 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
4960 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4961 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
4962
4963 /* jump back to the return sequence. */
4964 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4965 }
4966 return off;
4967}
4968
4969
4970/**
4971 * Emits the code at the ReturnWithFlags label (returns
4972 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
4973 */
4974static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4975{
4976 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
4977 if (idxLabel != UINT32_MAX)
4978 {
4979 iemNativeLabelDefine(pReNative, idxLabel, off);
4980
4981 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
4982
4983 /* jump back to the return sequence. */
4984 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4985 }
4986 return off;
4987}
4988
4989
4990/**
4991 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
4992 */
4993static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4994{
4995 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
4996 if (idxLabel != UINT32_MAX)
4997 {
4998 iemNativeLabelDefine(pReNative, idxLabel, off);
4999
5000 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5001
5002 /* jump back to the return sequence. */
5003 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5004 }
5005 return off;
5006}
5007
5008
5009/**
5010 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5011 */
5012static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5013{
5014 /*
5015 * Generate the rc + rcPassUp fiddling code if needed.
5016 */
5017 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5018 if (idxLabel != UINT32_MAX)
5019 {
5020 iemNativeLabelDefine(pReNative, idxLabel, off);
5021
5022 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
5023#ifdef RT_ARCH_AMD64
5024# ifdef RT_OS_WINDOWS
5025# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5026 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5027# endif
5028 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5029 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5030# else
5031 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5032 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5033# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5034 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5035# endif
5036# endif
5037# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5038 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5039# endif
5040
5041#else
5042 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5043 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5044 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5045#endif
5046
5047 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5048 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5049 }
5050 return off;
5051}
5052
5053
5054/**
5055 * Emits a standard epilog.
5056 */
5057static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5058{
5059 *pidxReturnLabel = UINT32_MAX;
5060
5061 /*
5062 * Successful return, so clear the return register (eax, w0).
5063 */
5064    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
5065
5066 /*
5067 * Define label for common return point.
5068 */
5069 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5070 *pidxReturnLabel = idxReturn;
5071
5072 /*
5073 * Restore registers and return.
5074 */
5075#ifdef RT_ARCH_AMD64
5076 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5077
5078    /* Reposition rsp at the r15 restore point. */
5079 pbCodeBuf[off++] = X86_OP_REX_W;
5080 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5081 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5082 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5083
5084 /* Pop non-volatile registers and return */
5085 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5086 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5087 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5088 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5089 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5090 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5091 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5092 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5093# ifdef RT_OS_WINDOWS
5094 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5095 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5096# endif
5097 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5098 pbCodeBuf[off++] = 0xc9; /* leave */
5099 pbCodeBuf[off++] = 0xc3; /* ret */
5100 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5101
5102#elif RT_ARCH_ARM64
5103 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5104
5105    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
5106 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5107 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5108 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5109 IEMNATIVE_FRAME_VAR_SIZE / 8);
5110 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5111 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5112 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5113 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5114 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5115 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5116 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5117 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5118 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5119 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5120 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5121 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5122
5123 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5124 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5125 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5126 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5127
5128 /* retab / ret */
5129# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5130 if (1)
5131 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5132 else
5133# endif
5134 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
5135
5136#else
5137# error "port me"
5138#endif
5139 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5140
5141 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
5142}
5143
5144
5145/**
5146 * Emits a standard prolog.
5147 */
5148static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5149{
5150#ifdef RT_ARCH_AMD64
5151 /*
5152 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
5153 * reserving 64 bytes for stack variables plus 4 non-register argument
5154     * slots. Fixed register assignment: xBX = pVCpu.
5155 *
5156 * Since we always do the same register spilling, we can use the same
5157 * unwind description for all the code.
5158 */
5159 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5160 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
5161 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
5162 pbCodeBuf[off++] = 0x8b;
5163 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
5164 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
5165 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
5166# ifdef RT_OS_WINDOWS
5167 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
5168 pbCodeBuf[off++] = 0x8b;
5169 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
5170 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
5171 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
5172# else
5173 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
5174 pbCodeBuf[off++] = 0x8b;
5175 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
5176# endif
5177 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
5178 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
5179 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
5180 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
5181 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
5182 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
5183 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
5184 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
5185
5186 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
5187 X86_GREG_xSP,
5188 IEMNATIVE_FRAME_ALIGN_SIZE
5189 + IEMNATIVE_FRAME_VAR_SIZE
5190 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
5191 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
5192 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
5193 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
5194 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
5195
5196#elif RT_ARCH_ARM64
5197 /*
5198 * We set up a stack frame exactly like on x86, only we have to push the
5199     * return address ourselves here. We save all non-volatile registers.
5200 */
5201 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5202
5203# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
5204                      * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
5205                      * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
5206                      * in any way conditional, so we just emit this instruction now and hope for the best... */
5207 /* pacibsp */
5208 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
5209# endif
5210
5211 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
5212 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
5213 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5214 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5215 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
5216 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
5217 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5218 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5219 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5220 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5221 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5222 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5223 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5224 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5225 /* Save the BP and LR (ret address) registers at the top of the frame. */
5226 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5227 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5228 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5229 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
5230 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
5231 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
5232
5233 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
5234 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
5235
5236 /* mov r28, r0 */
5237 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
5238 /* mov r27, r1 */
5239 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
5240
5241#else
5242# error "port me"
5243#endif
5244 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5245 return off;
5246}
5247
5248
5249
5250
5251/*********************************************************************************************************************************
5252* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
5253*********************************************************************************************************************************/
5254
5255#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
5256 { \
5257 Assert(pReNative->Core.bmVars == 0); \
5258 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
5259 Assert(pReNative->Core.bmStack == 0); \
5260 pReNative->fMc = (a_fMcFlags); \
5261 pReNative->fCImpl = (a_fCImplFlags); \
5262 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
5263
5264/** We have to get to the end in recompilation mode, as otherwise we won't
5265 * generate code for all the IEM_MC_IF_XXX branches. */
5266#define IEM_MC_END() \
5267 iemNativeVarFreeAll(pReNative); \
5268 } return off
5269
5270
5271
5272/*********************************************************************************************************************************
5273* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
5274*********************************************************************************************************************************/
5275
5276#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
5277 pReNative->fMc = 0; \
5278 pReNative->fCImpl = (a_fFlags); \
5279 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
5280
5281
5282#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
5283 pReNative->fMc = 0; \
5284 pReNative->fCImpl = (a_fFlags); \
5285 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
5286
5287DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5288 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5289 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
5290{
5291 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
5292}
5293
5294
5295#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
5296 pReNative->fMc = 0; \
5297 pReNative->fCImpl = (a_fFlags); \
5298 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5299 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
5300
5301DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5302 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5303 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
5304{
5305 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
5306}
5307
5308
5309#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
5310 pReNative->fMc = 0; \
5311 pReNative->fCImpl = (a_fFlags); \
5312 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5313 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
5314
5315DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5316 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5317 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
5318 uint64_t uArg2)
5319{
5320 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
5321}
5322
5323
5324
5325/*********************************************************************************************************************************
5326* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
5327*********************************************************************************************************************************/
5328
5329/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
5330 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
5331DECL_INLINE_THROW(uint32_t)
5332iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5333{
5334 /*
5335     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
5336     * return with a special status code and make the execution loop deal with
5337     * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
5338     * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
5339     * could continue w/o interruption, it would probably drop into the
5340     * debugger, so it's not worth the effort of trying to service it here and
5341     * we just lump it in with the handling of the others.
5342     *
5343     * To simplify the code and the register state management even more (wrt
5344     * the immediate in the AND operation), we always update the flags and skip
5345     * the conditional jump associated with the extra check.
5346 */
5347 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
5348 <= UINT32_MAX);
5349 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5350 kIemNativeGstRegUse_ForUpdate);
5351 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
5352 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
5353 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
5354 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
5355 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5356
5357 /* Free but don't flush the EFLAGS register. */
5358 iemNativeRegFreeTmp(pReNative, idxEflReg);
5359
5360 return off;
5361}
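
/*
 * Pseudo form of what the above emits (for orientation only):
 *     eflags  = <guest EFLAGS shadow>
 *     if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *         goto ReturnWithFlags
 *     eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW)
 *     cpum.GstCtx.eflags = eflags
 */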
5362
5363
5364#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
5365 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5366
5367#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
5368 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
5369 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5370
5371/** Same as iemRegAddToRip64AndFinishingNoFlags. */
5372DECL_INLINE_THROW(uint32_t)
5373iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5374{
5375 /* Allocate a temporary PC register. */
5376 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5377
5378 /* Perform the addition and store the result. */
5379 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
5380 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5381
5382 /* Free but don't flush the PC register. */
5383 iemNativeRegFreeTmp(pReNative, idxPcReg);
5384
5385 return off;
5386}
5387
5388
5389#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
5390 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5391
5392#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
5393 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
5394 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5395
5396/** Same as iemRegAddToEip32AndFinishingNoFlags. */
5397DECL_INLINE_THROW(uint32_t)
5398iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5399{
5400 /* Allocate a temporary PC register. */
5401 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5402
5403 /* Perform the addition and store the result. */
5404 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5405 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5406
5407 /* Free but don't flush the PC register. */
5408 iemNativeRegFreeTmp(pReNative, idxPcReg);
5409
5410 return off;
5411}
5412
5413
5414#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
5415 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5416
5417#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
5418 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
5419 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5420
5421/** Same as iemRegAddToIp16AndFinishingNoFlags. */
5422DECL_INLINE_THROW(uint32_t)
5423iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5424{
5425 /* Allocate a temporary PC register. */
5426 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5427
5428 /* Perform the addition and store the result. */
5429 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5430 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5431 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5432
5433 /* Free but don't flush the PC register. */
5434 iemNativeRegFreeTmp(pReNative, idxPcReg);
5435
5436 return off;
5437}
5438
5439
5440
5441/*********************************************************************************************************************************
5442* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
5443*********************************************************************************************************************************/
5444
5445#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
5446 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5447 (a_enmEffOpSize), pCallEntry->idxInstr)
5448
5449#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5450 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
5451 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5452
5453#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
5454 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5455 IEMMODE_16BIT, pCallEntry->idxInstr)
5456
5457#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
5458 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
5459 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5460
5461#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
5462 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5463 IEMMODE_64BIT, pCallEntry->idxInstr)
5464
5465#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
5466 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
5467 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5468
5469/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
5470 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
5471 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
5472DECL_INLINE_THROW(uint32_t)
5473iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5474 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5475{
5476 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
5477
5478 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5479 off = iemNativeRegFlushPendingWrites(pReNative, off);
5480
5481 /* Allocate a temporary PC register. */
5482 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5483
5484 /* Perform the addition. */
5485 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
5486
5487 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
5488 {
5489 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5490 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5491 }
5492 else
5493 {
5494 /* Just truncate the result to 16-bit IP. */
5495 Assert(enmEffOpSize == IEMMODE_16BIT);
5496 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5497 }
5498 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5499
5500 /* Free but don't flush the PC register. */
5501 iemNativeRegFreeTmp(pReNative, idxPcReg);
5502
5503 return off;
5504}
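
/*
 * Pseudo form of the emitted sequence (for orientation only):
 *     pc  = <guest RIP shadow>
 *     pc += offDisp + cbInstr
 *     64-bit operand size: canonical check, #GP(0) + TB exit if it fails
 *     16-bit operand size: clear bits 63:16
 *     cpum.GstCtx.rip = pc
 */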
5505
5506
5507#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
5508 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5509 (a_enmEffOpSize), pCallEntry->idxInstr)
5510
5511#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5512 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
5513 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5514
5515#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
5516 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5517 IEMMODE_16BIT, pCallEntry->idxInstr)
5518
5519#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
5520 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
5521 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5522
5523#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
5524 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5525 IEMMODE_32BIT, pCallEntry->idxInstr)
5526
5527#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
5528 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
5529 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5530
5531/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
5532 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
5533 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
5534DECL_INLINE_THROW(uint32_t)
5535iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5536 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5537{
5538 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
5539
5540 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5541 off = iemNativeRegFlushPendingWrites(pReNative, off);
5542
5543 /* Allocate a temporary PC register. */
5544 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5545
5546 /* Perform the addition. */
5547 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5548
5549 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
5550 if (enmEffOpSize == IEMMODE_16BIT)
5551 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5552
5553 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
5554 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5555
5556 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5557
5558 /* Free but don't flush the PC register. */
5559 iemNativeRegFreeTmp(pReNative, idxPcReg);
5560
5561 return off;
5562}
5563
5564
5565#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
5566 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
5567
5568#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
5569 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
5570 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5571
5572#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
5573 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
5574
5575#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
5576 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
5577 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5578
5579#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
5580 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
5581
5582#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
5583 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
5584 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5585
5586/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
5587DECL_INLINE_THROW(uint32_t)
5588iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5589 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
5590{
5591 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5592 off = iemNativeRegFlushPendingWrites(pReNative, off);
5593
5594 /* Allocate a temporary PC register. */
5595 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5596
5597 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
5598 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5599 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5600 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5601 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5602
5603 /* Free but don't flush the PC register. */
5604 iemNativeRegFreeTmp(pReNative, idxPcReg);
5605
5606 return off;
5607}
5608
5609
5610
5611/*********************************************************************************************************************************
5612* Emitters for changing PC/RIP/EIP/IP with a indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
5613*********************************************************************************************************************************/
5614
5615/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
5616#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
5617 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5618
5619/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
5620#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
5621 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5622
5623/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
5624#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
5625 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5626
5627/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
5628 * clears flags. */
5629#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
5630 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
5631 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5632
5633/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
5634 * clears flags. */
5635#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
5636 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
5637 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5638
5639/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
5640 * clears flags. */
5641#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
5642 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
5643 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5644
5645#undef IEM_MC_SET_RIP_U16_AND_FINISH
5646
5647
5648/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
5649#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
5650 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
5651
5652/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
5653#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
5654 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
5655
5656/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
5657 * clears flags. */
5658#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
5659 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
5660 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5661
5662/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
5663 * and clears flags. */
5664#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
5665 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
5666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5667
5668#undef IEM_MC_SET_RIP_U32_AND_FINISH
5669
5670
5671/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
5672#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
5673 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
5674
5675/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
5676 * and clears flags. */
5677#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
5678 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
5679 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5680
5681#undef IEM_MC_SET_RIP_U64_AND_FINISH
5682
5683
5684/** Same as iemRegRipJumpU16AndFinishNoFlags,
5685 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
5686DECL_INLINE_THROW(uint32_t)
5687iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
5688 uint8_t idxInstr, uint8_t cbVar)
5689{
5690 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
5691 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
5692
5693 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5694 off = iemNativeRegFlushPendingWrites(pReNative, off);
5695
5696 /* Get a register with the new PC loaded from idxVarPc.
5697 Note! This ASSUMES that the high bits of the GPR are zeroed. */
5698 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
5699
5700 /* Check limit (may #GP(0) + exit TB). */
5701 if (!f64Bit)
5702 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5703 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5704 else if (cbVar > sizeof(uint32_t))
5705 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5706
5707 /* Store the result. */
5708 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5709
5710 /** @todo implicitly free the variable? */
5711
5712 return off;
5713}
5714
5715
5716
5717/*********************************************************************************************************************************
5718* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
5719*********************************************************************************************************************************/
5720
5721/**
5722 * Pushes an IEM_MC_IF_XXX onto the condition stack.
5723 *
5724 * @returns Pointer to the condition stack entry.
5725 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting is too deep.
5726 */
5727DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
5728{
5729 uint32_t const idxStack = pReNative->cCondDepth;
5730 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
5731
5732 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
5733 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
5734
5735 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
5736 pEntry->fInElse = false;
5737 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
5738 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
5739
5740 return pEntry;
5741}
5742
5743
5744/**
5745 * Start of the if-block, snapshotting the register and variable state.
5746 */
5747DECL_INLINE_THROW(void)
5748iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
5749{
5750 Assert(offIfBlock != UINT32_MAX);
5751 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5752 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5753 Assert(!pEntry->fInElse);
5754
5755 /* Define the start of the IF block if requested or for disassembly purposes. */
5756 if (idxLabelIf != UINT32_MAX)
5757 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
5758#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5759 else
5760 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
5761#else
5762 RT_NOREF(offIfBlock);
5763#endif
5764
5765 /* Copy the initial state so we can restore it in the 'else' block. */
5766 pEntry->InitialState = pReNative->Core;
5767}
5768
5769
5770#define IEM_MC_ELSE() } while (0); \
5771 off = iemNativeEmitElse(pReNative, off); \
5772 do {
5773
5774/** Emits code related to IEM_MC_ELSE. */
5775DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5776{
5777 /* Check sanity and get the conditional stack entry. */
5778 Assert(off != UINT32_MAX);
5779 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5780 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5781 Assert(!pEntry->fInElse);
5782
5783 /* Jump to the endif */
5784 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
5785
5786 /* Define the else label and enter the else part of the condition. */
5787 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5788 pEntry->fInElse = true;
5789
5790 /* Snapshot the core state so we can do a merge at the endif and restore
5791 the snapshot we took at the start of the if-block. */
5792 pEntry->IfFinalState = pReNative->Core;
5793 pReNative->Core = pEntry->InitialState;
5794
5795 return off;
5796}
5797
5798
5799#define IEM_MC_ENDIF() } while (0); \
5800 off = iemNativeEmitEndIf(pReNative, off)
5801
5802/** Emits code related to IEM_MC_ENDIF. */
5803DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5804{
5805 /* Check sanity and get the conditional stack entry. */
5806 Assert(off != UINT32_MAX);
5807 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5808 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5809
5810 /*
5811 * Now we have to find common ground with the core state at the end of the
5812 * other branch. Use the smallest common denominator and just drop anything
5813 * that isn't the same in both states.
5814 */
5815 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
5816 * which is why we're doing this at the end of the else-block.
5817 * But we'd need more info about the future for that to be worth the effort. */
5818 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
5819 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
5820 {
5821 /* shadow guest stuff first. */
5822 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
5823 if (fGstRegs)
5824 {
5825 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
5826 do
5827 {
5828 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5829 fGstRegs &= ~RT_BIT_64(idxGstReg);
5830
5831 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5832 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
5833 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
5834 {
5835 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
5836 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
5837 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
5838 }
5839 } while (fGstRegs);
5840 }
5841 else
5842 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
5843
5844 /* Check variables next. For now we must require them to be identical
5845 or stuff we can recreate. */
5846 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
5847 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
5848 if (fVars)
5849 {
5850 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
5851 do
5852 {
5853 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
5854 fVars &= ~RT_BIT_32(idxVar);
5855
5856 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
5857 {
5858 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
5859 continue;
5860 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5861 {
5862 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5863 if (idxHstReg != UINT8_MAX)
5864 {
5865 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5866 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5867 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
5868 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5869 }
5870 continue;
5871 }
5872 }
5873 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
5874 continue;
5875
5876 /* Irreconcilable, so drop it. */
5877 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5878 if (idxHstReg != UINT8_MAX)
5879 {
5880 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5881 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5882 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
5883 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5884 }
5885 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
5886 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5887 } while (fVars);
5888 }
5889
5890 /* Finally, check that the host register allocations matches. */
5891 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
5892 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
5893 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
5894 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
5895 }
5896
5897 /*
5898 * Define the endif label and maybe the else one if we're still in the 'if' part.
5899 */
5900 if (!pEntry->fInElse)
5901 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5902 else
5903 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
5904 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
5905
5906 /* Pop the conditional stack.*/
5907 pReNative->cCondDepth -= 1;
5908
5909 return off;
5910}
5911
5912
5913#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
5914 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
5915 do {
5916
5917/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
5918DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5919{
5920 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5921
5922 /* Get the eflags. */
5923 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5924 kIemNativeGstRegUse_ReadOnly);
5925
5926 /* Test and jump. */
5927 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5928
5929 /* Free but don't flush the EFlags register. */
5930 iemNativeRegFreeTmp(pReNative, idxEflReg);
5931
5932 /* Make a copy of the core state now as we start the if-block. */
5933 iemNativeCondStartIfBlock(pReNative, off);
5934
5935 return off;
5936}
5937
5938
5939#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
5940 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
5941 do {
5942
5943/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
5944DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5945{
5946 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5947
5948 /* Get the eflags. */
5949 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5950 kIemNativeGstRegUse_ReadOnly);
5951
5952 /* Test and jump. */
5953 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5954
5955 /* Free but don't flush the EFlags register. */
5956 iemNativeRegFreeTmp(pReNative, idxEflReg);
5957
5958 /* Make a copy of the core state now as we start the if-block. */
5959 iemNativeCondStartIfBlock(pReNative, off);
5960
5961 return off;
5962}
5963
5964
5965#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
5966 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
5967 do {
5968
5969/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
5970DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
5971{
5972 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5973
5974 /* Get the eflags. */
5975 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5976 kIemNativeGstRegUse_ReadOnly);
5977
5978 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5979 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5980
5981 /* Test and jump. */
5982 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5983
5984 /* Free but don't flush the EFlags register. */
5985 iemNativeRegFreeTmp(pReNative, idxEflReg);
5986
5987 /* Make a copy of the core state now as we start the if-block. */
5988 iemNativeCondStartIfBlock(pReNative, off);
5989
5990 return off;
5991}
5992
5993
5994#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
5995 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
5996 do {
5997
5998/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
5999DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6000{
6001 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6002
6003 /* Get the eflags. */
6004 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6005 kIemNativeGstRegUse_ReadOnly);
6006
6007 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6008 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6009
6010 /* Test and jump. */
6011 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6012
6013 /* Free but don't flush the EFlags register. */
6014 iemNativeRegFreeTmp(pReNative, idxEflReg);
6015
6016 /* Make a copy of the core state now as we start the if-block. */
6017 iemNativeCondStartIfBlock(pReNative, off);
6018
6019 return off;
6020}
6021
6022
6023#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
6024 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
6025 do {
6026
6027#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
6028 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
6029 do {
6030
6031/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
6032DECL_INLINE_THROW(uint32_t)
6033iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6034 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6035{
6036 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6037
6038 /* Get the eflags. */
6039 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6040 kIemNativeGstRegUse_ReadOnly);
6041
6042 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6043 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6044
6045 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6046 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6047 Assert(iBitNo1 != iBitNo2);
6048
6049#ifdef RT_ARCH_AMD64
6050 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
6051
6052 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6053 if (iBitNo1 > iBitNo2)
6054 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6055 else
6056 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6057 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6058
6059#elif defined(RT_ARCH_ARM64)
6060 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6061 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6062
6063 /* and tmpreg, eflreg, #1<<iBitNo1 */
6064 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6065
6066 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6067 if (iBitNo1 > iBitNo2)
6068 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6069 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6070 else
6071 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6072 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6073
6074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6075
6076#else
6077# error "Port me"
6078#endif
6079
6080 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6081 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6082 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6083
6084 /* Free but don't flush the EFlags and tmp registers. */
6085 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6086 iemNativeRegFreeTmp(pReNative, idxEflReg);
6087
6088 /* Make a copy of the core state now as we start the if-block. */
6089 iemNativeCondStartIfBlock(pReNative, off);
6090
6091 return off;
6092}
6093
6094
6095#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
6096 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
6097 do {
6098
6099#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
6100 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
6101 do {
6102
6103/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
6104 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
6105DECL_INLINE_THROW(uint32_t)
6106iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
6107 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6108{
6109 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6110
6111 /* We need an if-block label for the inverted variant. */
6112 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
6113 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
6114
6115 /* Get the eflags. */
6116 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6117 kIemNativeGstRegUse_ReadOnly);
6118
6119 /* Translate the flag masks to bit numbers. */
6120 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6121 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6122
6123 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6124 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6125 Assert(iBitNo1 != iBitNo);
6126
6127 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6128 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6129 Assert(iBitNo2 != iBitNo);
6130 Assert(iBitNo2 != iBitNo1);
6131
6132#ifdef RT_ARCH_AMD64
6133 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
6134#elif defined(RT_ARCH_ARM64)
6135 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6136#endif
6137
6138 /* Check for the lone bit first. */
6139 if (!fInverted)
6140 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6141 else
6142 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
6143
6144 /* Then extract and compare the other two bits. */
6145#ifdef RT_ARCH_AMD64
6146 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6147 if (iBitNo1 > iBitNo2)
6148 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6149 else
6150 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6151 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6152
6153#elif defined(RT_ARCH_ARM64)
6154 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6155
6156 /* and tmpreg, eflreg, #1<<iBitNo1 */
6157 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6158
6159 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6160 if (iBitNo1 > iBitNo2)
6161 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6162 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6163 else
6164 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6165 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6166
6167 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6168
6169#else
6170# error "Port me"
6171#endif
6172
6173 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6174 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6175 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6176
6177 /* Free but don't flush the EFlags and tmp registers. */
6178 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6179 iemNativeRegFreeTmp(pReNative, idxEflReg);
6180
6181 /* Make a copy of the core state now as we start the if-block. */
6182 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
6183
6184 return off;
6185}
6186
6187
6188#define IEM_MC_IF_CX_IS_NZ() \
6189 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
6190 do {
6191
6192/** Emits code for IEM_MC_IF_CX_IS_NZ. */
6193DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6194{
6195 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6196
6197 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6198 kIemNativeGstRegUse_ReadOnly);
6199 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6200 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6201
6202 iemNativeCondStartIfBlock(pReNative, off);
6203 return off;
6204}
6205
6206
6207#define IEM_MC_IF_ECX_IS_NZ() \
6208 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
6209 do {
6210
6211#define IEM_MC_IF_RCX_IS_NZ() \
6212 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
6213 do {
6214
6215/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
6216DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
6217{
6218 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6219
6220 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6221 kIemNativeGstRegUse_ReadOnly);
6222 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6223 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6224
6225 iemNativeCondStartIfBlock(pReNative, off);
6226 return off;
6227}
6228
6229
6230#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6231 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
6232 do {
6233
6234#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6235 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
6236 do {
6237
6238 /** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6239DECL_INLINE_THROW(uint32_t)
6240iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
6241{
6242 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6243
6244 /* We have to load both RCX and EFLAGS before we can start branching,
6245 otherwise we'll end up in the else-block with an inconsistent
6246 register allocator state.
6247 Doing EFLAGS first as it's more likely to be loaded, right? */
6248 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6249 kIemNativeGstRegUse_ReadOnly);
6250 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6251 kIemNativeGstRegUse_ReadOnly);
6252
6253 /** @todo we could reduce this to a single branch instruction by spending a
6254 * temporary register and some setnz stuff. Not sure if loops are
6255 * worth it. */
6256 /* Check CX. */
6257 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6258
6259 /* Check the EFlags bit. */
6260 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6261 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6262 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6263 !fCheckIfSet /*fJmpIfSet*/);
6264
6265 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6266 iemNativeRegFreeTmp(pReNative, idxEflReg);
6267
6268 iemNativeCondStartIfBlock(pReNative, off);
6269 return off;
6270}
6271
6272
6273#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6274 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
6275 do {
6276
6277#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6278 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
6279 do {
6280
6281#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6282 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
6283 do {
6284
6285#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6286 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
6287 do {
6288
6289/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
6290 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
6291 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
6292 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6293DECL_INLINE_THROW(uint32_t)
6294iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6295 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
6296{
6297 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6298
6299 /* We have to load both RCX and EFLAGS before we can start branching,
6300 otherwise we'll end up in the else-block with an inconsistent
6301 register allocator state.
6302 Doing EFLAGS first as it's more likely to be loaded, right? */
6303 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6304 kIemNativeGstRegUse_ReadOnly);
6305 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6306 kIemNativeGstRegUse_ReadOnly);
6307
6308 /** @todo we could reduce this to a single branch instruction by spending a
6309 * temporary register and some setnz stuff. Not sure if loops are
6310 * worth it. */
6311 /* Check RCX/ECX. */
6312 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6313
6314 /* Check the EFlags bit. */
6315 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6316 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6317 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6318 !fCheckIfSet /*fJmpIfSet*/);
6319
6320 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6321 iemNativeRegFreeTmp(pReNative, idxEflReg);
6322
6323 iemNativeCondStartIfBlock(pReNative, off);
6324 return off;
6325}
6326
6327
6328
6329/*********************************************************************************************************************************
6330* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6331*********************************************************************************************************************************/
6332/** Number of hidden arguments for CIMPL calls.
6333 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
6334#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6335# define IEM_CIMPL_HIDDEN_ARGS 3
6336#else
6337# define IEM_CIMPL_HIDDEN_ARGS 2
6338#endif
6339
6340#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
6341 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
6342
6343#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
6344 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
6345
6346#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
6347 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
6348
6349#define IEM_MC_LOCAL(a_Type, a_Name) \
6350 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
6351
6352#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
6353 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
6354
6355
6356/**
6357 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
6358 */
6359DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
6360{
6361 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
6362 return IEM_CIMPL_HIDDEN_ARGS;
6363 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
6364 return 1;
6365 return 0;
6366}
6367
6368
6369/**
6370 * Internal work that allocates a variable with kind set to
6371 * kIemNativeVarKind_Invalid and no current stack allocation.
6372 *
6373 * The kind will either be set by the caller or later when the variable is first
6374 * assigned a value.
6375 */
6376static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6377{
6378 Assert(cbType > 0 && cbType <= 64);
6379 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6380 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6381 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6382 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6383 pReNative->Core.aVars[idxVar].cbVar = cbType;
6384 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6385 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6386 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6387 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6388 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6389 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6390 pReNative->Core.aVars[idxVar].u.uValue = 0;
6391 return idxVar;
6392}
6393
6394
6395/**
6396 * Internal work that allocates an argument variable w/o setting enmKind.
6397 */
6398static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6399{
6400 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6401 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6402 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6403
6404 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6405 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
6406 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6407 return idxVar;
6408}
6409
6410
6411/**
6412 * Gets the stack slot for a stack variable, allocating one if necessary.
6413 *
6414 * Calling this function implies that the stack slot will contain a valid
6415 * variable value. The caller deals with any register currently assigned to the
6416 * variable, typically by spilling it into the stack slot.
6417 *
6418 * @returns The stack slot number.
6419 * @param pReNative The recompiler state.
6420 * @param idxVar The variable.
6421 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6422 */
6423DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6424{
6425 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6426 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
6427
6428 /* Already got a slot? */
6429 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6430 if (idxStackSlot != UINT8_MAX)
6431 {
6432 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6433 return idxStackSlot;
6434 }
6435
6436 /*
6437 * A single slot is easy to allocate.
6438 * Allocate them from the top end, closest to BP, to reduce the displacement.
6439 */
6440 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
6441 {
6442 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6443 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6444 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6445 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6446 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
6447 return (uint8_t)iSlot;
6448 }
6449
6450 /*
6451 * We need more than one stack slot.
6452 *
6453 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6454 */
6455 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6456 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
6457 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
6458 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
6459 uint32_t bmStack = ~pReNative->Core.bmStack;
6460 while (bmStack != UINT32_MAX)
6461 {
6462/** @todo allocate from the top to reduce BP displacement. */
6463 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
6464 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6465 if (!(iSlot & fBitAlignMask))
6466 {
6467 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
6468 {
6469 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6470 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6471 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
6472 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
6473 return (uint8_t)iSlot;
6474 }
6475 }
6476 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
6477 }
6478 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6479}
6480
6481
6482/**
6483 * Changes the variable to a stack variable.
6484 *
6485 * Currently this is only possible to do the first time the variable is used;
6486 * switching later can be implemented but hasn't been done.
6487 *
6488 * @param pReNative The recompiler state.
6489 * @param idxVar The variable.
6490 * @throws VERR_IEM_VAR_IPE_2
6491 */
6492static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6493{
6494 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6495 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6496 {
6497 /* We could in theory transition from immediate to stack as well, but it
6498 would involve the caller doing work storing the value on the stack. So,
6499 till that's required we only allow transition from invalid. */
6500 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6501 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6502 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6503 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
6504
6505 /* Note! We don't allocate a stack slot here, that's only done when a
6506 slot is actually needed to hold a variable value. */
6507 }
6508}
6509
6510
6511/**
6512 * Sets it to a variable with a constant value.
6513 *
6514 * This does not require stack storage as we know the value and can always
6515 * reload it, unless of course it's referenced.
6516 *
6517 * @param pReNative The recompiler state.
6518 * @param idxVar The variable.
6519 * @param uValue The immediate value.
6520 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6521 */
6522static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6523{
6524 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6525 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
6526 {
6527 /* Only simple transitions for now. */
6528 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6529 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6530 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
6531 }
6532 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6533
6534 pReNative->Core.aVars[idxVar].u.uValue = uValue;
6535 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
6536 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
6537 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
6538}
6539
6540
6541/**
6542 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6543 *
6544 * This does not require stack storage as we know the value and can always
6545 * reload it. Loading is postponed till needed.
6546 *
6547 * @param pReNative The recompiler state.
6548 * @param idxVar The variable.
6549 * @param idxOtherVar The variable to take the (stack) address of.
6550 *
6551 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6552 */
6553static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6554{
6555 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6556 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6557
6558 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6559 {
6560 /* Only simple transitions for now. */
6561 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6562 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6563 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6564 }
6565 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6566
6567 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
6568
6569 /* Update the other variable, ensure it's a stack variable. */
6570 /** @todo handle variables with const values... that'll go boom now. */
6571 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6572 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
6573}
6574
6575
6576/**
6577 * Sets the variable to a reference (pointer) to a guest register reference.
6578 *
6579 * This does not require stack storage as we know the value and can always
6580 * reload it. Loading is postponed till needed.
6581 *
6582 * @param pReNative The recompiler state.
6583 * @param idxVar The variable.
6584 * @param enmRegClass The class of guest registers to reference.
6585 * @param idxReg The register within @a enmRegClass to reference.
6586 *
6587 * @throws VERR_IEM_VAR_IPE_2
6588 */
6589static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6590 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6591{
6592 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6593
6594 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
6595 {
6596 /* Only simple transitions for now. */
6597 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6598 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6599 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
6600 }
6601 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6602
6603 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
6604 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
6605}
6606
6607
6608DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6609{
6610 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6611}
6612
6613
6614DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6615{
6616 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6617
6618 /* Since we're using a generic uint64_t value type, we must truncate it if
6619 the variable is smaller, otherwise we may end up with too large a value when
6620 scaling up an imm8 w/ sign-extension.
6621
6622 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
6623 in the bios, bx=1) when running on arm, because clang expects 16-bit
6624 register parameters to have bits 16 and up set to zero. Instead of
6625 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
6626 CF value in the result. */
6627 switch (cbType)
6628 {
6629 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6630 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6631 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6632 }
6633 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6634 return idxVar;
6635}
6636
6637
6638DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6639{
6640 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6641 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6642 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6643 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6644
6645 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6646 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
6647 return idxArgVar;
6648}
6649
6650
6651DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6652{
6653 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6654 /* Don't set to stack now, leave that to the first use as for instance
6655 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6656 return idxVar;
6657}
6658
6659
6660DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6661{
6662 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6663
6664 /* Since we're using a generic uint64_t value type, we must truncate it if
6665 the variable is smaller, otherwise we may end up with too large a value when
6666 scaling up an imm8 w/ sign-extension. */
6667 switch (cbType)
6668 {
6669 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6670 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6671 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6672 }
6673 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6674 return idxVar;
6675}
6676
6677
6678/**
6679 * Releases the variable's register.
6680 *
6681 * The register must have been previously acquired calling
6682 * iemNativeVarRegisterAcquire(), iemNativeVarRegisterAcquireForGuestReg() or
6683 * iemNativeVarRegisterSetAndAcquire().
6684 */
6685DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6686{
6687 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6688 Assert(pReNative->Core.aVars[idxVar].fRegAcquired);
6689 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6690}
6691
6692
6693/**
6694 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6695 * fixed till we call iemNativeVarRegisterRelease.
6696 *
6697 * @returns The host register number.
6698 * @param pReNative The recompiler state.
6699 * @param idxVar The variable.
6700 * @param poff Pointer to the instruction buffer offset.
6701 * In case a register needs to be freed up or the value
6702 * loaded off the stack.
6703 * @param fInitialized Set if the variable must already have been initialized.
6704 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6705 * the case.
6706 * @param idxRegPref Preferred register number or UINT8_MAX.
6707 */
6708DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6709 bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX)
6710{
6711 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6712 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
6713 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6714
6715 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6716 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6717 {
6718 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
6719 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6720 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6721 return idxReg;
6722 }
6723
6724 /*
6725 * If the kind of variable has not yet been set, default to 'stack'.
6726 */
6727 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
6728 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6729 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
6730 iemNativeVarSetKindToStack(pReNative, idxVar);
6731
6732 /*
6733 * We have to allocate a register for the variable, even if it's a stack one,
6734 * as we don't know if there are modifications being made to it before it's
6735 * finalized (todo: analyze and insert hints about that?).
6736 *
6737 * If we can, we try to get the correct register for argument variables. This
6738 * assumes that most argument variables are fetched as close as possible to
6739 * the actual call, so that there aren't any interfering hidden calls (memory
6740 * accesses, etc) in between.
6741 *
6742 * If we cannot, or it's a non-argument variable, we make sure no argument
6743 * registers that will be used by this MC block are allocated here, and we
6744 * always prefer non-volatile registers to avoid needing to spill stuff for
6745 * internal calls.
6746 */
6747 /** @todo Detect too-early argument value fetches and warn in the python script
6748 * about hidden calls causing less optimal code to be generated. */
6749
6750 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6751 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6752 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6753 {
6754 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6755 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6756 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6757 }
6758 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6759 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6760 {
6761 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6762 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6763 & ~pReNative->Core.bmHstRegsWithGstShadow
6764 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6765 & fNotArgsMask;
6766 if (fRegs)
6767 {
6768 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
6769 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6770 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6771 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6772 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6773 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6774 }
6775 else
6776 {
6777 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6778 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6779 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6780 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6781 }
6782 }
6783 else
6784 {
6785 idxReg = idxRegPref;
6786 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6787 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
6788 }
6789 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6790 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6791
6792 /*
6793 * Load it off the stack if we've got a stack slot.
6794 */
6795 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6796 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6797 {
6798 Assert(fInitialized);
6799 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6800 switch (pReNative->Core.aVars[idxVar].cbVar)
6801 {
6802 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6803 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6804 case 3: AssertFailed(); RT_FALL_THRU();
6805 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6806 default: AssertFailed(); RT_FALL_THRU();
6807 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6808 }
6809 }
6810 else
6811 {
6812 Assert(idxStackSlot == UINT8_MAX);
6813 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6814 }
6815 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6816 return idxReg;
6817}
6818
6819
6820/**
6821 * The value of variable @a idxVar will be written in full to the @a enmGstReg
6822 * guest register.
6823 *
6824 * This function makes sure there is a register for it and sets it to be the
6825 * current shadow copy of @a enmGstReg.
6826 *
6827 * @returns The host register number.
6828 * @param pReNative The recompiler state.
6829 * @param idxVar The variable.
6830 * @param enmGstReg The guest register this variable will be written to
6831 * after this call.
6832 * @param poff Pointer to the instruction buffer offset.
6833 * Used in case a register needs to be freed up or the variable
6834 * content needs to be loaded off the stack.
6835 *
6836 * @note We DO NOT expect @a idxVar to be an argument variable,
6837 * because this function is only used in the commit stage of an
6838 * instruction.
6839 */
6840DECL_HIDDEN_THROW(uint8_t)
6841iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
6842{
6843 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6844 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6845 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
6846 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
6847 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
6848 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
6849 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
6850 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6851
6852 /*
6853 * This shouldn't ever be used for arguments, unless it's in a weird else
6854 * branch that doesn't do any calling and even then it's questionable.
6855 *
6856 * However, in case someone writes crazy wrong MC code and does register
6857 * updates before making calls, just use the regular register allocator to
6858 * ensure we get a register suitable for the intended argument number.
6859 */
6860 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
6861
6862 /*
6863 * If there is already a register for the variable, we transfer/set the
6864 * guest shadow copy assignment to it.
6865 */
6866 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6867 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6868 {
6869 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
6870 {
6871 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
6872 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
6873 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
6874 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
6875 }
6876 else
6877 {
6878 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
6879 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
6880 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
6881 }
6882 /** @todo figure this one out. We need some way of making sure the register isn't
6883 * modified after this point, just in case we start writing crappy MC code. */
6884 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
6885 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6886 return idxReg;
6887 }
6888 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
6889
6890 /*
6891 * Because this is supposed to be the commit stage, we just tag along with the
6892 * temporary register allocator and upgrade the allocation to a variable register.
6893 */
6894 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
6895 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
6896 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
6897 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
6898 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
6899 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6900
6901 /*
6902 * Now we need to load the register value.
6903 */
6904 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
6905 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
6906 else
6907 {
6908 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
6909 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6910 switch (pReNative->Core.aVars[idxVar].cbVar)
6911 {
6912 case sizeof(uint64_t):
6913 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
6914 break;
6915 case sizeof(uint32_t):
6916 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
6917 break;
6918 case sizeof(uint16_t):
6919 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
6920 break;
6921 case sizeof(uint8_t):
6922 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
6923 break;
6924 default:
6925 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6926 }
6927 }
6928
6929 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6930 return idxReg;
6931}
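/*
 * Illustrative usage sketch (not part of the emitter set, variable and
 * register names below are hypothetical): a store-style MC block emitter
 * could acquire the variable's host register as the new shadow of the guest
 * register it is about to overwrite in full, flush it to the CPUMCTX member
 * and then release the variable register again:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar,
 *                                                                       IEMNATIVEGSTREG_GPR(iGReg), &off);
 *      off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg,
 *                                           RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
 *      iemNativeVarRegisterRelease(pReNative, idxValueVar);
 */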
6932
6933
6934/**
6935 * Sets the host register for @a idxVar to @a idxReg.
6936 *
6937 * The register must not be allocated. Any guest register shadowing will be
6938 * implicitly dropped by this call.
6939 *
6940 * The variable must not have any register associated with it (causes
6941 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
6942 * implied.
6943 *
6944 * @returns idxReg
6945 * @param pReNative The recompiler state.
6946 * @param idxVar The variable.
6947 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
6948 * @param off For recording in debug info.
6949 *
6950 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
6951 */
6952DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
6953{
6954 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6955 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6956 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6957 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
6958 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
6959
6960 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
6961 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6962
6963 iemNativeVarSetKindToStack(pReNative, idxVar);
6964 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6965
6966 return idxReg;
6967}
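/*
 * Usage sketch (illustrative; it mirrors the AIMPL call path further down in
 * this file, with idxVarRc being a hypothetical result variable): after
 * emitting a call, the return value register is typically handed straight to
 * the result variable without any copying:
 *
 *      off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
 *      iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
 */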
6968
6969
6970/**
6971 * A convenient helper function.
6972 */
6973DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6974 uint8_t idxReg, uint32_t *poff)
6975{
6976 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
6977 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6978 return idxReg;
6979}
6980
6981
6982/**
6983 * Worker that frees the stack slots for variable @a idxVar if any allocated.
6984 *
6985 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
6986 */
6987DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6988{
6989 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6990 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6991 {
6992 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
6993 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
6994 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
6995 Assert(cSlots > 0);
6996 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
6997 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
6998 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
6999 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7000 }
7001 else
7002 Assert(idxStackSlot == UINT8_MAX);
7003}
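/*
 * Worked example of the slot mask math above (values are illustrative): a
 * variable with cbVar=32 sitting at idxStackSlot=4 gives cSlots=4 and
 * fAllocMask=0xf, so bits 7:4 of bmStack are asserted to be set and are then
 * cleared by the '~(fAllocMask << idxStackSlot)' AND.
 */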
7004
7005
7006/**
7007 * Worker that frees a single variable.
7008 *
7009 * ASSUMES that @a idxVar is valid.
7010 */
7011DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7012{
7013 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7014 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7015
7016 /* Free the host register first if any assigned. */
7017 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7018 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7019 {
7020 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7021 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7022 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7023 }
7024
7025 /* Free argument mapping. */
7026 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7027 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7028 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7029
7030 /* Free the stack slots. */
7031 iemNativeVarFreeStackSlots(pReNative, idxVar);
7032
7033 /* Free the actual variable. */
7034 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7035 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7036}
7037
7038
7039/**
7040 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7041 */
7042DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7043{
7044 while (bmVars != 0)
7045 {
7046 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7047 bmVars &= ~RT_BIT_32(idxVar);
7048
7049#if 1 /** @todo optimize by simplifying this later... */
7050 iemNativeVarFreeOneWorker(pReNative, idxVar);
7051#else
7052 /* Only need to free the host register, the rest is done as bulk updates below. */
7053 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7054 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7055 {
7056 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7057 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7058 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7059 }
7060#endif
7061 }
7062#if 0 /** @todo optimize by simplifying this later... */
7063 pReNative->Core.bmVars = 0;
7064 pReNative->Core.bmStack = 0;
7065 pReNative->Core.u64ArgVars = UINT64_MAX;
7066#endif
7067}
7068
7069
7070/**
7071 * This is called by IEM_MC_END() to clean up all variables.
7072 */
7073DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
7074{
7075 uint32_t const bmVars = pReNative->Core.bmVars;
7076 if (bmVars != 0)
7077 iemNativeVarFreeAllSlow(pReNative, bmVars);
7078 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7079 Assert(pReNative->Core.bmStack == 0);
7080}
7081
7082
7083#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
7084
7085/**
7086 * This is called by IEM_MC_FREE_LOCAL.
7087 */
7088DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7089{
7090 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7091 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7092 iemNativeVarFreeOneWorker(pReNative, idxVar);
7093}
7094
7095
7096#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
7097
7098/**
7099 * This is called by IEM_MC_FREE_ARG.
7100 */
7101DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7102{
7103 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7104 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
7105 iemNativeVarFreeOneWorker(pReNative, idxVar);
7106}
7107
7108
7109#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
7110
7111/**
7112 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
7113 */
7114DECL_INLINE_THROW(uint32_t)
7115iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
7116{
7117 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
7118 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
7119 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7120 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
7121 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
7122
7123 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
7124 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
7125 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
7126 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7127
7128 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
7129
7130 /*
7131 * Special case for immediates.
7132 */
7133 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
7134 {
7135 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7136 {
7137 case sizeof(uint16_t):
7138 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7139 break;
7140 case sizeof(uint32_t):
7141 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7142 break;
7143 default: AssertFailed(); break;
7144 }
7145 }
7146 else
7147 {
7148 /*
7149 * The generic solution for now.
7150 */
7151 /** @todo optimize this by having the python script make sure the source
7152 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
7153 * statement. Then we could just transfer the register assignments. */
7154 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
7155 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
7156 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7157 {
7158 case sizeof(uint16_t):
7159 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
7160 break;
7161 case sizeof(uint32_t):
7162 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
7163 break;
7164 default: AssertFailed(); break;
7165 }
7166 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
7167 iemNativeVarRegisterRelease(pReNative, idxVarDst);
7168 }
7169 return off;
7170}
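/*
 * Worked example for the immediate special case above (hypothetical values):
 * assigning a 64-bit immediate source holding 0x1234567890abcdef to a 16-bit
 * destination simply turns the destination into the constant 0xcdef and emits
 * no code at all; only the stack/register path emits a truncating
 * register-to-register load.
 */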
7171
7172
7173
7174/*********************************************************************************************************************************
7175* Emitters for IEM_MC_CALL_CIMPL_XXX *
7176*********************************************************************************************************************************/
7177
7178/**
7179 * Emits code to load a reference to the given guest register into @a idxGprDst.
7180 */
7181DECL_INLINE_THROW(uint32_t)
7182iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7183 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7184{
7185 /*
7186 * Get the offset relative to the CPUMCTX structure.
7187 */
7188 uint32_t offCpumCtx;
7189 switch (enmClass)
7190 {
7191 case kIemNativeGstRegRef_Gpr:
7192 Assert(idxRegInClass < 16);
7193 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7194 break;
7195
7196 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
7197 Assert(idxRegInClass < 4);
7198 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7199 break;
7200
7201 case kIemNativeGstRegRef_EFlags:
7202 Assert(idxRegInClass == 0);
7203 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7204 break;
7205
7206 case kIemNativeGstRegRef_MxCsr:
7207 Assert(idxRegInClass == 0);
7208 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7209 break;
7210
7211 case kIemNativeGstRegRef_FpuReg:
7212 Assert(idxRegInClass < 8);
7213 AssertFailed(); /** @todo what kind of indexing? */
7214 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7215 break;
7216
7217 case kIemNativeGstRegRef_MReg:
7218 Assert(idxRegInClass < 8);
7219 AssertFailed(); /** @todo what kind of indexing? */
7220 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7221 break;
7222
7223 case kIemNativeGstRegRef_XReg:
7224 Assert(idxRegInClass < 16);
7225 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7226 break;
7227
7228 default:
7229 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7230 }
7231
7232 /*
7233 * Load the value into the destination register.
7234 */
7235#ifdef RT_ARCH_AMD64
7236 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7237
7238#elif defined(RT_ARCH_ARM64)
7239 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7240 Assert(offCpumCtx < 4096);
7241 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7242
7243#else
7244# error "Port me!"
7245#endif
7246
7247 return off;
7248}
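/*
 * Worked example (illustrative): for kIemNativeGstRegRef_Gpr with
 * idxRegInClass=3 the offset is RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3]).  On
 * AMD64 this becomes a single LEA of pVCpu->cpum.GstCtx.aGRegs[3] into
 * idxGprDst, while on ARM64 it is an ADD of the immediate offset to the fixed
 * CPUMCTX pointer register.
 */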
7249
7250
7251/**
7252 * Common code for CIMPL and AIMPL calls.
7253 *
7254 * These are calls that use argument variables and such. They should not be
7255 * confused with internal calls required to implement an MC operation,
7256 * like a TLB load and similar.
7257 *
7258 * Upon return all that is left to do is to load any hidden arguments and
7259 * perform the call. All argument variables are freed.
7260 *
7261 * @returns New code buffer offset; throws VBox status code on error.
7262 * @param pReNative The native recompile state.
7263 * @param off The code buffer offset.
7264 * @param cArgs The total number of arguments (includes hidden
7265 * count).
7266 * @param cHiddenArgs The number of hidden arguments. The hidden
7267 * arguments must not have any variable declared for
7268 * them, whereas all the regular arguments must
7269 * (tstIEMCheckMc ensures this).
7270 */
7271DECL_HIDDEN_THROW(uint32_t)
7272iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7273{
7274#ifdef VBOX_STRICT
7275 /*
7276 * Assert sanity.
7277 */
7278 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7279 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7280 for (unsigned i = 0; i < cHiddenArgs; i++)
7281 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7282 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7283 {
7284 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7285 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7286 }
7287 iemNativeRegAssertSanity(pReNative);
7288#endif
7289
7290 /*
7291 * Before we do anything else, go over variables that are referenced and
7292 * make sure they are not in a register.
7293 */
7294 uint32_t bmVars = pReNative->Core.bmVars;
7295 if (bmVars)
7296 {
7297 do
7298 {
7299 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7300 bmVars &= ~RT_BIT_32(idxVar);
7301
7302 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7303 {
7304 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7305 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7306 {
7307 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7308 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7309 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7310 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7311 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7312
7313 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7314 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7315 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7316 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7317 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7318 }
7319 }
7320 } while (bmVars != 0);
7321#if 0 //def VBOX_STRICT
7322 iemNativeRegAssertSanity(pReNative);
7323#endif
7324 }
7325
7326 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7327
7328 /*
7329 * First, go over the host registers that will be used for arguments and make
7330 * sure they either hold the desired argument or are free.
7331 */
7332 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7333 {
7334 for (uint32_t i = 0; i < cRegArgs; i++)
7335 {
7336 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7337 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7338 {
7339 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7340 {
7341 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7342 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7343 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
7344 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7345 if (uArgNo == i)
7346 { /* perfect */ }
7347 /* The variable allocator logic should make sure this is impossible,
7348 except for when the return register is used as a parameter (ARM,
7349 but not x86). */
7350#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7351 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7352 {
7353# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7354# error "Implement this"
7355# endif
7356 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7357 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7358 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7360 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7361 }
7362#endif
7363 else
7364 {
7365 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7366
7367 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
7368 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7369 else
7370 {
7371 /* just free it, can be reloaded if used again */
7372 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7373 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7374 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7375 }
7376 }
7377 }
7378 else
7379 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7380 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7381 }
7382 }
7383#if 0 //def VBOX_STRICT
7384 iemNativeRegAssertSanity(pReNative);
7385#endif
7386 }
7387
7388 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7389
7390#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7391 /*
7392 * If there are any stack arguments, make sure they are in their place as well.
7393 *
7394 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7395 * the caller) will be loading it later and it must be free (see the first loop).
7396 */
7397 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7398 {
7399 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7400 {
7401 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7402 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7403 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7404 {
7405 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7406 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
7407 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
7408 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7409 }
7410 else
7411 {
7412 /* Use ARG0 as temp for stuff we need registers for. */
7413 switch (pReNative->Core.aVars[idxVar].enmKind)
7414 {
7415 case kIemNativeVarKind_Stack:
7416 {
7417 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7418 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7419 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7420 iemNativeStackCalcBpDisp(idxStackSlot));
7421 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7422 continue;
7423 }
7424
7425 case kIemNativeVarKind_Immediate:
7426 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
7427 continue;
7428
7429 case kIemNativeVarKind_VarRef:
7430 {
7431 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
7432 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7433 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
7434 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7435 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7436 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7437 {
7438 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7439 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7440 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7441 }
7442 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7443 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7444 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7445 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7446 continue;
7447 }
7448
7449 case kIemNativeVarKind_GstRegRef:
7450 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
7451 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
7452 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
7453 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7454 continue;
7455
7456 case kIemNativeVarKind_Invalid:
7457 case kIemNativeVarKind_End:
7458 break;
7459 }
7460 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7461 }
7462 }
7463# if 0 //def VBOX_STRICT
7464 iemNativeRegAssertSanity(pReNative);
7465# endif
7466 }
7467#else
7468 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
7469#endif
7470
7471 /*
7472 * Make sure the argument variables are loaded into their respective registers.
7473 *
7474 * We can optimize this by ASSUMING that any register allocations are for
7475 * registers that have already been loaded and are ready. The previous step
7476 * saw to that.
7477 */
7478 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
7479 {
7480 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7481 {
7482 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7483 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7484 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
7485 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
7486 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
7487 else
7488 {
7489 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7490 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7491 {
7492 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
7493 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
7494 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
7495 | RT_BIT_32(idxArgReg);
7496 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
7497 }
7498 else
7499 {
7500 /* Use ARG0 as temp for stuff we need registers for. */
7501 switch (pReNative->Core.aVars[idxVar].enmKind)
7502 {
7503 case kIemNativeVarKind_Stack:
7504 {
7505 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7506 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7507 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
7508 continue;
7509 }
7510
7511 case kIemNativeVarKind_Immediate:
7512 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
7513 continue;
7514
7515 case kIemNativeVarKind_VarRef:
7516 {
7517 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
7518 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7519 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
7520 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7521 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7522 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7523 {
7524 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7525 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7526 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7527 }
7528 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7529 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7530 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
7531 continue;
7532 }
7533
7534 case kIemNativeVarKind_GstRegRef:
7535 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
7536 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
7537 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
7538 continue;
7539
7540 case kIemNativeVarKind_Invalid:
7541 case kIemNativeVarKind_End:
7542 break;
7543 }
7544 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7545 }
7546 }
7547 }
7548#if 0 //def VBOX_STRICT
7549 iemNativeRegAssertSanity(pReNative);
7550#endif
7551 }
7552#ifdef VBOX_STRICT
7553 else
7554 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7555 {
7556 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
7557 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
7558 }
7559#endif
7560
7561 /*
7562 * Free all argument variables (simplified).
7563 * Their lifetime always expires with the call they are for.
7564 */
7565 /** @todo Make the python script check that arguments aren't used after
7566 * IEM_MC_CALL_XXXX. */
7567 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
7568 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
7569 * an argument value. There is also some FPU stuff. */
7570 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
7571 {
7572 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7573 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7574
7575 /* no need to free registers: */
7576 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
7577 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
7578 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
7579 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
7580 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
7581 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
7582
7583 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
7584 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7585 iemNativeVarFreeStackSlots(pReNative, idxVar);
7586 }
7587 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7588
7589 /*
7590 * Flush volatile registers as we make the call.
7591 */
7592 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
7593
7594 return off;
7595}
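/*
 * Worked example of the argument accounting (hypothetical counts): a CIMPL
 * call with two visible arguments passes cArgs = 2 + IEM_CIMPL_HIDDEN_ARGS and
 * cHiddenArgs = IEM_CIMPL_HIDDEN_ARGS, so aidxArgVars[0..cHiddenArgs-1] must
 * be UINT8_MAX while the visible arguments occupy the following entries and
 * end up in g_aidxIemNativeCallRegs[cHiddenArgs] and onwards.
 */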
7596
7597
7598/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
7599DECL_HIDDEN_THROW(uint32_t)
7600iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
7601 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
7602
7603{
7604 /*
7605 * Do all the call setup and cleanup.
7606 */
7607 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
7608
7609 /*
7610 * Load the two or three hidden arguments.
7611 */
7612#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7613 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7614 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7615 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
7616#else
7617 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7618 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
7619#endif
7620
7621 /*
7622 * Make the call and check the return code.
7623 *
7624 * Shadow PC copies are always flushed here, other stuff depends on flags.
7625 * Segment and general purpose registers are explicitly flushed via the
7626 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
7627 * macros.
7628 */
7629 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
7630#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7631 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7632#endif
7633 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
7634 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
7635 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
7636 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
7637
7638 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7639}
7640
7641
7642#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7643 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
7644
7645/** Emits code for IEM_MC_CALL_CIMPL_1. */
7646DECL_INLINE_THROW(uint32_t)
7647iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7648 uintptr_t pfnCImpl, uint8_t idxArg0)
7649{
7650 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7651 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
7652}
7653
7654
7655#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7656 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
7657
7658/** Emits code for IEM_MC_CALL_CIMPL_2. */
7659DECL_INLINE_THROW(uint32_t)
7660iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7661 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
7662{
7663 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7664 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7665 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
7666}
7667
7668
7669#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7670 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7671 (uintptr_t)a_pfnCImpl, a0, a1, a2)
7672
7673/** Emits code for IEM_MC_CALL_CIMPL_3. */
7674DECL_INLINE_THROW(uint32_t)
7675iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7676 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7677{
7678 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7679 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7680 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7681 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
7682}
7683
7684
7685#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
7686 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7687 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
7688
7689/** Emits code for IEM_MC_CALL_CIMPL_4. */
7690DECL_INLINE_THROW(uint32_t)
7691iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7692 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7693{
7694 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7695 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7696 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7697 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7698 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
7699}
7700
7701
7702#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
7703 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7704 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
7705
7706/** Emits code for IEM_MC_CALL_CIMPL_5. */
7707DECL_INLINE_THROW(uint32_t)
7708iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7709 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
7710{
7711 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7712 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7713 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7714 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7715 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
7716 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
7717}
7718
7719
7720/** Recompiler debugging: Flush guest register shadow copies. */
7721#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
7722
7723
7724
7725/*********************************************************************************************************************************
7726* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
7727*********************************************************************************************************************************/
7728
7729/**
7730 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
7731 */
7732DECL_INLINE_THROW(uint32_t)
7733iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7734 uintptr_t pfnAImpl, uint8_t cArgs)
7735{
7736 if (idxVarRc != UINT8_MAX)
7737 {
7738 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
7739 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
7740 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
7741 }
7742
7743 /*
7744 * Do all the call setup and cleanup.
7745 */
7746 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
7747
7748 /*
7749 * Make the call and update the return code variable if we've got one.
7750 */
7751 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
7752 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
7753 {
7754 pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
7755 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
7756 }
7757
7758 return off;
7759}
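/*
 * Usage sketch from the MC block side (helper and argument names are
 * hypothetical): an assembly helper taking two arguments and returning a
 * status would be invoked as
 *
 *      IEM_MC_CALL_AIMPL_2(rcAImpl, iemAImpl_SomeHelper, uArg0, uArg1);
 *
 * which expands to iemNativeEmitCallAImpl2 below and, via this common worker,
 * frees both argument variables and binds rcAImpl to IEMNATIVE_CALL_RET_GREG.
 */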
7760
7761
7762
7763#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
7764 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
7765
7766#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
7767 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
7768
7769/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
7770DECL_INLINE_THROW(uint32_t)
7771iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
7772{
7773 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
7774}
7775
7776
7777#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
7778 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
7779
7780#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
7781 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
7782
7783/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
7784DECL_INLINE_THROW(uint32_t)
7785iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
7786{
7787 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7788 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
7789}
7790
7791
7792#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
7793 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
7794
7795#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
7796 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
7797
7798/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
7799DECL_INLINE_THROW(uint32_t)
7800iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7801 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
7802{
7803 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7804 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7805 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
7806}
7807
7808
7809#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
7810 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
7811
7812#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
7813 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
7814
7815/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
7816DECL_INLINE_THROW(uint32_t)
7817iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7818 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7819{
7820 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7821 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7822 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7823 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
7824}
7825
7826
7827#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
7828 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7829
7830#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
7831 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7832
7833/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
7834DECL_INLINE_THROW(uint32_t)
7835iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7836 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7837{
7838 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7839 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7840 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7841 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
7842 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
7843}
7844
7845
7846
7847/*********************************************************************************************************************************
7848* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
7849*********************************************************************************************************************************/
7850
7851#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
7852 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
7853
7854#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7855 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
7856
7857#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7858 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
7859
7860#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7861 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
7862
7863
7864/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
7865 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
7866DECL_INLINE_THROW(uint32_t)
7867iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
7868{
7869 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7870 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7871 Assert(iGRegEx < 20);
7872
7873 /* Same discussion as in iemNativeEmitFetchGregU16 */
7874 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7875 kIemNativeGstRegUse_ReadOnly);
7876
7877 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7878 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7879
7880 /* The value is zero-extended to the full 64-bit host register width. */
7881 if (iGRegEx < 16)
7882 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7883 else
7884 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7885
7886 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7887 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7888 return off;
7889}
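/*
 * Note on the extended register index, derived from the code above (the
 * concrete value is only an illustration): a_iGRegEx values 0..15 select the
 * low byte of the corresponding GPR, while 16..19 select the high byte of
 * GPR 0..3.  E.g. a_iGRegEx = 17 masks to GPR 1 (rCX) and the >= 16 path
 * fetches bits 15:8, i.e. CH.
 */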
7890
7891
7892#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7893 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
7894
7895#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7896 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
7897
7898#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7899 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
7900
7901/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
7902DECL_INLINE_THROW(uint32_t)
7903iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
7904{
7905 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7906 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
7907 Assert(iGRegEx < 20);
7908
7909 /* Same discussion as in iemNativeEmitFetchGregU16 */
7910 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7911 kIemNativeGstRegUse_ReadOnly);
7912
7913 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7914 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7915
7916 if (iGRegEx < 16)
7917 {
7918 switch (cbSignExtended)
7919 {
7920 case sizeof(uint16_t):
7921 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7922 break;
7923 case sizeof(uint32_t):
7924 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7925 break;
7926 case sizeof(uint64_t):
7927 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7928 break;
7929 default: AssertFailed(); break;
7930 }
7931 }
7932 else
7933 {
7934 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7935 switch (cbSignExtended)
7936 {
7937 case sizeof(uint16_t):
7938 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7939 break;
7940 case sizeof(uint32_t):
7941 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7942 break;
7943 case sizeof(uint64_t):
7944 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7945 break;
7946 default: AssertFailed(); break;
7947 }
7948 }
7949
7950 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7951 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7952 return off;
7953}
7954
7955
7956
7957#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
7958 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
7959
7960#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
7961 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7962
7963#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
7964 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7965
7966/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
7967DECL_INLINE_THROW(uint32_t)
7968iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
7969{
7970 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7971 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7972 Assert(iGReg < 16);
7973
7974 /*
7975 * We can either just load the low 16-bit of the GPR into a host register
7976 * for the variable, or we can do so via a shadow copy host register. The
7977 * latter will avoid having to reload it if it's being stored later, but
7978 * will waste a host register if it isn't touched again. Since we don't
7979 * know what's going to happen, we choose the latter for now.
7980 */
7981 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7982 kIemNativeGstRegUse_ReadOnly);
7983
7984 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7985 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7986 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7987 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7988
7989 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7990 return off;
7991}
7992
7993
7994#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
7995 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7996
7997#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
7998 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7999
8000/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
8001DECL_INLINE_THROW(uint32_t)
8002iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
8003{
8004 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8005 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8006 Assert(iGReg < 16);
8007
8008 /*
8009 * We can either just load the low 16-bit of the GPR into a host register
8010 * for the variable, or we can do so via a shadow copy host register. The
8011 * latter will avoid having to reload it if it's being stored later, but
8012 * will waste a host register if it isn't touched again. Since we don't
8013 * know what's going to happen, we choose the latter for now.
8014 */
8015 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8016 kIemNativeGstRegUse_ReadOnly);
8017
8018 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8019 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8020 if (cbSignExtended == sizeof(uint32_t))
8021 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8022 else
8023 {
8024 Assert(cbSignExtended == sizeof(uint64_t));
8025 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8026 }
8027 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8028
8029 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8030 return off;
8031}
8032
8033
8034#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
8035 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
8036
8037#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
8038 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
8039
8040/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
8041DECL_INLINE_THROW(uint32_t)
8042iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8043{
8044 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8045 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
8046 Assert(iGReg < 16);
8047
8048 /*
8049 * We can either just load the low 32-bit of the GPR into a host register
8050 * for the variable, or we can do so via a shadow copy host register. The
8051 * latter will avoid having to reload it if it's being stored later, but
8052 * will waste a host register if it isn't touched again. Since we don't
8053 * know what's going to happen, we choose the latter for now.
8054 */
8055 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8056 kIemNativeGstRegUse_ReadOnly);
8057
8058 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8059 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8060 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8061 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8062
8063 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8064 return off;
8065}
8066
8067
8068#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
8069 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
8070
8071/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
8072DECL_INLINE_THROW(uint32_t)
8073iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8074{
8075 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8076 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8077 Assert(iGReg < 16);
8078
8079 /*
8080 * We can either just load the low 32-bit of the GPR into a host register
8081 * for the variable, or we can do so via a shadow copy host register. The
8082 * latter will avoid having to reload it if it's being stored later, but
8083 * will waste a host register if it isn't touched again. Since we don't
8084 * know what's going to happen, we choose the latter for now.
8085 */
8086 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8087 kIemNativeGstRegUse_ReadOnly);
8088
8089 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8090 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8091 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8092 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8093
8094 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8095 return off;
8096}
8097
8098
8099#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
8100 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8101
8102#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
8103 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8104
8105/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
8106 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
8107DECL_INLINE_THROW(uint32_t)
8108iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8109{
8110 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8111 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8112 Assert(iGReg < 16);
8113
8114 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8115 kIemNativeGstRegUse_ReadOnly);
8116
8117 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8118 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8119 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
8120 /** @todo name the register a shadow one already? */
8121 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8122
8123 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8124 return off;
8125}
8126
8127
8128
8129/*********************************************************************************************************************************
8130* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
8131*********************************************************************************************************************************/
8132
8133#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
8134 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
8135
8136/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
8137DECL_INLINE_THROW(uint32_t)
8138iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
8139{
8140 Assert(iGRegEx < 20);
8141 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8142 kIemNativeGstRegUse_ForUpdate);
8143#ifdef RT_ARCH_AMD64
8144 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8145
8146 /* To the lowest byte of the register: mov r8, imm8 */
8147 if (iGRegEx < 16)
8148 {
8149 if (idxGstTmpReg >= 8)
8150 pbCodeBuf[off++] = X86_OP_REX_B;
8151 else if (idxGstTmpReg >= 4)
8152 pbCodeBuf[off++] = X86_OP_REX;
8153 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8154 pbCodeBuf[off++] = u8Value;
8155 }
8156 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
8157 else if (idxGstTmpReg < 4)
8158 {
8159 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
8160 pbCodeBuf[off++] = u8Value;
8161 }
8162 else
8163 {
8164 /* ror reg64, 8 */
8165 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8166 pbCodeBuf[off++] = 0xc1;
8167 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8168 pbCodeBuf[off++] = 8;
8169
8170 /* mov reg8, imm8 */
8171 if (idxGstTmpReg >= 8)
8172 pbCodeBuf[off++] = X86_OP_REX_B;
8173 else if (idxGstTmpReg >= 4)
8174 pbCodeBuf[off++] = X86_OP_REX;
8175 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8176 pbCodeBuf[off++] = u8Value;
8177
8178 /* rol reg64, 8 */
8179 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8180 pbCodeBuf[off++] = 0xc1;
8181 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8182 pbCodeBuf[off++] = 8;
8183 }
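    /* Sketch of the rotate path above, assuming a hypothetical register assignment
       where guest RCX is shadowed in host r9 and we store 0x12 into CH:
           ror r9, 8       ; bring bits 15:8 down into the low byte
           mov r9b, 0x12   ; overwrite it with the immediate
           rol r9, 8       ; rotate the register back into place
       This is needed because only host registers 0-3 have addressable high-byte
       forms (ah/ch/dh/bh). */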
8184
8185#elif defined(RT_ARCH_ARM64)
8186 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
8187 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8188 if (iGRegEx < 16)
8189 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
8190 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
8191 else
8192 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
8193 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
8194 iemNativeRegFreeTmp(pReNative, idxImmReg);
8195
8196#else
8197# error "Port me!"
8198#endif
8199
8200 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8201
8202 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8203
8204 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8205 return off;
8206}
8207
8208
8209#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
8210 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
8211
8212/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
8213DECL_INLINE_THROW(uint32_t)
8214iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
8215{
8216 Assert(iGRegEx < 20);
8217 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8218
8219 /*
8220 * If it's a constant value (unlikely) we treat this as an
8221 * IEM_MC_STORE_GREG_U8_CONST statement.
8222 */
8223 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8224 { /* likely */ }
8225 else
8226 {
8227 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8228 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8229 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8230 }
8231
8232 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8233 kIemNativeGstRegUse_ForUpdate);
8234 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8235
8236#ifdef RT_ARCH_AMD64
8237 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
8238 if (iGRegEx < 16)
8239 {
8240 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
8241 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8242 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8243 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8244 pbCodeBuf[off++] = X86_OP_REX;
8245 pbCodeBuf[off++] = 0x8a;
8246 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8247 }
8248 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
8249 else if (idxGstTmpReg < 4 && idxVarReg < 4)
8250 {
8251 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
8252 pbCodeBuf[off++] = 0x8a;
8253 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
8254 }
8255 else
8256 {
8257 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
8258
8259 /* ror reg64, 8 */
8260 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8261 pbCodeBuf[off++] = 0xc1;
8262 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8263 pbCodeBuf[off++] = 8;
8264
8265 /* mov reg8, reg8(r/m) */
8266 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8267 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8268 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8269 pbCodeBuf[off++] = X86_OP_REX;
8270 pbCodeBuf[off++] = 0x8a;
8271 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8272
8273 /* rol reg64, 8 */
8274 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8275 pbCodeBuf[off++] = 0xc1;
8276 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8277 pbCodeBuf[off++] = 8;
8278 }
8279
8280#elif defined(RT_ARCH_ARM64)
8281 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
8282 or
8283 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
8284 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8285 if (iGRegEx < 16)
8286 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
8287 else
8288 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
8289
8290#else
8291# error "Port me!"
8292#endif
8293 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8294
8295 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8296
8297 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8298 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8299 return off;
8300}
8301
8302
8303
8304#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
8305 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
8306
8307/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
8308DECL_INLINE_THROW(uint32_t)
8309iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
8310{
8311 Assert(iGReg < 16);
8312 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8313 kIemNativeGstRegUse_ForUpdate);
8314#ifdef RT_ARCH_AMD64
8315 /* mov reg16, imm16 */
8316 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8317 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8318 if (idxGstTmpReg >= 8)
8319 pbCodeBuf[off++] = X86_OP_REX_B;
8320 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
8321 pbCodeBuf[off++] = RT_BYTE1(uValue);
8322 pbCodeBuf[off++] = RT_BYTE2(uValue);
8323
8324#elif defined(RT_ARCH_ARM64)
8325 /* movk xdst, #uValue, lsl #0 */
8326 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8327 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
8328
8329#else
8330# error "Port me!"
8331#endif
8332
8333 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8334
8335 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8336 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8337 return off;
8338}
8339
8340
8341#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
8342 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
8343
8344/** Emits code for IEM_MC_STORE_GREG_U16. */
8345DECL_INLINE_THROW(uint32_t)
8346iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8347{
8348 Assert(iGReg < 16);
8349 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8350
8351 /*
8352 * If it's a constant value (unlikely) we treat this as an
8353 * IEM_MC_STORE_GREG_U16_CONST statement.
8354 */
8355 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8356 { /* likely */ }
8357 else
8358 {
8359 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8360 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8361 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8362 }
8363
8364 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8365 kIemNativeGstRegUse_ForUpdate);
8366
8367#ifdef RT_ARCH_AMD64
8368 /* mov reg16, reg16 or [mem16] */
8369 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8370 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8371 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8372 {
8373 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
8374 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
8375 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
8376 pbCodeBuf[off++] = 0x8b;
8377 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
8378 }
8379 else
8380 {
8381 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
8382 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8383 if (idxGstTmpReg >= 8)
8384 pbCodeBuf[off++] = X86_OP_REX_R;
8385 pbCodeBuf[off++] = 0x8b;
8386 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8387 }
8388
8389#elif defined(RT_ARCH_ARM64)
8390 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
8391 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8392 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8393 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
8394 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8395
8396#else
8397# error "Port me!"
8398#endif
8399
8400 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8401
8402 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8403 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8404 return off;
8405}
8406
8407
8408#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
8409 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
8410
8411/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
8412DECL_INLINE_THROW(uint32_t)
8413iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
8414{
8415 Assert(iGReg < 16);
8416 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8417 kIemNativeGstRegUse_ForFullWrite);
8418 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
8419 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8420 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8421 return off;
8422}
8423
8424
8425#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
8426 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
8427
8428/** Emits code for IEM_MC_STORE_GREG_U32. */
8429DECL_INLINE_THROW(uint32_t)
8430iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8431{
8432 Assert(iGReg < 16);
8433 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8434
8435 /*
8436 * If it's a constant value (unlikely) we treat this as an
8437 * IEM_MC_STORE_GREG_U32_CONST statement.
8438 */
8439 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8440 { /* likely */ }
8441 else
8442 {
8443 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8444 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8445 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8446 }
8447
8448 /*
8449 * For the rest we allocate a guest register for the variable and write
8450 * it to the CPUMCTX structure.
8451 */
8452 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
8453 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
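    /* Note: a 32-bit GPR write zero-extends to 64 bits on x86, so the host register
       acquired for the guest copy is expected to have its upper half already clear;
       the strict-build check below asserts exactly that. */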
8454#ifdef VBOX_STRICT
8455 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
8456#endif
8457 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8458 return off;
8459}
8460
8461
8462#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
8463 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
8464
8465/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
8466DECL_INLINE_THROW(uint32_t)
8467iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
8468{
8469 Assert(iGReg < 16);
8470 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8471 kIemNativeGstRegUse_ForFullWrite);
8472 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
8473 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8474 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8475 return off;
8476}
8477
8478
8479#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
8480 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
8481
8482/** Emits code for IEM_MC_STORE_GREG_U64. */
8483DECL_INLINE_THROW(uint32_t)
8484iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8485{
8486 Assert(iGReg < 16);
8487 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8488
8489 /*
8490 * If it's a constant value (unlikely) we treat this as an
8491 * IEM_MC_STORE_GREG_U64_CONST statement.
8492 */
8493 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8494 { /* likely */ }
8495 else
8496 {
8497 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8498 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8499 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
8500 }
8501
8502 /*
8503 * For the rest we allocate a guest register for the variable and write
8504 * it to the CPUMCTX structure.
8505 */
8506 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
8507 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8508 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8509 return off;
8510}
8511
8512
8513#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
8514 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
8515
8516/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
8517DECL_INLINE_THROW(uint32_t)
8518iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
8519{
8520 Assert(iGReg < 16);
8521 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8522 kIemNativeGstRegUse_ForUpdate);
8523 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
8524 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8525 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8526 return off;
8527}
8528
8529
8530/*********************************************************************************************************************************
8531* General purpose register manipulation (add, sub). *
8532*********************************************************************************************************************************/
8533
8534#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
8535 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
8536
8537/** Emits code for IEM_MC_ADD_GREG_U16. */
8538DECL_INLINE_THROW(uint32_t)
8539iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
8540{
8541 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8542 kIemNativeGstRegUse_ForUpdate);
8543
8544#ifdef RT_ARCH_AMD64
8545 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8546 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8547 if (idxGstTmpReg >= 8)
8548 pbCodeBuf[off++] = X86_OP_REX_B;
8549 if (uAddend == 1)
8550 {
8551 pbCodeBuf[off++] = 0xff; /* inc */
8552 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8553 }
8554 else
8555 {
8556 pbCodeBuf[off++] = 0x81;
8557 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8558 pbCodeBuf[off++] = uAddend;
8559 pbCodeBuf[off++] = 0;
8560 }
8561
8562#else
8563 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8564 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8565
8566 /* add tmp, gstgrp, uAddend */
8567 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
8568
8569 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
8570 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
8571
8572 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8573#endif
8574
8575 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8576
8577 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8578
8579 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8580 return off;
8581}
8582
8583
8584#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
8585 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
8586
8587#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
8588 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
8589
8590/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
8591DECL_INLINE_THROW(uint32_t)
8592iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
8593{
8594 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8595 kIemNativeGstRegUse_ForUpdate);
8596
8597#ifdef RT_ARCH_AMD64
8598 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8599 if (f64Bit)
8600 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
8601 else if (idxGstTmpReg >= 8)
8602 pbCodeBuf[off++] = X86_OP_REX_B;
8603 if (uAddend == 1)
8604 {
8605 pbCodeBuf[off++] = 0xff; /* inc */
8606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8607 }
8608 else if (uAddend < 128)
8609 {
8610 pbCodeBuf[off++] = 0x83; /* add */
8611 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8612 pbCodeBuf[off++] = RT_BYTE1(uAddend);
8613 }
8614 else
8615 {
8616 pbCodeBuf[off++] = 0x81; /* add */
8617 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8618 pbCodeBuf[off++] = RT_BYTE1(uAddend);
8619 pbCodeBuf[off++] = 0;
8620 pbCodeBuf[off++] = 0;
8621 pbCodeBuf[off++] = 0;
8622 }
8623
8624#else
8625 /* add gstgrp, gstgrp, uAddend */
8626 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8627 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
8628
8629#endif
8630
8631 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8632
8633 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8634
8635 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8636 return off;
8637}
8638
8639
8640
8641#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
8642 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
8643
8644/** Emits code for IEM_MC_SUB_GREG_U16. */
8645DECL_INLINE_THROW(uint32_t)
8646iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
8647{
8648 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8649 kIemNativeGstRegUse_ForUpdate);
8650
8651#ifdef RT_ARCH_AMD64
8652 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8653 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8654 if (idxGstTmpReg >= 8)
8655 pbCodeBuf[off++] = X86_OP_REX_B;
8656 if (uSubtrahend == 1)
8657 {
8658 pbCodeBuf[off++] = 0xff; /* dec */
8659 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8660 }
8661 else
8662 {
8663 pbCodeBuf[off++] = 0x81;
8664 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8665 pbCodeBuf[off++] = uSubtrahend;
8666 pbCodeBuf[off++] = 0;
8667 }
8668
8669#else
8670 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8671 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8672
8673 /* sub tmp, gstgrp, uSubtrahend */
8674 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
8675
8676 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
8677 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
8678
8679 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8680#endif
8681
8682 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8683
8684 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8685
8686 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8687 return off;
8688}
8689
8690
8691#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
8692 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
8693
8694#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
8695 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
8696
8697/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
8698DECL_INLINE_THROW(uint32_t)
8699iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
8700{
8701 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8702 kIemNativeGstRegUse_ForUpdate);
8703
8704#ifdef RT_ARCH_AMD64
8705 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8706 if (f64Bit)
8707 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
8708 else if (idxGstTmpReg >= 8)
8709 pbCodeBuf[off++] = X86_OP_REX_B;
8710 if (uSubtrahend == 1)
8711 {
8712 pbCodeBuf[off++] = 0xff; /* dec */
8713 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8714 }
8715 else if (uSubtrahend < 128)
8716 {
8717 pbCodeBuf[off++] = 0x83; /* sub */
8718 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8719 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8720 }
8721 else
8722 {
8723 pbCodeBuf[off++] = 0x81; /* sub */
8724 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8725 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8726 pbCodeBuf[off++] = 0;
8727 pbCodeBuf[off++] = 0;
8728 pbCodeBuf[off++] = 0;
8729 }
8730
8731#else
8732 /* sub tmp, gstgrp, uSubtrahend */
8733 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8734 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
8735
8736#endif
8737
8738 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8739
8740 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8741
8742 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8743 return off;
8744}
8745
8746
8747
8748/*********************************************************************************************************************************
8749* EFLAGS *
8750*********************************************************************************************************************************/
8751
8752#define IEM_MC_FETCH_EFLAGS(a_EFlags) \
8753 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
8754
8755/** Handles IEM_MC_FETCH_EFLAGS. */
8756DECL_INLINE_THROW(uint32_t)
8757iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
8758{
8759 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
8760 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
8761
8762 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
8763 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8764 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8765 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
8766 return off;
8767}
8768
8769
8770#define IEM_MC_COMMIT_EFLAGS(a_EFlags) \
8771 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
8772
8773/** Handles IEM_MC_COMMIT_EFLAGS. */
8774DECL_INLINE_THROW(uint32_t)
8775iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
8776{
8777 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
8778 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
8779
8780 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
8781
8782#ifdef VBOX_STRICT
8783 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
8784 uint32_t offFixup = off;
8785 off = iemNativeEmitJnzToFixed(pReNative, off, off);
8786 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
8787 iemNativeFixupFixedJump(pReNative, offFixup, off);
8788
8789 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
8790 offFixup = off;
8791 off = iemNativeEmitJzToFixed(pReNative, off, off);
8792 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
8793 iemNativeFixupFixedJump(pReNative, offFixup, off);
8794#endif
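    /* (The strict-build checks above emit a breakpoint with code 0x2001 if the
        always-one EFLAGS bit is clear in the value being committed, and one with
        code 0x2002 if any reserved must-be-zero bit is set.) */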
8795
8796 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8797 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
8798 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
8799 return off;
8800}
8801
8802
8803
8804/*********************************************************************************************************************************
8805* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
8806*********************************************************************************************************************************/
8807
8808#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
8809 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
8810
8811#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
8812 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
8813
8814#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
8815 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
8816
8817
8818/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
8819 * IEM_MC_FETCH_SREG_ZX_U64. */
8820DECL_INLINE_THROW(uint32_t)
8821iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
8822{
8823 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8824 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
8825 Assert(iSReg < X86_SREG_COUNT);
8826
8827 /*
8828 * For now, we will not create a shadow copy of a selector. The rationale
8829 * is that since we do not recompile the popping and loading of segment
8830 * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for
8831 * pushing and moving to registers, there is only a small chance that the
8832 * shadow copy will be accessed again before the register is reloaded. One
8833 * scenario would be nested calls in 16-bit code, but I doubt it's worth
8834 * the extra register pressure atm.
8835 *
8836 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
8837 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
8838 * store scenario covered at present (r160730).
8839 */
8840 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8841 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8842 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
8843 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8844 return off;
8845}
8846
8847
8848
8849/*********************************************************************************************************************************
8850* Register references. *
8851*********************************************************************************************************************************/
8852
8853#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
8854 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
8855
8856#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
8857 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
8858
8859/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
8860DECL_INLINE_THROW(uint32_t)
8861iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
8862{
8863 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8864 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8865 Assert(iGRegEx < 20);
8866
8867 if (iGRegEx < 16)
8868 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8869 else
8870 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
8871
8872 /* If we've delayed writing back the register value, flush it now. */
8873 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8874
8875 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8876 if (!fConst)
8877 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
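    /* (Rationale: the caller may modify the guest register through this reference,
        which would leave any host register shadow copy stale, so it is dropped up
        front for non-const references.) */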
8878
8879 return off;
8880}
8881
8882#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
8883 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
8884
8885#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
8886 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
8887
8888#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
8889 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
8890
8891#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
8892 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
8893
8894#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
8895 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
8896
8897#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
8898 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
8899
8900#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
8901 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
8902
8903#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
8904 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
8905
8906#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
8907 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
8908
8909#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
8910 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
8911
8912/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
8913DECL_INLINE_THROW(uint32_t)
8914iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
8915{
8916 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8917 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8918 Assert(iGReg < 16);
8919
8920 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
8921
8922 /* If we've delayed writing back the register value, flush it now. */
8923 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
8924
8925 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8926 if (!fConst)
8927 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
8928
8929 return off;
8930}
8931
8932
8933#define IEM_MC_REF_EFLAGS(a_pEFlags) \
8934 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
8935
8936/** Handles IEM_MC_REF_EFLAGS. */
8937DECL_INLINE_THROW(uint32_t)
8938iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
8939{
8940 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8941 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8942
8943 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
8944
8945 /* If we've delayed writing back the register value, flush it now. */
8946 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
8947
8948 /* If there is a shadow copy of guest EFLAGS, flush it now. */
8949 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
8950
8951 return off;
8952}
8953
8954
8955/*********************************************************************************************************************************
8956* Effective Address Calculation *
8957*********************************************************************************************************************************/
8958#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
8959 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
8960
8961/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
8962 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
8963DECL_INLINE_THROW(uint32_t)
8964iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8965 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
8966{
8967 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
8968
8969 /*
8970 * Handle the disp16 form with no registers first.
8971 *
8972 * Convert to an immediate value, as that'll delay the register allocation
8973 * and assignment till the memory access / call / whatever and we can use
8974 * a more appropriate register (or none at all).
8975 */
8976 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
8977 {
8978 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
8979 return off;
8980 }
8981
8982 /* Determine the displacement. */
8983 uint16_t u16EffAddr;
8984 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
8985 {
8986 case 0: u16EffAddr = 0; break;
8987 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
8988 case 2: u16EffAddr = u16Disp; break;
8989 default: AssertFailedStmt(u16EffAddr = 0);
8990 }
8991
8992 /* Determine the registers involved. */
8993 uint8_t idxGstRegBase;
8994 uint8_t idxGstRegIndex;
8995 switch (bRm & X86_MODRM_RM_MASK)
8996 {
8997 case 0:
8998 idxGstRegBase = X86_GREG_xBX;
8999 idxGstRegIndex = X86_GREG_xSI;
9000 break;
9001 case 1:
9002 idxGstRegBase = X86_GREG_xBX;
9003 idxGstRegIndex = X86_GREG_xDI;
9004 break;
9005 case 2:
9006 idxGstRegBase = X86_GREG_xBP;
9007 idxGstRegIndex = X86_GREG_xSI;
9008 break;
9009 case 3:
9010 idxGstRegBase = X86_GREG_xBP;
9011 idxGstRegIndex = X86_GREG_xDI;
9012 break;
9013 case 4:
9014 idxGstRegBase = X86_GREG_xSI;
9015 idxGstRegIndex = UINT8_MAX;
9016 break;
9017 case 5:
9018 idxGstRegBase = X86_GREG_xDI;
9019 idxGstRegIndex = UINT8_MAX;
9020 break;
9021 case 6:
9022 idxGstRegBase = X86_GREG_xBP;
9023 idxGstRegIndex = UINT8_MAX;
9024 break;
9025#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
9026 default:
9027#endif
9028 case 7:
9029 idxGstRegBase = X86_GREG_xBX;
9030 idxGstRegIndex = UINT8_MAX;
9031 break;
9032 }
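    /* Worked example (hypothetical encoding): for "mov ax, [bx+si+0x10]" with mod=1
       and rm=0 we get u16EffAddr=0x10, idxGstRegBase=xBX and idxGstRegIndex=xSI, so
       the code below computes (uint16_t)(0x10 + BX + SI). */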
9033
9034 /*
9035 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
9036 */
9037 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9038 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9039 kIemNativeGstRegUse_ReadOnly);
9040 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
9041 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9042 kIemNativeGstRegUse_ReadOnly)
9043 : UINT8_MAX;
9044#ifdef RT_ARCH_AMD64
9045 if (idxRegIndex == UINT8_MAX)
9046 {
9047 if (u16EffAddr == 0)
9048 {
9049 /* movzx ret, base */
9050 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
9051 }
9052 else
9053 {
9054 /* lea ret32, [base64 + disp32] */
9055 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9056 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9057 if (idxRegRet >= 8 || idxRegBase >= 8)
9058 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9059 pbCodeBuf[off++] = 0x8d;
9060 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9061 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
9062 else
9063 {
9064 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
9065 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9066 }
9067 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9068 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9069 pbCodeBuf[off++] = 0;
9070 pbCodeBuf[off++] = 0;
9071 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9072
9073 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9074 }
9075 }
9076 else
9077 {
9078 /* lea ret32, [index64 + base64 (+ disp32)] */
9079 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9080 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9081 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9082 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9083 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9084 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9085 pbCodeBuf[off++] = 0x8d;
9086 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
9087 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9088 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
9089 if (bMod == X86_MOD_MEM4)
9090 {
9091 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9092 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9093 pbCodeBuf[off++] = 0;
9094 pbCodeBuf[off++] = 0;
9095 }
9096 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9097 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9098 }
9099
9100#elif defined(RT_ARCH_ARM64)
9101 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9102 if (u16EffAddr == 0)
9103 {
9104 if (idxRegIndex == UINT8_MAX)
9105 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
9106 else
9107 {
9108 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
9109 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9110 }
9111 }
9112 else
9113 {
9114 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
9115 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
9116 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
9117 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9118 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
9119 else
9120 {
9121 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
9122 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9123 }
9124 if (idxRegIndex != UINT8_MAX)
9125 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
9126 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9127 }
9128
9129#else
9130# error "port me"
9131#endif
9132
9133 if (idxRegIndex != UINT8_MAX)
9134 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9135 iemNativeRegFreeTmp(pReNative, idxRegBase);
9136 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9137 return off;
9138}
9139
9140
9141#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
9142 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
9143
9144/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
9145 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
9146DECL_INLINE_THROW(uint32_t)
9147iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9148 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
9149{
9150 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9151
9152 /*
9153 * Handle the disp32 form with no registers first.
9154 *
9155 * Convert to an immediate value, as that'll delay the register allocation
9156 * and assignment till the memory access / call / whatever and we can use
9157 * a more appropriate register (or none at all).
9158 */
9159 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9160 {
9161 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
9162 return off;
9163 }
9164
9165 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
9166 uint32_t u32EffAddr = 0;
9167 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9168 {
9169 case 0: break;
9170 case 1: u32EffAddr = (int8_t)u32Disp; break;
9171 case 2: u32EffAddr = u32Disp; break;
9172 default: AssertFailed();
9173 }
9174
9175 /* Get the register (or SIB) value. */
9176 uint8_t idxGstRegBase = UINT8_MAX;
9177 uint8_t idxGstRegIndex = UINT8_MAX;
9178 uint8_t cShiftIndex = 0;
9179 switch (bRm & X86_MODRM_RM_MASK)
9180 {
9181 case 0: idxGstRegBase = X86_GREG_xAX; break;
9182 case 1: idxGstRegBase = X86_GREG_xCX; break;
9183 case 2: idxGstRegBase = X86_GREG_xDX; break;
9184 case 3: idxGstRegBase = X86_GREG_xBX; break;
9185 case 4: /* SIB */
9186 {
9187 /* index w/ scaling. */
9188 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9189 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9190 {
9191 case 0: idxGstRegIndex = X86_GREG_xAX; break;
9192 case 1: idxGstRegIndex = X86_GREG_xCX; break;
9193 case 2: idxGstRegIndex = X86_GREG_xDX; break;
9194 case 3: idxGstRegIndex = X86_GREG_xBX; break;
9195 case 4: cShiftIndex = 0; /*no index*/ break;
9196 case 5: idxGstRegIndex = X86_GREG_xBP; break;
9197 case 6: idxGstRegIndex = X86_GREG_xSI; break;
9198 case 7: idxGstRegIndex = X86_GREG_xDI; break;
9199 }
9200
9201 /* base */
9202 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
9203 {
9204 case 0: idxGstRegBase = X86_GREG_xAX; break;
9205 case 1: idxGstRegBase = X86_GREG_xCX; break;
9206 case 2: idxGstRegBase = X86_GREG_xDX; break;
9207 case 3: idxGstRegBase = X86_GREG_xBX; break;
9208 case 4:
9209 idxGstRegBase = X86_GREG_xSP;
9210 u32EffAddr += uSibAndRspOffset >> 8;
9211 break;
9212 case 5:
9213 if ((bRm & X86_MODRM_MOD_MASK) != 0)
9214 idxGstRegBase = X86_GREG_xBP;
9215 else
9216 {
9217 Assert(u32EffAddr == 0);
9218 u32EffAddr = u32Disp;
9219 }
9220 break;
9221 case 6: idxGstRegBase = X86_GREG_xSI; break;
9222 case 7: idxGstRegBase = X86_GREG_xDI; break;
9223 }
9224 break;
9225 }
9226 case 5: idxGstRegBase = X86_GREG_xBP; break;
9227 case 6: idxGstRegBase = X86_GREG_xSI; break;
9228 case 7: idxGstRegBase = X86_GREG_xDI; break;
9229 }
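    /* Worked example (hypothetical encoding): "lea eax, [ebx+ecx*4+0x20]" encoded with
       mod=1, rm=4 and SIB scale=2/index=ECX/base=EBX gives u32EffAddr=0x20,
       idxGstRegBase=xBX, idxGstRegIndex=xCX and cShiftIndex=2, i.e. the code below
       computes (uint32_t)(0x20 + EBX + (ECX << 2)). */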
9230
9231 /*
9232 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
9233 * the start of the function.
9234 */
9235 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9236 {
9237 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
9238 return off;
9239 }
9240
9241 /*
9242 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9243 */
9244 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9245 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9246 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9247 kIemNativeGstRegUse_ReadOnly);
9248 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9249 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9250 kIemNativeGstRegUse_ReadOnly);
9251
9252 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9253 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9254 {
9255 idxRegBase = idxRegIndex;
9256 idxRegIndex = UINT8_MAX;
9257 }
9258
9259#ifdef RT_ARCH_AMD64
9260 if (idxRegIndex == UINT8_MAX)
9261 {
9262 if (u32EffAddr == 0)
9263 {
9264 /* mov ret, base */
9265 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9266 }
9267 else
9268 {
9269 /* lea ret32, [base64 + disp32] */
9270 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9271 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9272 if (idxRegRet >= 8 || idxRegBase >= 8)
9273 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9274 pbCodeBuf[off++] = 0x8d;
9275 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9276 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9277 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9278 else
9279 {
9280 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9281 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9282 }
9283 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9284 if (bMod == X86_MOD_MEM4)
9285 {
9286 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9287 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9288 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9289 }
9290 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9291 }
9292 }
9293 else
9294 {
9295 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9296 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9297 if (idxRegBase == UINT8_MAX)
9298 {
9299 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
9300 if (idxRegRet >= 8 || idxRegIndex >= 8)
9301 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9302 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9303 pbCodeBuf[off++] = 0x8d;
9304 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9305 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9306 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9307 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9308 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9309 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9310 }
9311 else
9312 {
9313 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9314 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9315 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9316 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9317 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9318 pbCodeBuf[off++] = 0x8d;
9319 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9320 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9321 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9322 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9323 if (bMod != X86_MOD_MEM0)
9324 {
9325 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9326 if (bMod == X86_MOD_MEM4)
9327 {
9328 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9329 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9330 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9331 }
9332 }
9333 }
9334 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9335 }
9336
9337#elif defined(RT_ARCH_ARM64)
9338 if (u32EffAddr == 0)
9339 {
9340 if (idxRegIndex == UINT8_MAX)
9341 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9342 else if (idxRegBase == UINT8_MAX)
9343 {
9344 if (cShiftIndex == 0)
9345 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
9346 else
9347 {
9348 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9349 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
9350 }
9351 }
9352 else
9353 {
9354 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9355 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
9356 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9357 }
9358 }
9359 else
9360 {
9361 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
9362 {
9363 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9364 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
9365 }
9366 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
9367 {
9368 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9369 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9370 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
9371 }
9372 else
9373 {
9374 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
9375 if (idxRegBase != UINT8_MAX)
9376 {
9377 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9378 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9379 }
9380 }
9381 if (idxRegIndex != UINT8_MAX)
9382 {
9383 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9384 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
9385 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9386 }
9387 }
9388
9389#else
9390# error "port me"
9391#endif
9392
9393 if (idxRegIndex != UINT8_MAX)
9394 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9395 if (idxRegBase != UINT8_MAX)
9396 iemNativeRegFreeTmp(pReNative, idxRegBase);
9397 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9398 return off;
9399}
9400
9401
9402#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9403 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9404 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
9405
9406#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9407 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9408 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
9409
9410#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9411 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9412 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
9413
9414/**
9415 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
9416 *
9417 * @returns New off.
9418 * @param pReNative The native recompile state.
9419 * @param off The current code buffer offset.
9420 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
9421 * bit 4 to REX.X. The two bits are part of the
9422 * REG sub-field, which isn't needed in this
9423 * function.
9424 * @param uSibAndRspOffset Two parts:
9425 * - The first 8 bits make up the SIB byte.
9426 * - The next 8 bits are the fixed RSP/ESP offset
9427 * in case of a pop [xSP].
9428 * @param u32Disp The displacement byte/word/dword, if any.
9429 * @param cbInstr The size of the fully decoded instruction. Used
9430 * for RIP relative addressing.
9431 * @param idxVarRet The result variable number.
9432 * @param f64Bit Whether to use a 64-bit or 32-bit address size
9433 * when calculating the address.
9434 *
9435 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
9436 */
9437DECL_INLINE_THROW(uint32_t)
9438iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
9439 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
9440{
9441 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9442
9443 /*
9444 * Special case the rip + disp32 form first.
9445 */
9446 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9447 {
9448 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9449 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
9450 kIemNativeGstRegUse_ReadOnly);
9451#ifdef RT_ARCH_AMD64
9452 if (f64Bit)
9453 {
9454 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
9455 if ((int32_t)offFinalDisp == offFinalDisp)
9456 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
9457 else
9458 {
9459 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
9460 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
9461 }
9462 }
9463 else
9464 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
9465
9466#elif defined(RT_ARCH_ARM64)
9467 if (f64Bit)
9468 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
9469 (int64_t)(int32_t)u32Disp + cbInstr);
9470 else
9471 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
9472 (int32_t)u32Disp + cbInstr);
9473
9474#else
9475# error "Port me!"
9476#endif
9477 iemNativeRegFreeTmp(pReNative, idxRegPc);
9478 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9479 return off;
9480 }
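    /* Example for the RIP-relative special case above (hypothetical): a 7 byte
       "mov rax, [rip+0x1234]" arrives with u32Disp=0x1234 and cbInstr=7, so the
       result is the guest RIP of the instruction plus 7 plus 0x1234, i.e. relative
       to the next instruction as the architecture requires. */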
9481
9482 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
9483 int64_t i64EffAddr = 0;
9484 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9485 {
9486 case 0: break;
9487 case 1: i64EffAddr = (int8_t)u32Disp; break;
9488 case 2: i64EffAddr = (int32_t)u32Disp; break;
9489 default: AssertFailed();
9490 }
9491
9492 /* Get the register (or SIB) value. */
9493 uint8_t idxGstRegBase = UINT8_MAX;
9494 uint8_t idxGstRegIndex = UINT8_MAX;
9495 uint8_t cShiftIndex = 0;
9496 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
9497 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
9498 else /* SIB: */
9499 {
9500 /* index w/ scaling. */
9501 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9502 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9503 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
9504 if (idxGstRegIndex == 4)
9505 {
9506 /* no index */
9507 cShiftIndex = 0;
9508 idxGstRegIndex = UINT8_MAX;
9509 }
9510
9511 /* base */
9512 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
9513 if (idxGstRegBase == 4)
9514 {
9515 /* pop [rsp] hack */
9516 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
9517 }
9518 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
9519 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
9520 {
9521 /* mod=0 and base=5 -> disp32, no base reg. */
9522 Assert(i64EffAddr == 0);
9523 i64EffAddr = (int32_t)u32Disp;
9524 idxGstRegBase = UINT8_MAX;
9525 }
9526 }
9527
9528 /*
9529 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
9530 * the start of the function.
9531 */
9532 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9533 {
9534 if (f64Bit)
9535 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
9536 else
9537 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
9538 return off;
9539 }
9540
9541 /*
9542 * Now emit code that calculates:
9543 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9544 * or if !f64Bit:
9545 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9546 */
9547 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9548 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9549 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9550 kIemNativeGstRegUse_ReadOnly);
9551 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9552 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9553 kIemNativeGstRegUse_ReadOnly);
9554
9555 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9556 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9557 {
9558 idxRegBase = idxRegIndex;
9559 idxRegIndex = UINT8_MAX;
9560 }
9561
9562#ifdef RT_ARCH_AMD64
9563 uint8_t bFinalAdj;
9564 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
9565 bFinalAdj = 0; /* likely */
9566 else
9567 {
9568 /* pop [rsp] with a problematic disp32 value. Split out the
9569 RSP offset and add it separately afterwards (bFinalAdj). */
9570 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
9571 Assert(idxGstRegBase == X86_GREG_xSP);
9572 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
9573 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
9574 Assert(bFinalAdj != 0);
9575 i64EffAddr -= bFinalAdj;
9576 Assert((int32_t)i64EffAddr == i64EffAddr);
9577 }
9578 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
9579//pReNative->pInstrBuf[off++] = 0xcc;
9580
9581 if (idxRegIndex == UINT8_MAX)
9582 {
9583 if (u32EffAddr == 0)
9584 {
9585 /* mov ret, base */
9586 if (f64Bit)
9587 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
9588 else
9589 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9590 }
9591 else
9592 {
9593 /* lea ret, [base + disp32] */
9594 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9595 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9596 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
9597 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9598 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9599 | (f64Bit ? X86_OP_REX_W : 0);
9600 pbCodeBuf[off++] = 0x8d;
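 /* Pick the disp8 form (X86_MOD_MEM1) when the displacement fits in a signed byte,
 otherwise fall back to the disp32 form (X86_MOD_MEM4). */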
9601 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9602 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9603 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9604 else
9605 {
9606 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9607 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9608 }
9609 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9610 if (bMod == X86_MOD_MEM4)
9611 {
9612 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9613 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9614 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9615 }
9616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9617 }
9618 }
9619 else
9620 {
9621 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9622 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9623 if (idxRegBase == UINT8_MAX)
9624 {
9625 /* lea ret, [(index64 << cShiftIndex) + disp32] */
9626 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
9627 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9628 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
9629 | (f64Bit ? X86_OP_REX_W : 0);
9630 pbCodeBuf[off++] = 0x8d;
9631 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9632 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9633 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9634 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9635 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9636 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9637 }
9638 else
9639 {
9640 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9641 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9642 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9643 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9644 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
9645 | (f64Bit ? X86_OP_REX_W : 0);
9646 pbCodeBuf[off++] = 0x8d;
9647 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9648 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9649 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9650 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9651 if (bMod != X86_MOD_MEM0)
9652 {
9653 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9654 if (bMod == X86_MOD_MEM4)
9655 {
9656 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9657 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9658 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9659 }
9660 }
9661 }
9662 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9663 }
9664
9665 if (!bFinalAdj)
9666 { /* likely */ }
9667 else
9668 {
9669 Assert(f64Bit);
9670 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
9671 }
9672
9673#elif defined(RT_ARCH_ARM64)
9674 if (i64EffAddr == 0)
9675 {
9676 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9677 if (idxRegIndex == UINT8_MAX)
9678 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
9679 else if (idxRegBase != UINT8_MAX)
9680 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
9681 f64Bit, false /*fSetFlags*/, cShiftIndex);
9682 else
9683 {
9684 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
9685 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
9686 }
9687 }
9688 else
9689 {
9690 if (f64Bit)
9691 { /* likely */ }
9692 else
9693 i64EffAddr = (int32_t)i64EffAddr;
9694
9695 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
9696 {
9697 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9698 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
9699 }
9700 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
9701 {
9702 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9703 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
9704 }
9705 else
9706 {
9707 if (f64Bit)
9708 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
9709 else
9710 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
9711 if (idxRegBase != UINT8_MAX)
9712 {
9713 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9714 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
9715 }
9716 }
9717 if (idxRegIndex != UINT8_MAX)
9718 {
9719 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9720 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
9721 f64Bit, false /*fSetFlags*/, cShiftIndex);
9722 }
9723 }
9724
9725#else
9726# error "port me"
9727#endif
9728
9729 if (idxRegIndex != UINT8_MAX)
9730 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9731 if (idxRegBase != UINT8_MAX)
9732 iemNativeRegFreeTmp(pReNative, idxRegBase);
9733 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9734 return off;
9735}
9736
9737
9738/*********************************************************************************************************************************
9739* TLB Lookup. *
9740*********************************************************************************************************************************/
9741
9742#if defined(RT_ARCH_AMD64) && 1
9743# define IEMNATIVE_WITH_TLB_LOOKUP
9744#endif
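/* Note: When IEMNATIVE_WITH_TLB_LOOKUP isn't defined, IEMNATIVEEMITTLBSTATE::fSkip
 below is always true and every access goes via the TLB-miss (helper call) path. */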
9745
9746
9747/**
9748 * This must be instantiated *before* branching off to the lookup code,
9749 * so that register spilling and whatnot happens for everyone.
9750 */
9751typedef struct IEMNATIVEEMITTLBSTATE
9752{
9753 uint64_t const uAbsPtr;
9754 bool const fSkip;
9755 uint8_t const idxRegPtr;
9756 uint8_t const idxRegSegBase;
9757 uint8_t const idxRegSegLimit;
9758 uint8_t const idxRegSegAttrib;
9759 uint8_t const idxReg1;
9760 uint8_t const idxReg2;
9761
9762 IEMNATIVEEMITTLBSTATE(PIEMRECOMPILERSTATE a_pReNative, uint32_t *a_poff, uint8_t a_idxVarGCPtrMem,
9763 uint8_t a_iSegReg, uint8_t a_cbMem)
9764 : uAbsPtr( a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind != kIemNativeVarKind_Immediate
9765 ? UINT64_MAX
9766 : a_pReNative->Core.aVars[a_idxVarGCPtrMem].u.uValue)
9767#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9768 /* 32-bit and 64-bit wraparound will require special handling, so skip these for absolute addresses. */
9769 , fSkip( a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
9770 && ( (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT
9771 ? (uint64_t)(UINT32_MAX - a_cbMem)
9772 : (uint64_t)(UINT64_MAX - a_cbMem))
9773 < a_pReNative->Core.aVars[a_idxVarGCPtrMem].u.uValue)
9774#else
9775 , fSkip(true)
9776#endif
9777 , idxRegPtr(a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind != kIemNativeVarKind_Immediate
9778 ? iemNativeVarRegisterAcquire(a_pReNative, a_idxVarGCPtrMem, a_poff,
9779 true /*fInitialized*/, IEMNATIVE_CALL_ARG2_GREG)
9780 : UINT8_MAX)
9781 , idxRegSegBase(a_iSegReg == UINT8_MAX || fSkip
9782 ? UINT8_MAX
9783 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_BASE(a_iSegReg)))
9784 , idxRegSegLimit((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
9785 ? UINT8_MAX
9786 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_LIMIT(a_iSegReg)))
9787 , idxRegSegAttrib((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
9788 ? UINT8_MAX
9789 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_ATTRIB(a_iSegReg)))
9790 , idxReg1(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
9791 , idxReg2(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
9792
9793 {
9794 RT_NOREF_PV(a_cbMem);
9795 }
9796
9797 void freeRegsAndReleaseVars(PIEMRECOMPILERSTATE a_pReNative, uint8_t idxVarGCPtrMem) const
9798 {
9799 if (idxRegPtr != UINT8_MAX)
9800 iemNativeVarRegisterRelease(a_pReNative, idxVarGCPtrMem);
9801 if (idxRegSegBase != UINT8_MAX)
9802 iemNativeRegFreeTmp(a_pReNative, idxRegSegBase);
9803 if (idxRegSegLimit != UINT8_MAX)
9804 {
9805 iemNativeRegFreeTmp(a_pReNative, idxRegSegLimit);
9806 iemNativeRegFreeTmp(a_pReNative, idxRegSegAttrib);
9807 }
9808 else
9809 Assert(idxRegSegAttrib == UINT8_MAX);
9810 iemNativeRegFreeTmp(a_pReNative, idxReg2);
9811 iemNativeRegFreeTmp(a_pReNative, idxReg1);
9812 }
9813} IEMNATIVEEMITTLBSTATE;
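/* Illustrative usage sketch (simplified; the label creation, the TLB-miss helper
 * call and the TlbDone code are elided, and off, idxVarGCPtrMem, iSegReg, cbMem,
 * fAlignMask, fAccess and idxRegMemResult are assumed to exist in the caller):
 *
 *     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
 *     ...
 *     if (!TlbState.fSkip)
 *         off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
 *                                      fAccess, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
 *     ...
 *     TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
 */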
9814
9815
9816#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9817DECL_INLINE_THROW(uint32_t)
9818iemNativeEmitTlbLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEEMITTLBSTATE const * const pTlbState,
9819 uint8_t iSegReg, uint8_t cbMem, uint8_t fAlignMask, uint32_t fAccess,
9820 uint32_t idxLabelTlbLookup, uint32_t idxLabelTlbMiss, uint8_t idxRegMemResult,
9821 uint8_t offDisp = 0)
9822{
9823 RT_NOREF(offDisp);
9824 Assert(!pTlbState->fSkip);
9825# if defined(RT_ARCH_AMD64)
9826 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 512);
9827# elif defined(RT_ARCH_ARM64)
9828 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
9829# endif
9830
9831 /*
9832 * The expand-down check isn't used all that much, so we emit it here to keep
9833 * the lookup code straighter.
9834 */
9835 /* check_expand_down: ; complicated! */
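 /* For expand-down data segments the valid offsets lie *above* the limit: from
 limit+1 up to 0xffff, or up to 0xffffffff when the D/B attribute bit is set.
 That is what the inverted limit check and the D-bit trick below implement. */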
9836 uint32_t const offCheckExpandDown = off;
9837 uint32_t offFixupLimitDone = 0;
9838 if (iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
9839 {
9840 off = iemNativeEmitBrkEx(pCodeBuf, off, 1); /** @todo this needs testing */
9841 /* cmp seglim, regptr */
9842 if (pTlbState->idxRegPtr != UINT8_MAX)
9843 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, pTlbState->idxRegPtr);
9844 else
9845 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxRegSegLimit, (uint32_t)pTlbState->uAbsPtr);
9846 /* ja tlbmiss */
9847 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
9848 /* mov reg1, X86DESCATTR_D (0x4000) */
9849 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_D);
9850 /* and reg1, segattr */
9851 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib);
9852 /* xor reg1, X86DESCATTR_D */
9853 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_D);
9854 /* shl reg1, 2 (16 - 14) */
9855 AssertCompile((X86DESCATTR_D << 2) == UINT32_C(0x10000));
9856 off = iemNativeEmitShiftGpr32LeftEx(pCodeBuf, off, pTlbState->idxReg1, 2);
9857 /* dec reg1 (=> 0xffff if D=0; 0xffffffff if D=1) */
9858 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, 1);
9859 /* cmp reg1, reg2 (64-bit) / imm (32-bit) */
9860 if (pTlbState->idxRegPtr != UINT8_MAX)
9861 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2);
9862 else
9863 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, (uint32_t)(pTlbState->uAbsPtr + cbMem - 1));
9864 /* jbe tlbmiss */
9865 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
9866 /* jmp limitdone */
9867 offFixupLimitDone = off;
9868 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, off /* ASSUME short jump suffices */);
9869 }
9870
9871 /*
9872 * tlblookup:
9873 */
9874 iemNativeLabelDefine(pReNative, idxLabelTlbLookup, off);
9875
9876 /*
9877 * 1. Segmentation.
9878 *
9879 * 1a. Check segment limit and attributes if non-flat 32-bit code. This is complicated.
9880 */
9881 if (iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
9882 {
9883 /* If we're accessing more than one byte, put the last address we'll be
9884 accessing in idxReg2 (64-bit). */
9885 if (cbMem > 1 && pTlbState->idxRegPtr != UINT8_MAX)
9886 {
9887# if 1
9888 Assert(cbMem - 1 <= 127);
9889 /* mov reg2, regptr */
9890 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, pTlbState->idxReg2, pTlbState->idxRegPtr);
9891 /* add reg2, cbMem-1 */
9892 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, pTlbState->idxReg2, cbMem - 1);
9893# else
9894 /* mov reg2, cbMem-1 */
9895 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg2, cbMem - 1);
9896 /* add reg2, regptr */
9897 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, pTlbState->idxReg2, pTlbState->idxRegPtr);
9898# endif
9899 }
9900
9901 /* Check that we've got a segment loaded and that it allows the access.
9902 For write access this means a writable data segment.
9903 For read-only accesses this means a readable code segment or any data segment. */
9904 if (fAccess & IEM_ACCESS_TYPE_WRITE)
9905 {
9906 uint32_t const fMustBe1 = X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_WRITE;
9907 uint32_t const fMustBe0 = X86DESCATTR_UNUSABLE | X86_SEL_TYPE_CODE;
9908 /* mov reg1, must1|must0 */
9909 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, fMustBe1 | fMustBe0);
9910 /* and reg1, segattrs */
9911 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib);
9912 /* cmp reg1, must1 */
9913 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, fMustBe1);
9914 /* jne tlbmiss */
9915 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
9916 }
9917 else
9918 {
9919 /* U | !P |!DT |!CD | RW |
9920 16 | 8 | 4 | 3 | 1 |
9921 -------------------------------
9922 0 | 0 | 0 | 0 | 0 | execute-only code segment. - must be excluded
9923 0 | 0 | 0 | 0 | 1 | execute-read code segment.
9924 0 | 0 | 0 | 1 | 0 | read-only data segment.
9925 0 | 0 | 0 | 1 | 1 | read-write data segment. - last valid combination
9926 */
9927 /* mov reg1, relevant attributes */
9928 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1,
9929 X86DESCATTR_UNUSABLE | X86DESCATTR_P | X86DESCATTR_DT
9930 | X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE);
9931 /* and reg1, segattrs */
9932 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib);
9933 /* xor reg1, X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE ; place C=1 RW=0 at the bottom & limit the range.
9934 ; EO-code=0, ER-code=2, RO-data=8, RW-data=10 */
9935 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE);
9936 /* sub reg1, X86_SEL_TYPE_WRITE ; EO-code=-2, ER-code=0, RO-data=6, RW-data=8 */
9937 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_WRITE /* ER-code */);
9938 /* cmp reg1, X86_SEL_TYPE_CODE (8) ; RW-data (8) is the largest valid value */
9939 AssertCompile(X86_SEL_TYPE_CODE == 8);
9940 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_CODE);
9941 /* ja tlbmiss */
9942 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
9943 }
9944
9945 /*
9946 * Check the limit. If this is a write access, we know it's a data segment,
9947 * so the expand-down bit can be tested directly. For read-only accesses
9948 * we need to check that code/data=0 and expanddown=1 before continuing.
9949 */
9950 if (fAccess & IEM_ACCESS_TYPE_WRITE)
9951 {
9952 /* test segattrs, X86_SEL_TYPE_DOWN */
9953 AssertCompile(X86_SEL_TYPE_DOWN < 128);
9954 off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, pTlbState->idxRegSegAttrib, X86_SEL_TYPE_DOWN);
9955 /* jnz check_expand_down */
9956 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offCheckExpandDown, kIemNativeInstrCond_ne);
9957 }
9958 else
9959 {
9960 /* mov reg1, segattrs */
9961 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib);
9962 /* and reg1, code | down */
9963 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_CODE | X86_SEL_TYPE_DOWN);
9964 /* cmp reg1, down */
9965 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_DOWN);
9966 /* je check_expand_down */
9967 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offCheckExpandDown, kIemNativeInstrCond_e);
9968 }
9969
9970 /* expand_up:
9971 cmp seglim, regptr/reg2/imm */
9972 if (pTlbState->idxRegPtr != UINT8_MAX)
9973 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, cbMem > 1 ? pTlbState->idxReg2 : pTlbState->idxRegPtr);
9974 else
9975 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxRegSegLimit, (uint32_t)pTlbState->uAbsPtr);
9976 /* jbe tlbmiss */
9977 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
9978
9979 /* limitdone: */
9980 iemNativeFixupFixedJump(pReNative, offFixupLimitDone, off);
9981 }
9982
9983 /* 1b. Add the segment base. We use idxRegMemResult for the ptr register if this step is
9984 required or if the address is a constant (simplicity). */
9985 uint8_t const idxRegFlatPtr = iSegReg != UINT8_MAX || pTlbState->idxRegPtr == UINT8_MAX
9986 ? idxRegMemResult : pTlbState->idxRegPtr;
9987 if (iSegReg != UINT8_MAX)
9988 {
9989 /** @todo this can be done using LEA as well. */
9990 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
9991 {
9992 Assert(iSegReg >= X86_SREG_FS);
9993 /* mov regflat, regptr/imm */
9994 if (pTlbState->idxRegPtr != UINT8_MAX)
9995 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegPtr);
9996 else
9997 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->uAbsPtr);
9998 /* add regflat, seg.base */
9999 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase);
10000 }
10001 else
10002 {
10003 /* mov regflat, regptr/imm */
10004 if (pTlbState->idxRegPtr != UINT8_MAX)
10005 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegPtr);
10006 else
10007 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->uAbsPtr);
10008 /* add regflat, seg.base */
10009 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase);
10010 }
10011 }
10012 else if (pTlbState->idxRegPtr == UINT8_MAX)
10013 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->uAbsPtr);
10014
10015 /*
10016 * 2. Check that the address doesn't cross a page boundary and doesn't have alignment issues.
10017 *
10018 * 2a. Alignment check using fAlignMask.
10019 */
10020 if (fAlignMask)
10021 {
10022 Assert(RT_IS_POWER_OF_TWO(fAlignMask + 1));
10023 Assert(fAlignMask < 128);
10024 /* test regflat, fAlignMask */
10025 off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, idxRegFlatPtr, fAlignMask);
10026 /* jnz tlbmiss */
10027 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10028 }
10029
10030 /*
10031 * 2b. Check that the access doesn't cross a page boundary. This is implicit in
10032 * the previous test if the alignment is the same as or larger than the access size.
10033 */
10034 if (cbMem > fAlignMask + 1)
10035 {
10036 /* mov reg1, 0xfff */
10037 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, GUEST_PAGE_OFFSET_MASK);
10038 /* and reg1, regflat */
10039 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr);
10040 /* neg reg1 */
10041 off = iemNativeEmitNegGpr32Ex(pCodeBuf, off, pTlbState->idxReg1);
10042 /* add reg1, 0x1000 */
10043 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, GUEST_PAGE_SIZE);
10044 /* cmp reg1, cbMem - 1 ; reg1 = number of bytes left on the page */
10045 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, cbMem - 1);
10046 /* jbe tlbmiss ; miss if fewer than cbMem bytes remain on the page */
10047 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
10048 }
10049
10050 /*
10051 * 3. TLB lookup.
10052 *
10053 * 3a. Calculate the TLB tag value (IEMTLB_CALC_TAG).
10054 * In 64-bit mode we will also check for non-canonical addresses here.
10055 */
10056 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
10057 {
10058 /* mov reg1, regflat */
10059 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr);
10060 /* rol reg1, 16 */
10061 off = iemNativeEmitRotateGprLeftEx(pCodeBuf, off, pTlbState->idxReg1, 16);
10062 /** @todo Would 'movsx reg2, word reg1' and working on reg2 in dwords be faster? */
10063 /* inc word reg1 */
10064 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10065 if (pTlbState->idxReg1 >= 8)
10066 pCodeBuf[off++] = X86_OP_REX_B;
10067 pCodeBuf[off++] = 0xff;
10068 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, pTlbState->idxReg1 & 7);
10069 /* cmp word reg1, 1 */
10070 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10071 if (pTlbState->idxReg1 >= 8)
10072 pCodeBuf[off++] = X86_OP_REX_B;
10073 pCodeBuf[off++] = 0x83;
10074 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, pTlbState->idxReg1 & 7);
10075 pCodeBuf[off++] = 1;
10076 /* ja tlbmiss */
10077 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
10078 /* shr reg1, 16 + GUEST_PAGE_SHIFT */
10079 off = iemNativeEmitShiftGprRightEx(pCodeBuf, off, pTlbState->idxReg1, 16 + GUEST_PAGE_SHIFT);
10080 }
10081 else
10082 {
10083 /* mov reg1, regflat */
10084 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr);
10085 /* shr reg1, GUEST_PAGE_SHIFT */
10086 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, pTlbState->idxReg1, GUEST_PAGE_SHIFT);
10087 }
10088 /* or reg1, [qword pVCpu->iem.s.DataTlb.uTlbRevision] */
10089 pCodeBuf[off++] = pTlbState->idxReg1 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
10090 pCodeBuf[off++] = 0x0b; /* OR r64,r/m64 */
10091 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, pTlbState->idxReg1, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbRevision));
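 /* reg1 now holds ((GCPtrFlat << 16) >> (16 + GUEST_PAGE_SHIFT)) | uTlbRevision,
 i.e. the same tag value IEMTLB_CALC_TAG computes; in 64-bit mode the rol/inc/cmp
 sequence above also rejected addresses whose bits 63:48 aren't all zero or all ones. */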
10092
10093 /*
10094 * 3b. Calc pTlbe.
10095 */
10096 /* movzx reg2, byte reg1 */
10097 off = iemNativeEmitLoadGprFromGpr8Ex(pCodeBuf, off, pTlbState->idxReg2, pTlbState->idxReg1);
10098 /* shl reg2, 5 ; reg2 *= sizeof(IEMTLBENTRY) */
10099 AssertCompileSize(IEMTLBENTRY, 32);
10100 off = iemNativeEmitShiftGprLeftEx(pCodeBuf, off, pTlbState->idxReg2, 5);
10101 /* lea reg2, [pVCpu->iem.s.DataTlb.aEntries + reg2] */
10102 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU < 8);
10103 pCodeBuf[off++] = pTlbState->idxReg2 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_X | X86_OP_REX_R;
10104 pCodeBuf[off++] = 0x8d;
10105 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, pTlbState->idxReg2 & 7, 4 /*SIB*/);
10106 pCodeBuf[off++] = X86_SIB_MAKE(IEMNATIVE_REG_FIXED_PVMCPU & 7, pTlbState->idxReg2 & 7, 0);
10107 pCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
10108 pCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
10109 pCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
10110 pCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
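 /* reg2 = &pVCpu->iem.s.DataTlb.aEntries[(uint8_t)tag], i.e. the TLB is indexed
 by the low 8 bits of the tag with 32 bytes per entry. */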
10111
10112 /*
10113 * 3c. Compare the TLBE.uTag with the one from 2a (reg1).
10114 */
10115 /* cmp reg1, [reg2] */
10116 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
10117 pCodeBuf[off++] = 0x3b;
10118 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
10119 /* jne tlbmiss */
10120 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10121
10122 /*
10123 * 4. Check TLB page table level access flags and physical page revision #.
10124 */
10125 /* mov reg1, mask */
10126 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
10127 uint64_t const fNoUser = (((pReNative->fExec >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK) + 1) & IEMTLBE_F_PT_NO_USER;
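 /* (CPL + 1) & IEMTLBE_F_PT_NO_USER is non-zero only when CPL is 3, so only ring-3
 accesses require the page-table user bit; ring-0 thru 2 ignore it. */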
10128 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg1,
10129 IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
10130 | IEMTLBE_F_PG_UNASSIGNED | IEMTLBE_F_PG_NO_READ
10131 | IEMTLBE_F_PT_NO_ACCESSED | fNoUser);
10132 /* and reg1, [reg2->fFlagsAndPhysRev] */
10133 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
10134 pCodeBuf[off++] = 0x23;
10135 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
10136
10137 /* cmp reg1, [pVCpu->iem.s.DataTlb.uTlbPhysRev] */
10138 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R);
10139 pCodeBuf[off++] = 0x3b;
10140 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, IEMNATIVE_REG_FIXED_PVMCPU,
10141 RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbPhysRev));
10142 /* jne tlbmiss */
10143 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10144
10145 /*
10146 * 5. Check that pbMappingR3 isn't NULL (paranoia) and calculate the
10147 * resulting pointer.
10148 */
10149 /* mov reg1, [reg2->pbMappingR3] */
10150 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
10151 pCodeBuf[off++] = 0x8b;
10152 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, pbMappingR3));
10153
10154 /** @todo eliminate the need for this test? */
10155 /* test reg1, reg1 */
10156 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
10157 pCodeBuf[off++] = 0x85;
10158 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, pTlbState->idxReg1 & 7, pTlbState->idxReg1 & 7);
10159
10160 /* jz tlbmiss */
10161 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_e);
10162
10163 if (idxRegFlatPtr == idxRegMemResult) /* See step 1b. */
10164 {
10165 /* and result, 0xfff */
10166 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegMemResult, GUEST_PAGE_OFFSET_MASK);
10167 }
10168 else
10169 {
10170 Assert(idxRegFlatPtr == pTlbState->idxRegPtr);
10171 /* mov result, 0xfff */
10172 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxRegMemResult, GUEST_PAGE_OFFSET_MASK);
10173 /* and result, regflat */
10174 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegMemResult, idxRegFlatPtr);
10175 }
10176 /* add result, reg1 */
10177 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxReg1);
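 /* idxRegMemResult = pbMappingR3 + (GCPtrFlat & GUEST_PAGE_OFFSET_MASK), i.e. the
 host address of the guest memory being accessed. */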
10178
10179 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10180
10181 return off;
10182}
10183#endif
10184
10185
10186/*********************************************************************************************************************************
10187* Memory fetches and stores common *
10188*********************************************************************************************************************************/
10189
10190typedef enum IEMNATIVEMITMEMOP
10191{
10192 kIemNativeEmitMemOp_Store = 0,
10193 kIemNativeEmitMemOp_Fetch,
10194 kIemNativeEmitMemOp_Fetch_Zx_U16,
10195 kIemNativeEmitMemOp_Fetch_Zx_U32,
10196 kIemNativeEmitMemOp_Fetch_Zx_U64,
10197 kIemNativeEmitMemOp_Fetch_Sx_U16,
10198 kIemNativeEmitMemOp_Fetch_Sx_U32,
10199 kIemNativeEmitMemOp_Fetch_Sx_U64
10200} IEMNATIVEMITMEMOP;
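/* The _Zx_/_Sx_ fetch variants zero- respectively sign-extend the loaded value
 to the wider destination size given by the suffix. */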
10201
10202/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
10203 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
10204 * (with iSegReg = UINT8_MAX). */
10205DECL_INLINE_THROW(uint32_t)
10206iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
10207 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
10208 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
10209{
10210 /*
10211 * Assert sanity.
10212 */
10213 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
10214 Assert( enmOp != kIemNativeEmitMemOp_Store
10215 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
10216 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
10217 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
10218 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
10219 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
10220 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10221 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
10222 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
10223 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
10224#ifdef VBOX_STRICT
10225 if (iSegReg == UINT8_MAX)
10226 {
10227 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10228 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10229 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10230 switch (cbMem)
10231 {
10232 case 1:
10233 Assert( pfnFunction
10234 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
10235 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10236 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10237 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10238 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10239 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
10240 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
10241 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
10242 : UINT64_C(0xc000b000a0009000) ));
10243 break;
10244 case 2:
10245 Assert( pfnFunction
10246 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
10247 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10248 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10249 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10250 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
10251 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
10252 : UINT64_C(0xc000b000a0009000) ));
10253 break;
10254 case 4:
10255 Assert( pfnFunction
10256 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
10257 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10258 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10259 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
10260 : UINT64_C(0xc000b000a0009000) ));
10261 break;
10262 case 8:
10263 Assert( pfnFunction
10264 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
10265 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
10266 : UINT64_C(0xc000b000a0009000) ));
10267 break;
10268 }
10269 }
10270 else
10271 {
10272 Assert(iSegReg < 6);
10273 switch (cbMem)
10274 {
10275 case 1:
10276 Assert( pfnFunction
10277 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
10278 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
10279 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10280 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10281 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10282 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
10283 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
10284 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
10285 : UINT64_C(0xc000b000a0009000) ));
10286 break;
10287 case 2:
10288 Assert( pfnFunction
10289 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
10290 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
10291 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
10292 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
10293 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
10294 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
10295 : UINT64_C(0xc000b000a0009000) ));
10296 break;
10297 case 4:
10298 Assert( pfnFunction
10299 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
10300 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
10301 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
10302 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
10303 : UINT64_C(0xc000b000a0009000) ));
10304 break;
10305 case 8:
10306 Assert( pfnFunction
10307 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
10308 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
10309 : UINT64_C(0xc000b000a0009000) ));
10310 break;
10311 }
10312 }
10313#endif
10314
10315#ifdef VBOX_STRICT
10316 /*
10317 * Check that the fExec flags we've got make sense.
10318 */
10319 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10320#endif
10321
10322 /*
10323 * To keep things simple we have to commit any pending writes first as we
10324 * may end up making calls.
10325 */
10326 /** @todo we could postpone this till we make the call and reload the
10327 * registers after returning from the call. Not sure if that's sensible or
10328 * not, though. */
10329 off = iemNativeRegFlushPendingWrites(pReNative, off);
10330
10331 /*
10332 * Move/spill/flush stuff out of call-volatile registers.
10333 * This is the easy way out. We could confine this to the tlb-miss branch
10334 * by saving and restoring the active registers here.
10335 */
10336 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10337 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
10338
10339 /*
10340 * Define labels and allocate the result register (trying for the return
10341 * register if we can).
10342 */
10343 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10344 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10345 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10346 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX /* special case value storing below */
10347 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
10348 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
10349 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
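 /* (Trying for IEMNATIVE_CALL_RET_GREG first means a fetched value is typically
 already in the right register after the helper call and needs no extra move.) */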
10350
10351 /*
10352 * First we try to go via the TLB.
10353 */
10354//pReNative->pInstrBuf[off++] = 0xcc;
10355 /** @todo later. */
10356 RT_NOREF(fAlignMask, cbMem);
10357
10358 /*
10359 * Call helper to do the fetching.
10360 * We flush all guest register shadow copies here.
10361 */
10362 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10363
10364#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10365 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10366#else
10367 RT_NOREF(idxInstr);
10368#endif
10369
10370 uint8_t idxRegArgValue;
10371 if (iSegReg == UINT8_MAX)
10372 idxRegArgValue = IEMNATIVE_CALL_ARG2_GREG;
10373 else
10374 {
10375 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
10376 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
10377 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
10378
10379 idxRegArgValue = IEMNATIVE_CALL_ARG3_GREG;
10380 }
10381
10382 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
10383 if (enmOp == kIemNativeEmitMemOp_Store)
10384 {
10385 if (pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate)
10386 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArgValue, pReNative->Core.aVars[idxVarValue].u.uValue);
10387 else
10388 {
10389 uint8_t const idxRegVarValue = pReNative->Core.aVars[idxVarValue].idxReg;
10390 if (idxRegVarValue < RT_ELEMENTS(pReNative->Core.aHstRegs))
10391 {
10392 Assert(!(RT_BIT_32(idxRegVarValue) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10393 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArgValue, idxRegVarValue);
10394 }
10395 else
10396 {
10397 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarValue].idxStackSlot;
10398 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10399 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArgValue, iemNativeStackCalcBpDisp(idxStackSlot));
10400 }
10401 }
10402 }
10403
10404 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
10405 if (pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate)
10406 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
10407 pReNative->Core.aVars[idxVarGCPtrMem].u.uValue + offDisp);
10408 else
10409 {
10410 uint8_t const idxRegVarGCPtrMem = pReNative->Core.aVars[idxVarGCPtrMem].idxReg;
10411 if (idxRegVarGCPtrMem < RT_ELEMENTS(pReNative->Core.aHstRegs))
10412 {
10413 Assert(!(RT_BIT_32(idxRegVarGCPtrMem) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10414 if (!offDisp)
10415 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem);
10416 else
10417 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem, offDisp);
10418 }
10419 else
10420 {
10421 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarGCPtrMem].idxStackSlot;
10422 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10423 AssertFailed(); /** @todo This was probably caused by iemNativeRegMoveAndFreeAndFlushAtCall above. Improve... */
10424 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, iemNativeStackCalcBpDisp(idxStackSlot));
10425 if (offDisp)
10426 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offDisp);
10427 }
10428 }
10429
10430 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10431 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10432
10433 /* Done setting up parameters, make the call. */
10434 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10435
10436 /*
10437 * Put the result in the right register if this is a fetch.
10438 */
10439 if (enmOp != kIemNativeEmitMemOp_Store)
10440 {
10441 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
10442 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
10443 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
10444 iemNativeVarRegisterRelease(pReNative, idxVarValue);
10445 }
10446
10447 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10448
10449 return off;
10450}
10451
10452
10453
10454/*********************************************************************************************************************************
10455* Memory fetches (IEM_MEM_FETCH_XXX). *
10456*********************************************************************************************************************************/
10457
10458/* 8-bit segmented: */
10459#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
10460 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
10461 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
10462 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10463
10464#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10465 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10466 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
10467 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10468
10469#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10470 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10471 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10472 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10473
10474#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10475 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10476 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10477 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10478
10479#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10480 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10481 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
10482 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
10483
10484#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10485 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10486 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10487 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
10488
10489#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10490 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10491 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10492 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
10493
10494/* 16-bit segmented: */
10495#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10496 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10497 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10498 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10499
10500#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
10501 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10502 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10503 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
10504
10505#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10506 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10507 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10508 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10509
10510#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10511 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10512 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10513 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10514
10515#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10516 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10517 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10518 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
10519
10520#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10521 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10522 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10523 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
10524
10525
10526/* 32-bit segmented: */
10527#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10528 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10529 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10530 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
10531
10532#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
10533 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10534 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10535 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
10536
10537#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10538 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10539 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10540 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
10541
10542#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10543 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10544 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10545 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
10546
10547
10548/* 64-bit segmented: */
10549#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10550 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10551 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
10552 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
10553
10554
10555
10556/* 8-bit flat: */
10557#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
10558 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
10559 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
10560 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10561
10562#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
10563 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10564 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
10565 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10566
10567#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
10568 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10569 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10570 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10571
10572#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
10573 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10574 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10575 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10576
10577#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
10578 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10579 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
10580 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
10581
10582#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
10583 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10584 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10585 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
10586
10587#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
10588 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10589 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10590 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
10591
10592
10593/* 16-bit flat: */
10594#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
10595 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10596 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10597 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10598
10599#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
10600 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10601 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10602 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
10603
10604#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
10605 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10606 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10607 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10608
10609#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
10610 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10611 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10612 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10613
10614#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
10615 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10616 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10617 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
10618
10619#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
10620 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10621 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10622 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
10623
10624/* 32-bit flat: */
10625#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
10626 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10627 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10628 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
10629
10630#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
10631 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10632 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10633 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
10634
10635#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
10636 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10637 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10638 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
10639
10640#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
10641 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10642 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10643 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
10644
10645/* 64-bit flat: */
10646#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
10647 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10648 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
10649 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
10650
10651
10652
10653/*********************************************************************************************************************************
10654* Memory stores (IEM_MEM_STORE_XXX). *
10655*********************************************************************************************************************************/
10656
10657#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
10658 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
10659 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
10660 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
10661
10662#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
10663 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
10664 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
10665 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
10666
10667#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
10668 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
10669 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
10670 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
10671
10672#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
10673 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
10674 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
10675 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
10676
10677
10678#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
10679 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
10680 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
10681 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
10682
10683#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
10684 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
10685 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
10686 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
10687
10688#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
10689 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
10690 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
10691 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
10692
10693#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
10694 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
10695 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
10696 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
10697
10698
10699#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
10700 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10701 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
10702
10703#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
10704 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10705 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
10706
10707#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
10708 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10709 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
10710
10711#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
10712 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10713 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
10714
10715
10716#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
10717 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10718 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
10719
10720#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
10721 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10722 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
10723
10724#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
10725 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10726 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
10727
10728#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
10729 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10730 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
10731
10732/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
10733 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
10734DECL_INLINE_THROW(uint32_t)
10735iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
10736 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
10737{
10738 /*
10739 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
10740 * to do the grunt work.
10741 */
10742 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
10743 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
10744 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
10745 pfnFunction, idxInstr);
10746 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
10747 return off;
10748}
10749
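/* Illustrative expansion (the names are made up, the calls mirror the worker above):
   IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, 0x12345678) ends up doing roughly

       uint8_t const idxVarTmp = iemNativeVarAllocConst(pReNative, sizeof(uint32_t), UINT32_C(0x12345678));
       off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarTmp, a_iSeg, a_GCPtrMem,
                                                  sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store,
                                                  (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr);
       iemNativeVarFreeLocal(pReNative, idxVarTmp);

   i.e. the constant is wrapped in a temporary variable and handed to the generic
   fetch/store worker. */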
10750
10751
10752/*********************************************************************************************************************************
10753* Stack Accesses. *
10754*********************************************************************************************************************************/
10755/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
10756#define IEM_MC_PUSH_U16(a_u16Value) \
10757 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
10758 (uintptr_t)iemNativeHlpStackPushU16, pCallEntry->idxInstr)
10759#define IEM_MC_PUSH_U32(a_u32Value) \
10760 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
10761 (uintptr_t)iemNativeHlpStackPushU32, pCallEntry->idxInstr)
10762#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
10763 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
10764 (uintptr_t)iemNativeHlpStackPushU32SReg, pCallEntry->idxInstr)
10765#define IEM_MC_PUSH_U64(a_u64Value) \
10766 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
10767 (uintptr_t)iemNativeHlpStackPushU64, pCallEntry->idxInstr)
10768
10769#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
10770 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
10771 (uintptr_t)iemNativeHlpStackFlat32PushU16, pCallEntry->idxInstr)
10772#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
10773 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
10774 (uintptr_t)iemNativeHlpStackFlat32PushU32, pCallEntry->idxInstr)
10775#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
10776 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
10777 (uintptr_t)iemNativeHlpStackFlat32PushU32SReg, pCallEntry->idxInstr)
10778
10779#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
10780 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
10781 (uintptr_t)iemNativeHlpStackFlat64PushU16, pCallEntry->idxInstr)
10782#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
10783 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
10784 (uintptr_t)iemNativeHlpStackFlat64PushU64, pCallEntry->idxInstr)
10785
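/* A short note on the cBitsVarAndFlat parameter used by the push/pop emitters in
   this section: RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) just packs the
   four bytes, so e.g. RT_MAKE_U32_FROM_U8(16, 32, 0, 0) == UINT32_C(0x00002010).
   Sketch of how the pieces can be taken apart again (the code below currently only
   consults RT_BYTE2, in the strict asserts):

       uint8_t const cBitsVar  = RT_BYTE1(cBitsVarAndFlat);      // value width: 16/32/64
       uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat);      // 0 = segmented, 32/64 = flat stack
       bool    const fSReg     = RT_BYTE3(cBitsVarAndFlat) != 0; // segment register push variant
*/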
10786/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
10787DECL_INLINE_THROW(uint32_t)
10788iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
10789 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
10790{
10791 /*
10792 * Assert sanity.
10793 */
10794 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
10795#ifdef VBOX_STRICT
10796 if (RT_BYTE2(cBitsVarAndFlat) != 0)
10797 {
10798 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10799 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10800 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10801 Assert( pfnFunction
10802 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU16
10803 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32
10804 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32SReg
10805 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU16
10806 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU64
10807 : UINT64_C(0xc000b000a0009000) ));
10808 }
10809 else
10810 Assert( pfnFunction
10811 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU16
10812 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU32
10813 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackPushU32SReg
10814 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU64
10815 : UINT64_C(0xc000b000a0009000) ));
10816#endif
10817
10818#ifdef VBOX_STRICT
10819 /*
10820 * Check that the fExec flags we've got make sense.
10821 */
10822 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10823#endif
10824
10825 /*
10826 * To keep things simple we have to commit any pending writes first as we
10827 * may end up making calls.
10828 */
10829 /** @todo we could postpone this till we make the call and reload the
10830 * registers after returning from the call. Not sure if that's sensible or
10831 * not, though. */
10832 off = iemNativeRegFlushPendingWrites(pReNative, off);
10833
10834 /*
10835 * Move/spill/flush stuff out of call-volatile registers, keeping whatever
10836 * idxVarValue might be occupying.
10837 *
10838 * This is the easy way out. We could contain this to the tlb-miss branch
10839 * by saving and restoring active stuff here.
10840 */
10841 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10842 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarValue));
10843
10844 /* For now, flush any shadow copy of the xSP register. */
10845 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
10846
10847 /*
10848 * Define the TLB miss and TLB done labels (there is currently no result
10849 * register to allocate for a push).
10850 */
10851 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10852 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10853 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10854
10855 /*
10856 * First we try to go via the TLB.
10857 */
10858//pReNative->pInstrBuf[off++] = 0xcc;
10859 /** @todo later. */
10860 RT_NOREF(cBitsVarAndFlat);
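    /* Since the inline TLB lookup above is still a @todo, nothing is emitted for
       the TLB hit case yet: execution simply falls through into the tlbmiss code
       below, i.e. every push currently goes through the helper call. */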
10861
10862 /*
10863 * Call helper to do the pushing.
10864 */
10865 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10866
10867#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10868 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10869#else
10870 RT_NOREF(idxInstr);
10871#endif
10872
10873 /* IEMNATIVE_CALL_ARG1_GREG = idxVarValue (first) */
10874 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarValue,
10875 0 /*offAddend*/, true /*fVarAllowInVolatileReg*/);
10876
10877 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10878 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10879
10880 /* Done setting up parameters, make the call. */
10881 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10882
10883 /* The value variable is implicitly flushed. */
10884 iemNativeVarFreeLocal(pReNative, idxVarValue);
10885
10886 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10887
10888 return off;
10889}
10890
10891
10892
10893/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
10894#define IEM_MC_POP_GREG_U16(a_iGReg) \
10895 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
10896 (uintptr_t)iemNativeHlpStackPopGRegU16, pCallEntry->idxInstr)
10897#define IEM_MC_POP_GREG_U32(a_iGReg) \
10898 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
10899 (uintptr_t)iemNativeHlpStackPopGRegU32, pCallEntry->idxInstr)
10900#define IEM_MC_POP_GREG_U64(a_iGReg) \
10901 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
10902 (uintptr_t)iemNativeHlpStackPopGRegU64, pCallEntry->idxInstr)
10903
10904#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
10905 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
10906 (uintptr_t)iemNativeHlpStackFlat32PopGRegU16, pCallEntry->idxInstr)
10907#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
10908 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
10909 (uintptr_t)iemNativeHlpStackFlat32PopGRegU32, pCallEntry->idxInstr)
10910
10911#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
10912 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
10913 (uintptr_t)iemNativeHlpStackFlat64PopGRegU16, pCallEntry->idxInstr)
10914#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
10915 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
10916 (uintptr_t)iemNativeHlpStackFlat64PopGRegU64, pCallEntry->idxInstr)
10917
10918/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
10919DECL_INLINE_THROW(uint32_t)
10920iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
10921 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
10922{
10923 /*
10924 * Assert sanity.
10925 */
10926 Assert(idxGReg < 16);
10927#ifdef VBOX_STRICT
10928 if (RT_BYTE2(cBitsVarAndFlat) != 0)
10929 {
10930 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10931 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10932 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10933 Assert( pfnFunction
10934 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU16
10935 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU32
10936 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU16
10937 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU64
10938 : UINT64_C(0xc000b000a0009000) ));
10939 }
10940 else
10941 Assert( pfnFunction
10942 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU16
10943 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU32
10944 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU64
10945 : UINT64_C(0xc000b000a0009000) ));
10946#endif
10947
10948#ifdef VBOX_STRICT
10949 /*
10950 * Check that the fExec flags we've got make sense.
10951 */
10952 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10953#endif
10954
10955 /*
10956 * To keep things simple we have to commit any pending writes first as we
10957 * may end up making calls.
10958 */
10959 /** @todo we could postpone this till we make the call and reload the
10960 * registers after returning from the call. Not sure if that's sensible or
10961 * not, though. */
10962 off = iemNativeRegFlushPendingWrites(pReNative, off);
10963
10964 /*
10965 * Move/spill/flush stuff out of call-volatile registers.
10966 * This is the easy way out. We could contain this to the tlb-miss branch
10967 * by saving and restoring active stuff here.
10968 */
10969 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10970 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
10971
10972 /* For now, flush any shadow copy of the guest register that is about
10973 to be popped and the xSP register. */
10974 iemNativeRegFlushGuestShadows(pReNative,
10975 RT_BIT_64(IEMNATIVEGSTREG_GPR(idxGReg)) | RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
10976
10977 /*
10978 * Define the TLB miss and TLB done labels (there is currently no result
10979 * register to allocate for a pop).
10980 */
10981 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10982 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10983 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10984
10985 /*
10986 * First we try to go via the TLB.
10987 */
10988//pReNative->pInstrBuf[off++] = 0xcc;
10989 /** @todo later. */
10990 RT_NOREF(cBitsVarAndFlat);
10991
10992 /*
10993 * Call helper to do the popping.
10994 */
10995 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10996
10997#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10998 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10999#else
11000 RT_NOREF(idxInstr);
11001#endif
11002
11003 /* IEMNATIVE_CALL_ARG1_GREG = iGReg */
11004 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxGReg);
11005
11006 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11007 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11008
11009 /* Done setting up parameters, make the call. */
11010 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11011
11012 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11013
11014 return off;
11015}
11016
11017
11018
11019/*********************************************************************************************************************************
11020* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
11021*********************************************************************************************************************************/
11022
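/* These mapping MCs hand back a host pointer to the guest memory in a_puXxxMem
   together with an opaque a_bUnmapInfo token; the IEM_MC_MEM_COMMIT_AND_UNMAP_RW/WO/RO
   MCs further down take that token again to commit and release the mapping.
   (Summary of the contract as used by the emitters below.) */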
11023#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11024 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11025 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
11026 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
11027
11028#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11029 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11030 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
11031 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
11032
11033#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11034 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11035 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
11036 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
11037
11038
11039#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11040 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11041 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11042 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
11043
11044#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11045 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11046 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11047 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
11048
11049#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11050 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11051 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11052 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
11053
11054#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11055 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
11056 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11057 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
11058
11059
11060#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11061 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11062 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11063 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
11064
11065#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11066 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11067 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11068 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
11069
11070#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11071 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11072 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11073 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
11074
11075#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11076 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
11077 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11078 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
11079
11080
11081#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11082 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11083 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11084 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
11085
11086#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11087 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11088 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11089 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
11090
11091#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11092 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11093 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11094 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
11095
11096#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11097 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
11098 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11099 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
11100
11101
11102#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11103 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
11104 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11105 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
11106
11107#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11108 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
11109 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
11110 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
11111
11112
11113#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11114 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
11115 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11116 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
11117
11118#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11119 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
11120 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11121 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
11122
11123#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
11124 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
11125 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11126 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
11127
11128
11129
11130#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
11131 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11132 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
11133 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
11134
11135#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
11136 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11137 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
11138 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
11139
11140#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
11141 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11142 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
11143 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
11144
11145
11146#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
11147 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11148 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11149 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
11150
11151#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
11152 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11153 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11154 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
11155
11156#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
11157 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11158 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11159 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
11160
11161#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
11162 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
11163 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
11164 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
11165
11166
11167#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
11168 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11169 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11170 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
11171
11172#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
11173 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11174 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11175 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
11176
11177#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
11178 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11179 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11180 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
11181
11182#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
11183 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
11184 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
11185 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
11186
11187
11188#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
11189 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11190 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11191 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
11192
11193#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
11194 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11195 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11196 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
11197
11198#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
11199 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11200 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11201 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
11202
11203#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
11204 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
11205 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11206 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
11207
11208
11209#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
11210 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
11211 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
11212 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
11213
11214#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
11215 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
11216 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
11217 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
11218
11219
11220#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
11221 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
11222 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11223 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
11224
11225#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
11226 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
11227 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11228 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
11229
11230#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
11231 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
11232 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
11233 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
11234
11235
11236DECL_INLINE_THROW(uint32_t)
11237iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
11238 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
11239 uintptr_t pfnFunction, uint8_t idxInstr)
11240{
11241 /*
11242 * Assert sanity.
11243 */
11244 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
11245 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
11246 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
11247 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11248
11249 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
11250 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
11251 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
11252 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11253
11254 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
11255 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
11256 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
11257 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11258
11259 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
11260
11261 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
11262
11263#ifdef VBOX_STRICT
11264# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
11265 ( ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
11266 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
11267 : ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_READ \
11268 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
11269
11270 if (iSegReg == UINT8_MAX)
11271 {
11272 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11273 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11274 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11275 switch (cbMem)
11276 {
11277 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
11278 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
11279 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
11280 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
11281 case 10:
11282 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
11283 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
11284 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
11285 break;
11286 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
11287# if 0
11288 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
11289 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
11290# endif
11291 default: AssertFailed(); break;
11292 }
11293 }
11294 else
11295 {
11296 Assert(iSegReg < 6);
11297 switch (cbMem)
11298 {
11299 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
11300 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
11301 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
11302 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
11303 case 10:
11304 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
11305 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
11306 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
11307 break;
11308 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
11309# if 0
11310 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU256)); break;
11311 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU512)); break;
11312# endif
11313 default: AssertFailed(); break;
11314 }
11315 }
11316# undef IEM_MAP_HLP_FN
11317#endif
11318
11319#ifdef VBOX_STRICT
11320 /*
11321 * Check that the fExec flags we've got make sense.
11322 */
11323 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11324#endif
11325
11326 /*
11327 * To keep things simple we have to commit any pending writes first as we
11328 * may end up making calls.
11329 */
11330 /** @todo we could postpone this till we make the call and reload the
11331 * registers after returning from the call. Not sure if that's sensible or
11332 * not, though. */
11333 off = iemNativeRegFlushPendingWrites(pReNative, off);
11334
11335 /*
11336 * Move/spill/flush stuff out of call-volatile registers.
11337 * This is the easy way out. We could contain this to the tlb-miss branch
11338 * by saving and restoring active stuff here.
11339 */
11340 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
11341 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11342
11343 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
11344 while the tlb-miss codepath will temporarily put it on the stack.
11345 Set the type to stack here so we don't need to do it twice below. */
11346 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
11347 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
11348 /** @todo use a tmp register from TlbState, since they'll be free after tlb
11349 * lookup is done. */
11350
11351 /*
11352 * Define labels and allocate the result register (trying for the return
11353 * register if we can - which we of course can, given the above call).
11354 */
11355 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11356 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
11357 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
11358 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
11359
11360 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
11361
11362 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11363 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11364 : UINT32_MAX;
11365//off=iemNativeEmitBrk(pReNative, off, 0);
11366 /*
11367 * Jump to the TLB lookup code.
11368 */
11369 if (!TlbState.fSkip)
11370 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
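    /* Rough shape of the native code emitted for this MC when the inline TLB
       lookup is enabled (illustrative sketch):
               jmp     tlblookup              ; emitted just above
           tlbmiss:
               ...                            ; load iSegReg, GCPtrMem, &bUnmapInfo and pVCpu
               call    pfnFunction            ; iemNativeHlpMem[Flat]MapDataXxxYy
               ...                            ; copy result pointer + bUnmapInfo into their registers
               jmp     tlbdone
           tlblookup:
               ...                            ; iemNativeEmitTlbLookup, branches to tlbmiss on a miss
               mov     regUnmapInfo, 0        ; a TLB hit needs no unmapping
           tlbdone:
    */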
11371
11372 /*
11373 * tlbmiss:
11374 *
11375 * Call helper to do the fetching.
11376 * We flush all guest register shadow copies here.
11377 */
11378 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
11379
11380#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11381 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11382#else
11383 RT_NOREF(idxInstr);
11384#endif
11385
11386 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
11387 if (iSegReg != UINT8_MAX)
11388 {
11389 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
11390 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
11391 }
11392
11393 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem */
11394 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem);
11395
11396 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
11397#if 0
11398 off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo, true /*fFlushShadows*/);
11399#else
11400 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
11401 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
11402#endif
11403
11404 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11405 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11406
11407 /* Done setting up parameters, make the call. */
11408 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11409
11410 /*
11411 * Put the output in the right registers.
11412 */
11413 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
11414 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
11415 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
11416
11417 Assert(pReNative->Core.aVars[idxVarUnmapInfo].idxReg == idxRegUnmapInfo);
11418 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
11419
11420#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11421 if (!TlbState.fSkip)
11422 {
11423 /* end of tlbmiss - Jump to the done label. */
11424 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11425 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11426
11427 /*
11428 * tlblookup:
11429 */
11430 off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
11431 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
11432 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
11433
11434 /*
11435 * Lookup tail code, specific to the MC when the above is moved into a separate function.
11436 */
11437 /* [idxVarUnmapInfo] = 0 - allocate register for it. There must be free ones now, so no spilling required. */
11438 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
11439
11440 /*
11441 * tlbdone:
11442 */
11443 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11444 }
11445#else
11446 RT_NOREF(fAccess, fAlignMask);
11447#endif
11448
11449 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
11450 iemNativeVarRegisterRelease(pReNative, idxVarMem);
11451
11452 return off;
11453}
11454
11455
11456#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
11457 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, \
11458 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
11459
11460#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
11461 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_WRITE, \
11462 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
11463
11464#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
11465 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ, \
11466 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
11467
11468DECL_INLINE_THROW(uint32_t)
11469iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
11470 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
11471{
11472 /*
11473 * Assert sanity.
11474 */
11475 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
11476 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
11477 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
11478 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
11479#ifdef VBOX_STRICT
11480 switch (fAccess & IEM_ACCESS_TYPE_MASK)
11481 {
11482 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
11483 case IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
11484 case IEM_ACCESS_TYPE_READ: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
11485 default: AssertFailed();
11486 }
11487#else
11488 RT_NOREF(fAccess);
11489#endif
11490
11491 /*
11492 * To keep things simple we have to commit any pending writes first as we
11493 * may end up making calls (there shouldn't be any at this point, so this
11494 * is just for consistency).
11495 */
11496 /** @todo we could postpone this till we make the call and reload the
11497 * registers after returning from the call. Not sure if that's sensible or
11498 * not, though. */
11499 off = iemNativeRegFlushPendingWrites(pReNative, off);
11500
11501 /*
11502 * Move/spill/flush stuff out of call-volatile registers.
11503 *
11504 * We exclude any register holding the bUnmapInfo variable, as we'll be
11505 * checking it after returning from the call and will free it afterwards.
11506 */
11507 /** @todo save+restore active registers and maybe guest shadows in miss
11508 * scenario. */
11509 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
11510
11511 /*
11512 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
11513 * to call the unmap helper function.
11514 *
11515 * The likelihood of it being zero is higher than for the TLB hit when doing
11516 * the mapping, as a TLB miss for a well aligned and unproblematic memory
11517 * access should also end up with a mapping that won't need special unmapping.
11518 */
11519 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
11520 * should speed up things for the pure interpreter as well when TLBs
11521 * are enabled. */
11522#ifdef RT_ARCH_AMD64
11523 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
11524 {
11525 /* test byte [rbp - xxx], 0ffh */
11526 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11527 pbCodeBuf[off++] = 0xf6;
11528 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
11529 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
11530 pbCodeBuf[off++] = 0xff;
11531 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11532 }
11533 else
11534#endif
11535 {
11536 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
11537 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
11538 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
11539 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
11540 }
11541 uint32_t const offJmpFixup = off;
11542 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
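    /* What has been emitted so far for this MC is roughly (sketch):
               test    byte [rbp - bUnmapInfo], 0ffh   ; or: test reg8, 0ffh when still in a register
               jz      .done                           ; bUnmapInfo == 0 -> no unmap helper call needed
       with the call to pfnFunction following below and the jz target fixed up to
       point past it via iemNativeFixupFixedJump at the end. */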
11543
11544 /*
11545 * Call the unmap helper function.
11546 */
11547#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
11548 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11549#else
11550 RT_NOREF(idxInstr);
11551#endif
11552
11553 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
11554 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
11555 0 /*offAddend*/, true /*fVarAllowInVolatileReg*/);
11556
11557 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11558 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11559
11560 /* Done setting up parameters, make the call. */
11561 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11562
11563 /* The bUnmapInfo variable is implicitly freed by these MCs. */
11564 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
11565
11566 /*
11567 * Done, just fixup the jump for the non-call case.
11568 */
11569 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
11570
11571 return off;
11572}
11573
11574
11575
11576/*********************************************************************************************************************************
11577* State and Exceptions *
11578*********************************************************************************************************************************/
11579
11580#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11581#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11582
11583#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11584#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11585#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11586
11587#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11588#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11589#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11590
11591
11592DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
11593{
11594 /** @todo this needs a lot more work later. */
11595 RT_NOREF(pReNative, fForChange);
11596 return off;
11597}
11598
11599
11600/*********************************************************************************************************************************
11601* The native code generator functions for each MC block. *
11602*********************************************************************************************************************************/
11603
11604
11605/*
11606 * Include g_apfnIemNativeRecompileFunctions and associated functions.
11607 *
11608 * This should probably live in its own file later, but let's see what the
11609 * compile times turn out to be first.
11610 */
11611#include "IEMNativeFunctions.cpp.h"
11612
11613
11614
11615/*********************************************************************************************************************************
11616* Recompiler Core. *
11617*********************************************************************************************************************************/
11618
11619
11620/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
11621static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
11622{
11623 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
11624 pDis->cbCachedInstr += cbMaxRead;
11625 RT_NOREF(cbMinRead);
11626 return VERR_NO_DATA;
11627}
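/* Used with DISInstrWithPrefetchedBytes below so the disassembler never reads
   beyond the opcode bytes already recorded in the TB: any extra bytes requested
   are zero filled and the read reports VERR_NO_DATA. */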
11628
11629
11630/**
11631 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
11632 * @returns pszBuf.
11633 * @param fFlags The flags.
11634 * @param pszBuf The output buffer.
11635 * @param cbBuf The output buffer size. At least 32 bytes.
11636 */
11637DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
11638{
11639 Assert(cbBuf >= 32);
11640 static RTSTRTUPLE const s_aModes[] =
11641 {
11642 /* [00] = */ { RT_STR_TUPLE("16BIT") },
11643 /* [01] = */ { RT_STR_TUPLE("32BIT") },
11644 /* [02] = */ { RT_STR_TUPLE("!2!") },
11645 /* [03] = */ { RT_STR_TUPLE("!3!") },
11646 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
11647 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
11648 /* [06] = */ { RT_STR_TUPLE("!6!") },
11649 /* [07] = */ { RT_STR_TUPLE("!7!") },
11650 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
11651 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
11652 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
11653 /* [0b] = */ { RT_STR_TUPLE("!b!") },
11654 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
11655 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
11656 /* [0e] = */ { RT_STR_TUPLE("!e!") },
11657 /* [0f] = */ { RT_STR_TUPLE("!f!") },
11658 /* [10] = */ { RT_STR_TUPLE("!10!") },
11659 /* [11] = */ { RT_STR_TUPLE("!11!") },
11660 /* [12] = */ { RT_STR_TUPLE("!12!") },
11661 /* [13] = */ { RT_STR_TUPLE("!13!") },
11662 /* [14] = */ { RT_STR_TUPLE("!14!") },
11663 /* [15] = */ { RT_STR_TUPLE("!15!") },
11664 /* [16] = */ { RT_STR_TUPLE("!16!") },
11665 /* [17] = */ { RT_STR_TUPLE("!17!") },
11666 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
11667 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
11668 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
11669 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
11670 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
11671 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
11672 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
11673 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
11674 };
11675 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
11676 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
11677 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
11678
11679 pszBuf[off++] = ' ';
11680 pszBuf[off++] = 'C';
11681 pszBuf[off++] = 'P';
11682 pszBuf[off++] = 'L';
11683 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
11684 Assert(off < 32);
11685
11686 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
11687
11688 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
11689 {
11690 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
11691 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
11692 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
11693 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
11694 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
11695 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
11696 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
11697 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
11698 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
11699 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
11700 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
11701 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
11702 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
11703 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
11704 };
11705 if (fFlags)
11706 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
11707 if (s_aFlags[i].fFlag & fFlags)
11708 {
11709 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
11710 pszBuf[off++] = ' ';
11711 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
11712 off += s_aFlags[i].cchName;
11713 fFlags &= ~s_aFlags[i].fFlag;
11714 if (!fFlags)
11715 break;
11716 }
11717 pszBuf[off] = '\0';
11718
11719 return pszBuf;
11720}
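/* Example (illustrative): for a ring-0 64-bit native TB this produces something
   along the lines of "64BIT CPL0 TYPE_NATIVE" in pszBuf. */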
11721
11722
11723DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
11724{
11725 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
11726#if defined(RT_ARCH_AMD64)
11727 static const char * const a_apszMarkers[] =
11728 {
11729 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
11730 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
11731 };
11732#endif
11733
11734 char szDisBuf[512];
11735 DISSTATE Dis;
11736 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
11737 uint32_t const cNative = pTb->Native.cInstructions;
11738 uint32_t offNative = 0;
11739#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
11740 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
11741#endif
11742 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
11743 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
11744 : DISCPUMODE_64BIT;
11745#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
11746 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
11747#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
11748 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
11749#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
11750# error "Port me"
11751#else
11752 csh hDisasm = ~(size_t)0;
11753# if defined(RT_ARCH_AMD64)
11754 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
11755# elif defined(RT_ARCH_ARM64)
11756 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
11757# else
11758# error "Port me"
11759# endif
11760 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
11761#endif
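    /* Host code is disassembled either with the builtin DIS code or, when
       VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER is defined, via the capstone
       handle opened above; the guest opcode bytes further down always go through DIS. */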
11762
11763 /*
11764 * Print TB info.
11765 */
11766 pHlp->pfnPrintf(pHlp,
11767 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
11768 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
11769 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
11770 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
11771#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
11772 if (pDbgInfo && pDbgInfo->cEntries > 1)
11773 {
11774 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
11775
11776 /*
11777 * This disassembly is driven by the debug info which follows the native
11778 * code and indicates when it starts with the next guest instructions,
11779 * where labels are and such things.
11780 */
11781 uint32_t idxThreadedCall = 0;
11782 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
11783 uint8_t idxRange = UINT8_MAX;
11784 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
11785 uint32_t offRange = 0;
11786 uint32_t offOpcodes = 0;
11787 uint32_t const cbOpcodes = pTb->cbOpcodes;
11788 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
11789 uint32_t const cDbgEntries = pDbgInfo->cEntries;
11790 uint32_t iDbgEntry = 1;
11791 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
11792
11793 while (offNative < cNative)
11794 {
11795 /* If we're at or have passed the point where the next chunk of debug
11796 info starts, process it. */
11797 if (offDbgNativeNext <= offNative)
11798 {
11799 offDbgNativeNext = UINT32_MAX;
11800 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
11801 {
11802 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
11803 {
11804 case kIemTbDbgEntryType_GuestInstruction:
11805 {
11806 /* Did the exec flag change? */
11807 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
11808 {
11809 pHlp->pfnPrintf(pHlp,
11810 " fExec change %#08x -> %#08x %s\n",
11811 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
11812 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
11813 szDisBuf, sizeof(szDisBuf)));
11814 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
11815 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
11816 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
11817 : DISCPUMODE_64BIT;
11818 }
11819
11820                        /* New opcode range? We need to fend off a spurious debug info entry here for cases
11821 where the compilation was aborted before the opcode was recorded and the actual
11822 instruction was translated to a threaded call. This may happen when we run out
11823 of ranges, or when some complicated interrupts/FFs are found to be pending or
11824 similar. So, we just deal with it here rather than in the compiler code as it
11825 is a lot simpler to do here. */
11826 if ( idxRange == UINT8_MAX
11827 || idxRange >= cRanges
11828 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
11829 {
11830 idxRange += 1;
11831 if (idxRange < cRanges)
11832 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
11833 else
11834 continue;
11835 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
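                                    /* idxPhysPage 0 refers to the page the TB itself starts on; higher
                                       values index pTb->aGCPhysPages[] (offset by one). */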
11836 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
11837 + (pTb->aRanges[idxRange].idxPhysPage == 0
11838 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
11839 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
11840 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
11841 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
11842 pTb->aRanges[idxRange].idxPhysPage);
11843 GCPhysPc += offRange;
11844 }
11845
11846 /* Disassemble the instruction. */
11847 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
11848 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
11849 uint32_t cbInstr = 1;
11850 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
11851 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
11852 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
11853 if (RT_SUCCESS(rc))
11854 {
11855 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
11856 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
11857 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11858 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11859
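                                    /* Pad the guest disassembly out to a fixed column and append a marker
                                       so guest instructions stand out among the native ones. */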
11860 static unsigned const s_offMarker = 55;
11861 static char const s_szMarker[] = " ; <--- guest";
11862 if (cch < s_offMarker)
11863 {
11864 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
11865 cch = s_offMarker;
11866 }
11867 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
11868 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
11869
11870 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
11871 }
11872 else
11873 {
11874 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
11875 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
11876 cbInstr = 1;
11877 }
11878 GCPhysPc += cbInstr;
11879 offOpcodes += cbInstr;
11880 offRange += cbInstr;
11881 continue;
11882 }
11883
11884 case kIemTbDbgEntryType_ThreadedCall:
11885 pHlp->pfnPrintf(pHlp,
11886 " Call #%u to %s (%u args) - %s\n",
11887 idxThreadedCall,
11888 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
11889 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
11890 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
11891 idxThreadedCall++;
11892 continue;
11893
11894 case kIemTbDbgEntryType_GuestRegShadowing:
11895 {
11896 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
11897 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
11898 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
11899 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
11900 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
11901 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
11902 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
11903 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
11904 else
11905 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
11906 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
11907 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
11908 continue;
11909 }
11910
11911 case kIemTbDbgEntryType_Label:
11912 {
11913 const char *pszName = "what_the_fudge";
11914 const char *pszComment = "";
11915 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
11916 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
11917 {
11918 case kIemNativeLabelType_Return:
11919 pszName = "Return";
11920 break;
11921 case kIemNativeLabelType_ReturnBreak:
11922 pszName = "ReturnBreak";
11923 break;
11924 case kIemNativeLabelType_ReturnWithFlags:
11925 pszName = "ReturnWithFlags";
11926 break;
11927 case kIemNativeLabelType_NonZeroRetOrPassUp:
11928 pszName = "NonZeroRetOrPassUp";
11929 break;
11930 case kIemNativeLabelType_RaiseGp0:
11931 pszName = "RaiseGp0";
11932 break;
11933 case kIemNativeLabelType_ObsoleteTb:
11934 pszName = "ObsoleteTb";
11935 break;
11936 case kIemNativeLabelType_NeedCsLimChecking:
11937 pszName = "NeedCsLimChecking";
11938 break;
11939 case kIemNativeLabelType_CheckBranchMiss:
11940 pszName = "CheckBranchMiss";
11941 break;
11942 case kIemNativeLabelType_If:
11943 pszName = "If";
11944 fNumbered = true;
11945 break;
11946 case kIemNativeLabelType_Else:
11947 pszName = "Else";
11948 fNumbered = true;
11949 pszComment = " ; regs state restored pre-if-block";
11950 break;
11951 case kIemNativeLabelType_Endif:
11952 pszName = "Endif";
11953 fNumbered = true;
11954 break;
11955 case kIemNativeLabelType_CheckIrq:
11956 pszName = "CheckIrq_CheckVM";
11957 fNumbered = true;
11958 break;
11959 case kIemNativeLabelType_TlbLookup:
11960 pszName = "TlbLookup";
11961 fNumbered = true;
11962 break;
11963 case kIemNativeLabelType_TlbMiss:
11964 pszName = "TlbMiss";
11965 fNumbered = true;
11966 break;
11967 case kIemNativeLabelType_TlbDone:
11968 pszName = "TlbDone";
11969 fNumbered = true;
11970 break;
11971 case kIemNativeLabelType_Invalid:
11972 case kIemNativeLabelType_End:
11973 break;
11974 }
11975 if (fNumbered)
11976 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
11977 else
11978 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
11979 continue;
11980 }
11981
11982 case kIemTbDbgEntryType_NativeOffset:
11983 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
11984 Assert(offDbgNativeNext > offNative);
11985 break;
11986
11987 default:
11988 AssertFailed();
11989 }
11990 iDbgEntry++;
11991 break;
11992 }
11993 }
11994
11995 /*
11996 * Disassemble the next native instruction.
11997 */
11998 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
11999# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
12000 uint32_t cbInstr = sizeof(paNative[0]);
12001 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
12002 if (RT_SUCCESS(rc))
12003 {
12004# if defined(RT_ARCH_AMD64)
12005 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
12006 {
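                              /* The marker NOP carries a 32-bit payload in its displacement bytes (ab[3..6]).
                                 For threaded calls the low word holds the call number (bit 15 = recompiled)
                                 and the high word the function index; otherwise the payload (with bit 31
                                 masked off) selects one of the a_apszMarkers strings. */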
12007 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
12008 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
12009 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
12010 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
12011 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
12012 uInfo & 0x8000 ? "recompiled" : "todo");
12013 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
12014 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
12015 else
12016 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
12017 }
12018 else
12019# endif
12020 {
12021# ifdef RT_ARCH_AMD64
12022 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12023 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12024 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12025 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12026# elif defined(RT_ARCH_ARM64)
12027 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
12028 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12029 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12030# else
12031# error "Port me"
12032# endif
12033 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
12034 }
12035 }
12036 else
12037 {
12038# if defined(RT_ARCH_AMD64)
12039 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
12040 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
12041# elif defined(RT_ARCH_ARM64)
12042 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
12043# else
12044# error "Port me"
12045# endif
12046 cbInstr = sizeof(paNative[0]);
12047 }
12048 offNative += cbInstr / sizeof(paNative[0]);
12049
12050# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12051 cs_insn *pInstr;
12052 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
12053 (uintptr_t)pNativeCur, 1, &pInstr);
12054 if (cInstrs > 0)
12055 {
12056 Assert(cInstrs == 1);
12057# if defined(RT_ARCH_AMD64)
12058 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
12059 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
12060# else
12061 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
12062 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
12063# endif
12064 offNative += pInstr->size / sizeof(*pNativeCur);
12065 cs_free(pInstr, cInstrs);
12066 }
12067 else
12068 {
12069# if defined(RT_ARCH_AMD64)
12070 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
12071                                     pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
12072# else
12073 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
12074# endif
12075 offNative++;
12076 }
12077# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12078 }
12079 }
12080 else
12081#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
12082 {
12083 /*
12084 * No debug info, just disassemble the x86 code and then the native code.
12085 *
12086 * First the guest code:
12087 */
12088 for (unsigned i = 0; i < pTb->cRanges; i++)
12089 {
12090 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
12091 + (pTb->aRanges[i].idxPhysPage == 0
12092 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
12093 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
12094 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
12095 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
12096 unsigned off = pTb->aRanges[i].offOpcodes;
12097          /** @todo this isn't working when crossing pages! */
12098 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
12099 while (off < cbOpcodes)
12100 {
12101 uint32_t cbInstr = 1;
12102 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
12103 &pTb->pabOpcodes[off], cbOpcodes - off,
12104 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
12105 if (RT_SUCCESS(rc))
12106 {
12107 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12108 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12109 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12110 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12111 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
12112 GCPhysPc += cbInstr;
12113 off += cbInstr;
12114 }
12115 else
12116 {
12117 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
12118 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
12119 break;
12120 }
12121 }
12122 }
12123
12124 /*
12125 * Then the native code:
12126 */
12127 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
12128 while (offNative < cNative)
12129 {
12130 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
12131# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
12132 uint32_t cbInstr = sizeof(paNative[0]);
12133 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
12134 if (RT_SUCCESS(rc))
12135 {
12136# if defined(RT_ARCH_AMD64)
12137 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
12138 {
12139 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
12140 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
12141 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
12142 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
12143 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
12144 uInfo & 0x8000 ? "recompiled" : "todo");
12145 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
12146 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
12147 else
12148 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
12149 }
12150 else
12151# endif
12152 {
12153# ifdef RT_ARCH_AMD64
12154 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
12155 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
12156 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12157 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12158# elif defined(RT_ARCH_ARM64)
12159 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
12160 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
12161 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
12162# else
12163# error "Port me"
12164# endif
12165 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
12166 }
12167 }
12168 else
12169 {
12170# if defined(RT_ARCH_AMD64)
12171 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
12172 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
12173# else
12174 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
12175# endif
12176 cbInstr = sizeof(paNative[0]);
12177 }
12178 offNative += cbInstr / sizeof(paNative[0]);
12179
12180# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12181 cs_insn *pInstr;
12182 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
12183 (uintptr_t)pNativeCur, 1, &pInstr);
12184 if (cInstrs > 0)
12185 {
12186 Assert(cInstrs == 1);
12187# if defined(RT_ARCH_AMD64)
12188 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
12189 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
12190# else
12191 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
12192 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
12193# endif
12194 offNative += pInstr->size / sizeof(*pNativeCur);
12195 cs_free(pInstr, cInstrs);
12196 }
12197 else
12198 {
12199# if defined(RT_ARCH_AMD64)
12200 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
12201                                     pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
12202# else
12203 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
12204# endif
12205 offNative++;
12206 }
12207# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
12208 }
12209 }
12210
12211#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
12212 /* Cleanup. */
12213 cs_close(&hDisasm);
12214#endif
12215}
12216
12217
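/*
 * Usage sketch (hypothetical call site; the policy deciding when a threaded TB is
 * handed to the native recompiler belongs to the caller):
 *
 *      pTb = iemNativeRecompile(pVCpu, pTb);
 *      // On failure the threaded TB is returned unchanged, so pTb remains usable.
 */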
12218/**
12219 * Recompiles the given threaded TB into a native one.
12220 *
12221 * In case of failure the translation block will be returned as-is.
12222 *
12223 * @returns pTb.
12224 * @param pVCpu The cross context virtual CPU structure of the calling
12225 * thread.
12226  * @param   pTb     The threaded translation block to recompile to native.
12227 */
12228DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
12229{
12230 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
12231
12232 /*
12233      * The first time through, we allocate the recompiler state; on subsequent
12234      * calls we just reset it before using it again.
12235 */
12236 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
12237 if (RT_LIKELY(pReNative))
12238 iemNativeReInit(pReNative, pTb);
12239 else
12240 {
12241 pReNative = iemNativeInit(pVCpu, pTb);
12242 AssertReturn(pReNative, pTb);
12243 }
12244
12245 /*
12246 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
12247      * so that recompilation can be aborted if an error occurs.
12248 */
12249 uint32_t cCallsLeft = pTb->Thrd.cCalls;
12250#ifdef LOG_ENABLED
12251 uint32_t const cCallsOrg = cCallsLeft;
12252#endif
12253 uint32_t off = 0;
12254 int rc = VINF_SUCCESS;
12255 IEMNATIVE_TRY_SETJMP(pReNative, rc)
12256 {
12257 /*
12258 * Emit prolog code (fixed).
12259 */
12260 off = iemNativeEmitProlog(pReNative, off);
12261
12262 /*
12263 * Convert the calls to native code.
12264 */
12265#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12266 int32_t iGstInstr = -1;
12267#endif
12268#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
12269 uint32_t cThreadedCalls = 0;
12270 uint32_t cRecompiledCalls = 0;
12271#endif
12272 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
12273 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
12274 while (cCallsLeft-- > 0)
12275 {
12276 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
12277
12278 /*
12279 * Debug info and assembly markup.
12280 */
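                  /* The BltIn_CheckMode call carries the updated execution-mode flags in auParams[0];
                     mirror them into pReNative->fExec so the remaining calls are recompiled and
                     annotated for the correct mode. */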
12281 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
12282 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
12283#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12284 iemNativeDbgInfoAddNativeOffset(pReNative, off);
12285 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
12286 {
12287 if (iGstInstr < (int32_t)pTb->cInstructions)
12288 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
12289 else
12290 Assert(iGstInstr == pTb->cInstructions);
12291 iGstInstr = pCallEntry->idxInstr;
12292 }
12293 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
12294#endif
12295#if defined(VBOX_STRICT)
12296 off = iemNativeEmitMarker(pReNative, off,
12297 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
12298 pCallEntry->enmFunction));
12299#endif
12300#if defined(VBOX_STRICT)
12301 iemNativeRegAssertSanity(pReNative);
12302#endif
12303
12304 /*
12305 * Actual work.
12306 */
12307 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
12308 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)"));
12309 if (pfnRecom) /** @todo stats on this. */
12310 {
12311 off = pfnRecom(pReNative, off, pCallEntry);
12312 STAM_REL_STATS({cRecompiledCalls++;});
12313 }
12314 else
12315 {
12316 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
12317 STAM_REL_STATS({cThreadedCalls++;});
12318 }
12319 Assert(off <= pReNative->cInstrBufAlloc);
12320 Assert(pReNative->cCondDepth == 0);
12321
12322 /*
12323 * Advance.
12324 */
12325 pCallEntry++;
12326 }
12327
12328 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
12329 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
12330 if (!cThreadedCalls)
12331 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
12332
12333 /*
12334 * Emit the epilog code.
12335 */
12336 uint32_t idxReturnLabel;
12337 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
12338
12339 /*
12340 * Generate special jump labels.
12341 */
12342 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
12343 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
12344 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
12345 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
12346 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
12347 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
12348 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
12349 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
12350 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
12351 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
12352 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
12353 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
12354 }
12355 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
12356 {
12357 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
12358 return pTb;
12359 }
12360 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
12361 Assert(off <= pReNative->cInstrBufAlloc);
12362
12363 /*
12364      * Make sure all labels have been defined.
12365 */
12366 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
12367#ifdef VBOX_STRICT
12368 uint32_t const cLabels = pReNative->cLabels;
12369 for (uint32_t i = 0; i < cLabels; i++)
12370 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
12371#endif
12372
12373 /*
12374 * Allocate executable memory, copy over the code we've generated.
12375 */
12376 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
12377 if (pTbAllocator->pDelayedFreeHead)
12378 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
12379
12380 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
12381 AssertReturn(paFinalInstrBuf, pTb);
12382 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
12383
12384 /*
12385 * Apply fixups.
12386 */
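          /* Each fixup patches a previously emitted branch in the final buffer so it targets
             its label, now that all instruction offsets are known. */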
12387 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
12388 uint32_t const cFixups = pReNative->cFixups;
12389 for (uint32_t i = 0; i < cFixups; i++)
12390 {
12391 Assert(paFixups[i].off < off);
12392 Assert(paFixups[i].idxLabel < cLabels);
12393 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
12394 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
12395 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
12396 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
12397 switch (paFixups[i].enmType)
12398 {
12399#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
12400 case kIemNativeFixupType_Rel32:
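                      /* 32-bit signed displacement (e.g. JMP/Jcc rel32); the emitter supplies offAddend
                         to account for the displacement being relative to the end of the instruction. */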
12401 Assert(paFixups[i].off + 4 <= off);
12402 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12403 continue;
12404
12405#elif defined(RT_ARCH_ARM64)
12406 case kIemNativeFixupType_RelImm26At0:
12407 {
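                      /* B/BL: the signed displacement, in units of instructions, lives in bits [25:0];
                         the six opcode bits at the top are preserved. */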
12408 Assert(paFixups[i].off < off);
12409 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12410 Assert(offDisp >= -262144 && offDisp < 262144);
12411 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
12412 continue;
12413 }
12414
12415 case kIemNativeFixupType_RelImm19At5:
12416 {
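                      /* B.cond / CBZ / CBNZ: a signed 19-bit instruction displacement stored in bits [23:5]. */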
12417 Assert(paFixups[i].off < off);
12418 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12419 Assert(offDisp >= -262144 && offDisp < 262144);
12420 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
12421 continue;
12422 }
12423
12424 case kIemNativeFixupType_RelImm14At5:
12425 {
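                      /* TBZ / TBNZ: a signed 14-bit instruction displacement stored in bits [18:5]. */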
12426 Assert(paFixups[i].off < off);
12427 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
12428 Assert(offDisp >= -8192 && offDisp < 8192);
12429 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
12430 continue;
12431 }
12432
12433#endif
12434 case kIemNativeFixupType_Invalid:
12435 case kIemNativeFixupType_End:
12436 break;
12437 }
12438 AssertFailed();
12439 }
12440
12441 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
12442 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
12443
12444 /*
12445 * Convert the translation block.
12446 */
12447 RTMemFree(pTb->Thrd.paCalls);
12448 pTb->Native.paInstructions = paFinalInstrBuf;
12449 pTb->Native.cInstructions = off;
12450 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
12451#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
12452     pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
12453 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
12454#endif
12455
12456 Assert(pTbAllocator->cThreadedTbs > 0);
12457 pTbAllocator->cThreadedTbs -= 1;
12458 pTbAllocator->cNativeTbs += 1;
12459 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
12460
12461#ifdef LOG_ENABLED
12462 /*
12463 * Disassemble to the log if enabled.
12464 */
12465 if (LogIs3Enabled())
12466 {
12467 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
12468 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
12469# ifdef DEBUG_bird
12470 RTLogFlush(NULL);
12471# endif
12472 }
12473#endif
12474 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
12475
12476 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
12477 return pTb;
12478}
12479