
source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp @ 103612

Last change on this file since 103612 was 103596, checked in by vboxsync, 14 months ago

VMM/IEM: Fix emitters for IEM_MC_FETCH_FCW()/IEM_MC_FETCH_FSW(), the destination is a variable index and not a register index, need to get one from first, bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 103596 2024-02-28 14:59:42Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
136#endif
137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
138static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
139static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
140#endif
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
142DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
143 IEMNATIVEGSTREG enmGstReg, uint32_t off);
144DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store internal data
152 * in the chunk.
153 *
154 * Using RTHeapSimple is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation lets us restrict page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
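/**
 * Editorial illustration (not part of the original source): how a request maps
 * onto sub-allocation units and allocation-bitmap words with the alternative
 * allocator.  The 64 MiB chunk size below is only an example value.
 * @code
 *      uint32_t const cbReq     = 200;                                            // bytes requested
 *      uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
 *                               >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;           // -> 2 units (256 bytes)
 *      uint32_t const cbChunk   = _64M;                                           // example chunk size
 *      uint32_t const cUnits    = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT; // -> 524288 units
 *      uint32_t const cBmQWords = cUnits / 64;                                    // -> 8192 uint64_t bitmap words
 * @endcode
 */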
167
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Init once for the critical section. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
234
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDA into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
268/** Pointer to per-chunk unwind info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
281 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap. So,
344 * for the sake of simplicity/laziness, they are allocated as one contiguous
345 * block. */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to a chunk). */
351 uint32_t cBitmapElementsPerChunk;
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * requested memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 /** The size of the initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside within 32-bit RVA distance of the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for a run of cReqUnits consecutive clear bits
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516
517 for (unsigned iIteration = 0;; iIteration++)
518 {
519 /*
520 * Adjust the request size so it'll fit the allocator alignment/whatnot.
521 *
522 * For the RTHeapSimple allocator this means to follow the logic described
523 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
524 * existing chunks if we think we've got sufficient free memory around.
525 *
526 * While for the alternative one we just align it up to a whole unit size.
527 */
528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
529 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
530#else
531 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
532#endif
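        /* Editorial worked example (not part of the original source): with the
           default 128 byte unit size a 200 byte request becomes 256 bytes here,
           while the RTHeapSimple variant with its 32 byte block header turns it
           into RT_ALIGN_32(200 + 32, 64) - 32 = 224 bytes, so that the user area
           plus the next block header ends exactly on a 64 byte boundary. */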
533 if (cbReq <= pExecMemAllocator->cbFree)
534 {
535 uint32_t const cChunks = pExecMemAllocator->cChunks;
536 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
537 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
544 {
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 }
549 }
550
551 /*
552 * Can we grow it with another chunk?
553 */
554 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
555 {
556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
557 AssertLogRelRCReturn(rc, NULL);
558
559 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
560 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
561 if (pvRet)
562 return pvRet;
563 AssertFailed();
564 }
565
566 /*
567 * Try prune native TBs once.
568 */
569 if (iIteration == 0)
570 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
571 else
572 {
573 /** @todo stats... */
574 return NULL;
575 }
576 }
577
578}
579
580
581/** This is a hook that we may need later for changing memory protection back
582 * to readonly+exec */
583static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
584{
585#ifdef RT_OS_DARWIN
586 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
587 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
588 AssertRC(rc); RT_NOREF(pVCpu);
589
590 /*
591 * Flush the instruction cache:
592 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
593 */
594 /* sys_dcache_flush(pv, cb); - not necessary */
595 sys_icache_invalidate(pv, cb);
596#else
597 RT_NOREF(pVCpu, pv, cb);
598#endif
599}
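/**
 * Editorial usage sketch (not part of the original source): the intended
 * lifecycle of an executable allocation on hosts where RTMEM_PROT_WRITE and
 * RTMEM_PROT_EXEC are mutually exclusive (darwin).  The size and the
 * abGeneratedCode buffer are hypothetical.
 * @code
 *      uint32_t const cbCode = 256;
 *      uint8_t *pbCode = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *      if (pbCode)
 *      {
 *          memcpy(pbCode, abGeneratedCode, cbCode);                // emit the native code into the writable mapping
 *          iemExecMemAllocatorReadyForUse(pVCpu, pbCode, cbCode);  // flip to read+exec and flush the icache
 *          // ... execute the code ...
 *          iemExecMemAllocatorFree(pVCpu, pbCode, cbCode);         // release it when the TB is retired
 *      }
 * @endcode
 */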
600
601
602/**
603 * Frees executable memory.
604 */
605void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
609 Assert(pv);
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612#else
613 Assert(!((uintptr_t)pv & 63));
614#endif
615
616 /* Align the size as we did when allocating the block. */
617#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
618 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#else
620 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
621#endif
622
623 /* Free it / assert sanity. */
624#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
627 bool fFound = false;
628 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
629 {
630 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
631 fFound = offChunk < cbChunk;
632 if (fFound)
633 {
634#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
635 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
636 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
637
638 /* Check that it's valid and free it. */
639 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
641 for (uint32_t i = 1; i < cReqUnits; i++)
642 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
643 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
644
645 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
646 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
647
648 /* Update the stats. */
649 pExecMemAllocator->cbAllocated -= cb;
650 pExecMemAllocator->cbFree += cb;
651 pExecMemAllocator->cAllocations -= 1;
652 return;
653#else
654 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
655 break;
656#endif
657 }
658 }
659# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
660 AssertFailed();
661# else
662 Assert(fFound);
663# endif
664#endif
665
666#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
667 /* Update stats while cb is freshly calculated.*/
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Free it. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674#endif
675}
676
677
678
679#ifdef IN_RING3
680# ifdef RT_OS_WINDOWS
681
682/**
683 * Initializes the unwind info structures for windows hosts.
684 */
685static int
686iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
687 void *pvChunk, uint32_t idxChunk)
688{
689 RT_NOREF(pVCpu);
690
691 /*
692 * The AMD64 unwind opcodes.
693 *
694 * This is a program that starts with RSP after a RET instruction that
695 * ends up in recompiled code, and the operations we describe here will
696 * restore all non-volatile registers and bring RSP back to where our
697 * RET address is. This means it's reverse order from what happens in
698 * the prologue.
699 *
700 * Note! Using a frame register approach here both because we have one
701 * and mainly because the UWOP_ALLOC_LARGE argument values
702 * would be a pain to write initializers for. On the positive
703 * side, we're impervious to changes in the stack variable
704 * area and can deal with dynamic stack allocations if necessary.
705 */
706 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
707 {
708 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
709 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
710 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
711 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
712 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
713 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
714 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
715 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
716 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
717 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
718 };
719 union
720 {
721 IMAGE_UNWIND_INFO Info;
722 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
723 } s_UnwindInfo =
724 {
725 {
726 /* .Version = */ 1,
727 /* .Flags = */ 0,
728 /* .SizeOfProlog = */ 16, /* whatever */
729 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
730 /* .FrameRegister = */ X86_GREG_xBP,
731 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
732 }
733 };
734 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
735 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
736
737 /*
738 * Calc how much space we need and allocate it off the exec heap.
739 */
740 unsigned const cFunctionEntries = 1;
741 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
742 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
743# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
744 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
745 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
746 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
747# else
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
749 - pExecMemAllocator->cbHeapBlockHdr;
750 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
751 32 /*cbAlignment*/);
752# endif
753 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
754 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
755
756 /*
757 * Initialize the structures.
758 */
759 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
760
761 paFunctions[0].BeginAddress = 0;
762 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
763 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
764
765 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
766 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
767
768 /*
769 * Register it.
770 */
771 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
772 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
773
774 return VINF_SUCCESS;
775}
776
777
778# else /* !RT_OS_WINDOWS */
779
780/**
781 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
784{
785 if (iValue >= 64)
786 {
787 Assert(iValue < 0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
790 }
791 else if (iValue >= 0)
792 *Ptr.pb++ = (uint8_t)iValue;
793 else if (iValue > -64)
794 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
795 else
796 {
797 Assert(iValue > -0x2000);
798 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
799 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
800 }
801 return Ptr;
802}
803
804
805/**
806 * Emits an ULEB128 encoded value (up to 64-bit wide).
807 */
808DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
809{
810 while (uValue >= 0x80)
811 {
812 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
813 uValue >>= 7;
814 }
815 *Ptr.pb++ = (uint8_t)uValue;
816 return Ptr;
817}
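/**
 * Editorial worked examples (not part of the original source), handy when
 * eyeballing the generated .eh_frame bytes:
 * @code
 *      Ptr = iemDwarfPutLeb128(Ptr,  -8);      // one byte:  0x78        (sign bit 0x40 set)
 *      Ptr = iemDwarfPutLeb128(Ptr, 100);      // two bytes: 0xe4 0x00   (bit 6 set, so a second byte is needed)
 *      Ptr = iemDwarfPutUleb128(Ptr, 300);     // two bytes: 0xac 0x02
 * @endcode
 */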
818
819
820/**
821 * Emits a CFA rule as register @a uReg + offset @a off.
822 */
823DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
824{
825 *Ptr.pb++ = DW_CFA_def_cfa;
826 Ptr = iemDwarfPutUleb128(Ptr, uReg);
827 Ptr = iemDwarfPutUleb128(Ptr, off);
828 return Ptr;
829}
830
831
832/**
833 * Emits a register (@a uReg) save location:
834 * CFA + @a off * data_alignment_factor
835 */
836DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
837{
838 if (uReg < 0x40)
839 *Ptr.pb++ = DW_CFA_offset | uReg;
840 else
841 {
842 *Ptr.pb++ = DW_CFA_offset_extended;
843 Ptr = iemDwarfPutUleb128(Ptr, uReg);
844 }
845 Ptr = iemDwarfPutUleb128(Ptr, off);
846 return Ptr;
847}
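/**
 * Editorial byte-level example (not part of the original source), assuming the
 * standard DWARF/SysV AMD64 values DW_CFA_def_cfa = 0x0c, DW_CFA_offset = 0x80,
 * DWREG_AMD64_RBP = 6 and DWREG_AMD64_RA = 16:
 * @code
 *      Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   // 0x0c 0x06 0x10 -> CFA = RBP + 16
 *      Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,   1);   // 0x90 0x01      -> RA saved at [CFA + 1*-8]
 * @endcode
 */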
848
849
850# if 0 /* unused */
851/**
852 * Emits a register (@a uReg) save location, using signed offset:
853 * CFA + @a offSigned * data_alignment_factor
854 */
855DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
856{
857 *Ptr.pb++ = DW_CFA_offset_extended_sf;
858 Ptr = iemDwarfPutUleb128(Ptr, uReg);
859 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
860 return Ptr;
861}
862# endif
863
864
865/**
866 * Initializes the unwind info section for non-windows hosts.
867 */
868static int
869iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
870 void *pvChunk, uint32_t idxChunk)
871{
872 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
874
875 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
876
877 /*
878 * Generate the CIE first.
879 */
880# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
881 uint8_t const iDwarfVer = 3;
882# else
883 uint8_t const iDwarfVer = 4;
884# endif
885 RTPTRUNION const PtrCie = Ptr;
886 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
887 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
888 *Ptr.pb++ = iDwarfVer; /* DWARF version */
889 *Ptr.pb++ = 0; /* Augmentation. */
890 if (iDwarfVer >= 4)
891 {
892 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
893 *Ptr.pb++ = 0; /* Segment selector size. */
894 }
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
897# else
898 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
899# endif
900 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
903# elif defined(RT_ARCH_ARM64)
904 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
905# else
906# error "port me"
907# endif
908 /* Initial instructions: */
909# ifdef RT_ARCH_AMD64
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
918# elif defined(RT_ARCH_ARM64)
919# if 1
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
921# else
922 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
923# endif
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
936 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
937 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
938# else
939# error "port me"
940# endif
941 while ((Ptr.u - PtrCie.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the CIE size. */
944 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
945
946 /*
947 * Generate an FDE for the whole chunk area.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
951# endif
952 RTPTRUNION const PtrFde = Ptr;
953 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
954 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
955 Ptr.pu32++;
956 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
957 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
958# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
959 *Ptr.pb++ = DW_CFA_nop;
960# endif
961 while ((Ptr.u - PtrFde.u) & 3)
962 *Ptr.pb++ = DW_CFA_nop;
963 /* Finalize the FDE size. */
964 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
965
966 /* Terminator entry. */
967 *Ptr.pu32++ = 0;
968 *Ptr.pu32++ = 0; /* just to be sure... */
969 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
970
971 /*
972 * Register it.
973 */
974# ifdef IEMNATIVE_USE_LIBUNWIND
975 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
976# else
977 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
978 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
979# endif
980
981# ifdef IEMNATIVE_USE_GDB_JIT
982 /*
983 * Now for telling GDB about this (experimental).
984 *
985 * This seems to work best with ET_DYN.
986 */
987 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
988# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
989 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
990 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
991# else
992 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
993 - pExecMemAllocator->cbHeapBlockHdr;
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
995# endif
996 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
997 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
998
999 RT_ZERO(*pSymFile);
1000
1001 /*
1002 * The ELF header:
1003 */
1004 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1005 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1006 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1007 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1008 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1009 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1010 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1011 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1012# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1013 pSymFile->EHdr.e_type = ET_DYN;
1014# else
1015 pSymFile->EHdr.e_type = ET_REL;
1016# endif
1017# ifdef RT_ARCH_AMD64
1018 pSymFile->EHdr.e_machine = EM_AMD64;
1019# elif defined(RT_ARCH_ARM64)
1020 pSymFile->EHdr.e_machine = EM_AARCH64;
1021# else
1022# error "port me"
1023# endif
1024 pSymFile->EHdr.e_version = 1; /*?*/
1025 pSymFile->EHdr.e_entry = 0;
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1028# else
1029 pSymFile->EHdr.e_phoff = 0;
1030# endif
1031 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1032 pSymFile->EHdr.e_flags = 0;
1033 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1035 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1036 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1037# else
1038 pSymFile->EHdr.e_phentsize = 0;
1039 pSymFile->EHdr.e_phnum = 0;
1040# endif
1041 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1042 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1043 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1044
1045 uint32_t offStrTab = 0;
1046#define APPEND_STR(a_szStr) do { \
1047 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1048 offStrTab += sizeof(a_szStr); \
1049 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1050 } while (0)
1051#define APPEND_STR_FMT(a_szStr, ...) do { \
1052 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1053 offStrTab++; \
1054 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1055 } while (0)
1056
1057 /*
1058 * Section headers.
1059 */
1060 /* Section header #0: NULL */
1061 unsigned i = 0;
1062 APPEND_STR("");
1063 RT_ZERO(pSymFile->aShdrs[i]);
1064 i++;
1065
1066 /* Section header: .eh_frame */
1067 pSymFile->aShdrs[i].sh_name = offStrTab;
1068 APPEND_STR(".eh_frame");
1069 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1070 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1071# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1072 pSymFile->aShdrs[i].sh_offset
1073 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1074# else
1075 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1076 pSymFile->aShdrs[i].sh_offset = 0;
1077# endif
1078
1079 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1080 pSymFile->aShdrs[i].sh_link = 0;
1081 pSymFile->aShdrs[i].sh_info = 0;
1082 pSymFile->aShdrs[i].sh_addralign = 1;
1083 pSymFile->aShdrs[i].sh_entsize = 0;
1084 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1085 i++;
1086
1087 /* Section header: .shstrtab */
1088 unsigned const iShStrTab = i;
1089 pSymFile->EHdr.e_shstrndx = iShStrTab;
1090 pSymFile->aShdrs[i].sh_name = offStrTab;
1091 APPEND_STR(".shstrtab");
1092 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1093 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1095 pSymFile->aShdrs[i].sh_offset
1096 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1097# else
1098 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1099 pSymFile->aShdrs[i].sh_offset = 0;
1100# endif
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 i++;
1107
1108 /* Section header: .symtab */
1109 pSymFile->aShdrs[i].sh_name = offStrTab;
1110 APPEND_STR(".symtab");
1111 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1112 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1113 pSymFile->aShdrs[i].sh_offset
1114 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1115 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_link = iShStrTab;
1117 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1118 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1119 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1120 i++;
1121
1122# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1123 /* Section header: .dynsym */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".dynsym");
1126 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1128 pSymFile->aShdrs[i].sh_offset
1129 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1130 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_link = iShStrTab;
1132 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1133 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1134 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1135 i++;
1136# endif
1137
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 /* Section header: .dynamic */
1140 pSymFile->aShdrs[i].sh_name = offStrTab;
1141 APPEND_STR(".dynamic");
1142 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1143 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1144 pSymFile->aShdrs[i].sh_offset
1145 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1146 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1147 pSymFile->aShdrs[i].sh_link = iShStrTab;
1148 pSymFile->aShdrs[i].sh_info = 0;
1149 pSymFile->aShdrs[i].sh_addralign = 1;
1150 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1151 i++;
1152# endif
1153
1154 /* Section header: .text */
1155 unsigned const iShText = i;
1156 pSymFile->aShdrs[i].sh_name = offStrTab;
1157 APPEND_STR(".text");
1158 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1159 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1161 pSymFile->aShdrs[i].sh_offset
1162 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1163# else
1164 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1165 pSymFile->aShdrs[i].sh_offset = 0;
1166# endif
1167 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1168 pSymFile->aShdrs[i].sh_link = 0;
1169 pSymFile->aShdrs[i].sh_info = 0;
1170 pSymFile->aShdrs[i].sh_addralign = 1;
1171 pSymFile->aShdrs[i].sh_entsize = 0;
1172 i++;
1173
1174 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1175
1176# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1177 /*
1178 * The program headers:
1179 */
1180 /* Everything in a single LOAD segment: */
1181 i = 0;
1182 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1183 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = 0;
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1189 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1190 i++;
1191 /* The .dynamic segment. */
1192 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1193 pSymFile->aPhdrs[i].p_flags = PF_R;
1194 pSymFile->aPhdrs[i].p_offset
1195 = pSymFile->aPhdrs[i].p_vaddr
1196 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1197 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1198 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1199 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1200 i++;
1201
1202 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1203
1204 /*
1205 * The dynamic section:
1206 */
1207 i = 0;
1208 pSymFile->aDyn[i].d_tag = DT_SONAME;
1209 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1210 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1219 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1220 i++;
1221 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1222 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1223 i++;
1224 pSymFile->aDyn[i].d_tag = DT_NULL;
1225 i++;
1226 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1227# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1228
1229 /*
1230 * Symbol tables:
1231 */
1232 /** @todo gdb doesn't seem to really like this ... */
1233 i = 0;
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1241 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1242# endif
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = 0;
1246 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1247 pSymFile->aSymbols[i].st_value = 0;
1248 pSymFile->aSymbols[i].st_size = 0;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251 i++;
1252
1253 pSymFile->aSymbols[i].st_name = offStrTab;
1254 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1255# if 0
1256 pSymFile->aSymbols[i].st_shndx = iShText;
1257 pSymFile->aSymbols[i].st_value = 0;
1258# else
1259 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1260 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1261# endif
1262 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1263 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1264 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1265# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1266 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1267 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1268# endif
1269 i++;
1270
1271 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1272 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1273
1274 /*
1275 * The GDB JIT entry and informing GDB.
1276 */
1277 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1278# if 1
1279 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1280# else
1281 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1282# endif
1283
1284 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1285 RTCritSectEnter(&g_IemNativeGdbJitLock);
1286 pEhFrame->GdbJitEntry.pNext = NULL;
1287 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1288 if (__jit_debug_descriptor.pTail)
1289 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1290 else
1291 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1292 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1293 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1294
1295 /* Notify GDB: */
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1297 __jit_debug_register_code();
1298 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1299 RTCritSectLeave(&g_IemNativeGdbJitLock);
1300
1301# else /* !IEMNATIVE_USE_GDB_JIT */
1302 RT_NOREF(pVCpu);
1303# endif /* !IEMNATIVE_USE_GDB_JIT */
1304
1305 return VINF_SUCCESS;
1306}
1307
1308# endif /* !RT_OS_WINDOWS */
1309#endif /* IN_RING3 */
1310
1311
1312/**
1313 * Adds another chunk to the executable memory allocator.
1314 *
1315 * This is used by the init code for the initial allocation and later by the
1316 * regular allocator function when it's out of memory.
1317 */
1318static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1319{
1320 /* Check that we've room for growth. */
1321 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1322 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1323
1324 /* Allocate a chunk. */
1325#ifdef RT_OS_DARWIN
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1327#else
1328 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1329#endif
1330 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 int rc = VINF_SUCCESS;
1334#else
1335 /* Initialize the heap for the chunk. */
1336 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1337 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1338 AssertRC(rc);
1339 if (RT_SUCCESS(rc))
1340 {
1341 /*
1342 * We want the memory to be aligned on 64 byte, so the first time thru
1343 * here we do some exploratory allocations to see how we can achieve this.
1344 * On subsequent runs we only make an initial adjustment allocation, if
1345 * necessary.
1346 *
1347 * Since we own the heap implementation, we know that the internal block
1348 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1349 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1350 * to the size, align up by 64 bytes, and subtract 32 bytes.
1351 *
1352 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1353 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1354 * allocation to force subsequent allocations to return 64 byte aligned
1355 * user areas.
1356 */
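        /* Editorial worked example (not part of the original source): the two
           probe sizes below come out as RT_ALIGN_32(256 + 32, 64) - 32 = 288 and
           RT_ALIGN_32(687 + 32, 64) - 32 = 736 bytes.  Each user area plus the
           following 32 byte block header covers a whole number of 64 byte lines
           (320 resp. 768 bytes), which keeps every subsequent user area 64 byte
           aligned - exactly what the pvTest1/pvTest2 assertions verify. */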
1357 if (!pExecMemAllocator->cbHeapBlockHdr)
1358 {
1359 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1360 pExecMemAllocator->cbHeapAlignTweak = 64;
1361 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1362 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1364
1365 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1372 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1373 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1374 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1375 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1376
1377 RTHeapSimpleFree(hHeap, pvTest2);
1378 RTHeapSimpleFree(hHeap, pvTest1);
1379 }
1380 else
1381 {
1382 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1383 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1384 }
1385 if (RT_SUCCESS(rc))
1386#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1387 {
1388 /*
1389 * Add the chunk.
1390 *
1391 * This must be done before the unwind init so windows can allocate
1392 * memory from the chunk when using the alternative sub-allocator.
1393 */
1394 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1395#ifdef IN_RING3
1396 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1397#endif
1398#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1400#else
1401 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1402 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1403 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1404 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1405#endif
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1411 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1412 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1413#else
1414 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1415 pExecMemAllocator->cbTotal += cbFree;
1416 pExecMemAllocator->cbFree += cbFree;
1417#endif
1418
1419#ifdef IN_RING3
1420 /*
1421 * Initialize the unwind information (this cannot really fail atm).
1422 * (This sets pvUnwindInfo.)
1423 */
1424 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1425 if (RT_SUCCESS(rc))
1426#endif
1427 {
1428 return VINF_SUCCESS;
1429 }
1430
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 /* Just in case the impossible happens, undo the above: */
1433 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1435 pExecMemAllocator->cChunks = idxChunk;
1436 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1437 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1438 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1439 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1440#endif
1441 }
1442#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 }
1444#endif
1445 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1446 RT_NOREF(pVCpu);
1447 return rc;
1448}
1449
1450
1451/**
1452 * Initializes the executable memory allocator for native recompilation on the
1453 * calling EMT.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu The cross context virtual CPU structure of the calling
1457 * thread.
1458 * @param cbMax The max size of the allocator.
1459 * @param cbInitial The initial allocator size.
1460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1461 * dependent).
1462 */
1463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1464{
1465 /*
1466 * Validate input.
1467 */
1468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1470 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1471 || cbChunk == 0
1472 || ( RT_IS_POWER_OF_TWO(cbChunk)
1473 && cbChunk >= _1M
1474 && cbChunk <= _256M
1475 && cbChunk <= cbMax),
1476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1477 VERR_OUT_OF_RANGE);
1478
1479 /*
1480 * Adjust/figure out the chunk size.
1481 */
1482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1483 {
1484 if (cbMax >= _256M)
1485 cbChunk = _64M;
1486 else
1487 {
1488 if (cbMax < _16M)
1489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1490 else
1491 cbChunk = (uint32_t)cbMax / 4;
1492 if (!RT_IS_POWER_OF_TWO(cbChunk))
1493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1494 }
1495 }
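    /* Editorial examples (not part of the original source): cbMax = 128 MiB
       yields cbChunk = 32 MiB and thus 4 chunks; cbMax = 1 GiB takes the first
       branch and yields cbChunk = 64 MiB and 16 chunks. */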
1496
1497 if (cbChunk > cbMax)
1498 cbMax = cbChunk;
1499 else
1500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
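    /* For instance: with cbMax = 40 MiB and cbChunk left at the default, cbMax
       is below 256 MiB but at least 16 MiB, so cbChunk starts out as
       40 MiB / 4 = 10 MiB and is rounded up to the next power of two, 16 MiB.
       cbMax is then rounded up to a whole number of chunks, i.e. 48 MiB,
       giving cMaxChunks = 3. */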
1503
1504 /*
1505 * Allocate and initialize the allocator instance.
1506 */
1507 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1508#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1509 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1510 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1511 cbNeeded += cbBitmap * cMaxChunks;
1512 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1513 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1514#endif
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
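    /* Everything is carved out of a single RTMemAllocZ block: the allocator
       structure with its chunk array, then (for the alternative sub-allocator)
       the per-chunk allocation bitmaps at a cache-line aligned offset, one bit
       per allocation unit and thus cbChunk >> (unit shift + 3) bytes per chunk,
       and finally, for ring-3 builds other than Windows, the per-chunk eh_frame
       unwind records. */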
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1533 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1534 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1535 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1536#endif
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1539#endif
1540 for (uint32_t i = 0; i < cMaxChunks; i++)
1541 {
1542#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1543 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1544 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1545#else
1546 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1547#endif
1548 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1549#ifdef IN_RING0
1550 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1551#else
1552 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1553#endif
1554 }
1555 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1556
1557 /*
1558 * Do the initial allocations.
1559 */
1560 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1561 {
1562 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1563 AssertLogRelRCReturn(rc, rc);
1564 }
1565
1566 pExecMemAllocator->idxChunkHint = 0;
1567
1568 return VINF_SUCCESS;
1569}
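/*
 * Illustrative call sketch only (the sizes below are made up; the actual
 * caller supplies its own limits):
 *
 *      int rc = iemExecMemAllocatorInit(pVCpu, 64 * _1M, 16 * _1M, UINT32_MAX);
 *      AssertLogRelRCReturn(rc, rc);
 *
 * Passing UINT32_MAX (or 0) for cbChunk lets the function derive a suitable
 * chunk size from cbMax as shown above.
 */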
1570
1571
1572/*********************************************************************************************************************************
1573* Native Recompilation *
1574*********************************************************************************************************************************/
1575
1576
1577/**
1578 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1581{
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1584}
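/*
 * A note on the helper above: the idxInstr argument carries how many
 * instructions the TB had completed when the status was raised, so the
 * instruction counter is credited here, and VINF_IEM_REEXEC_BREAK is treated
 * as a plain 'leave the TB and return to the execution loop' indicator rather
 * than a failure, which is why it is folded into VINF_SUCCESS before the
 * common status code fiddling.
 */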
1585
1586
1587/**
1588 * Used by TB code when it wants to raise a \#GP(0).
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1591{
1592 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1593#ifndef _MSC_VER
1594 return VINF_IEM_RAISED_XCPT; /* not reached */
1595#endif
1596}
1597
1598
1599/**
1600 * Used by TB code when it wants to raise a \#NM.
1601 */
1602IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1603{
1604 iemRaiseDeviceNotAvailableJmp(pVCpu);
1605#ifndef _MSC_VER
1606 return VINF_IEM_RAISED_XCPT; /* not reached */
1607#endif
1608}
1609
1610
1611/**
1612 * Used by TB code when it wants to raise a \#UD.
1613 */
1614IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1615{
1616 iemRaiseUndefinedOpcodeJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when detecting opcode changes.
1625 * @see iemThreadeFuncWorkerObsoleteTb
1626 */
1627IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1628{
1629 /* We set fSafeToFree to false because we're being called in the context
1630 of a TB callback function, which for native TBs means we cannot release
1631 the executable memory until we've returned all the way back to iemTbExec,
1632 as that return path goes via the native code generated for the TB. */
1633 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1634 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1635 return VINF_IEM_REEXEC_BREAK;
1636}
1637
1638
1639/**
1640 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1643{
1644 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1645 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1646 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1647 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1648 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1649 return VINF_IEM_REEXEC_BREAK;
1650}
1651
1652
1653/**
1654 * Used by TB code when we missed a PC check after a branch.
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1657{
1658 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1659 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1660 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1661 pVCpu->iem.s.pbInstrBuf));
1662 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1663 return VINF_IEM_REEXEC_BREAK;
1664}
1665
1666
1667
1668/*********************************************************************************************************************************
1669* Helpers: Segmented memory fetches and stores. *
1670*********************************************************************************************************************************/
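/*
 * A note on the pattern shared by the helpers below: when the corresponding
 * IEMNATIVE_WITH_TLB_LOOKUP_XXX option is defined, the recompiled code is
 * expected to have done the TLB lookup inline and only calls these helpers on
 * the slow path, so they go straight to the iemMemXxxSafeJmp workers;
 * otherwise they simply forward to the regular iemMemXxxJmp ones.
 */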
1671
1672/**
1673 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1674 */
1675IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1676{
1677#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1678 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1679#else
1680 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1681#endif
1682}
1683
1684
1685/**
1686 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1687 * to 16 bits.
1688 */
1689IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1690{
1691#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1692 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1693#else
1694 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1695#endif
1696}
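/*
 * The cast chains used by the sign extending helpers above and below first
 * sign extend to the target width (e.g. int8_t -> int16_t), then go through
 * the matching unsigned type before widening to the uint64_t return type, so
 * any bits above the target width come out as zero and the emitted code gets
 * a clean 64-bit register value.
 */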
1697
1698
1699/**
1700 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1701 * to 32 bits.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1706 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1707#else
1708 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712/**
1713 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1714 * to 64 bits.
1715 */
1716IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1717{
1718#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1719 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1720#else
1721 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1722#endif
1723}
1724
1725
1726/**
1727 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1728 */
1729IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1730{
1731#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1732 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1733#else
1734 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1735#endif
1736}
1737
1738
1739/**
1740 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1741 * to 32 bits.
1742 */
1743IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1744{
1745#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1746 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1747#else
1748 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1749#endif
1750}
1751
1752
1753/**
1754 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1755 * to 64 bits.
1756 */
1757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1758{
1759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1760 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1761#else
1762 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1763#endif
1764}
1765
1766
1767/**
1768 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1769 */
1770IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1771{
1772#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1773 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1774#else
1775 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1776#endif
1777}
1778
1779
1780/**
1781 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1782 * to 64 bits.
1783 */
1784IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1785{
1786#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1787 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1788#else
1789 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1790#endif
1791}
1792
1793
1794/**
1795 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1796 */
1797IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1798{
1799#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1800 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1801#else
1802 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1803#endif
1804}
1805
1806
1807/**
1808 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1809 */
1810IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1811{
1812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1813 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1814#else
1815 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1816#endif
1817}
1818
1819
1820/**
1821 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1826 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1827#else
1828 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1839 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1840#else
1841 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1848 */
1849IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1850{
1851#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1852 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1853#else
1854 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1855#endif
1856}
1857
1858
1859
1860/**
1861 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1866 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1867#else
1868 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1869#endif
1870}
1871
1872
1873/**
1874 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1875 */
1876IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1877{
1878#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1879 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1880#else
1881 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1882#endif
1883}
1884
1885
1886/**
1887 * Used by TB code to store a 32-bit selector value onto a generic stack.
1888 *
1889 * Intel CPUs don't write a whole dword, hence the special function.
1890 */
1891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1892{
1893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1894 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1895#else
1896 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1897#endif
1898}
1899
1900
1901/**
1902 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1907 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1908#else
1909 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1916 */
1917IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1918{
1919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1920 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1921#else
1922 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1923#endif
1924}
1925
1926
1927/**
1928 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1929 */
1930IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1931{
1932#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1933 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1934#else
1935 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1936#endif
1937}
1938
1939
1940/**
1941 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1942 */
1943IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1944{
1945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1946 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1947#else
1948 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1949#endif
1950}
1951
1952
1953
1954/*********************************************************************************************************************************
1955* Helpers: Flat memory fetches and stores. *
1956*********************************************************************************************************************************/
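/*
 * Note that the flat variants invoke the safe workers with UINT8_MAX as the
 * segment register index, which IEM uses to denote a flat (unsegmented)
 * access, whereas the non-TLB-lookup configuration calls the dedicated
 * iemMemFlatXxxJmp workers instead.
 */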
1957
1958/**
1959 * Used by TB code to load unsigned 8-bit data w/ flat address.
1960 * @note Zero extending the value to 64-bit to simplify assembly.
1961 */
1962IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1963{
1964#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1965 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1966#else
1967 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1968#endif
1969}
1970
1971
1972/**
1973 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1974 * to 16 bits.
1975 * @note Zero extending the value to 64-bit to simplify assembly.
1976 */
1977IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1978{
1979#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1980 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1981#else
1982 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1983#endif
1984}
1985
1986
1987/**
1988 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1989 * to 32 bits.
1990 * @note Zero extending the value to 64-bit to simplify assembly.
1991 */
1992IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1993{
1994#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1995 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1996#else
1997 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1998#endif
1999}
2000
2001
2002/**
2003 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2004 * to 64 bits.
2005 */
2006IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2007{
2008#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2009 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2010#else
2011 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2012#endif
2013}
2014
2015
2016/**
2017 * Used by TB code to load unsigned 16-bit data w/ flat address.
2018 * @note Zero extending the value to 64-bit to simplify assembly.
2019 */
2020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2021{
2022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2023 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2024#else
2025 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2026#endif
2027}
2028
2029
2030/**
2031 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2032 * to 32 bits.
2033 * @note Zero extending the value to 64-bit to simplify assembly.
2034 */
2035IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2036{
2037#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2038 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2039#else
2040 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2041#endif
2042}
2043
2044
2045/**
2046 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2047 * to 64 bits.
2048 * @note Zero extending the value to 64-bit to simplify assembly.
2049 */
2050IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2051{
2052#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2053 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2054#else
2055 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2056#endif
2057}
2058
2059
2060/**
2061 * Used by TB code to load unsigned 32-bit data w/ flat address.
2062 * @note Zero extending the value to 64-bit to simplify assembly.
2063 */
2064IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2065{
2066#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2067 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2068#else
2069 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2070#endif
2071}
2072
2073
2074/**
2075 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2076 * to 64 bits.
2077 * @note Zero extending the value to 64-bit to simplify assembly.
2078 */
2079IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2080{
2081#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2082 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2083#else
2084 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2085#endif
2086}
2087
2088
2089/**
2090 * Used by TB code to load unsigned 64-bit data w/ flat address.
2091 */
2092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2093{
2094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2095 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2096#else
2097 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2098#endif
2099}
2100
2101
2102/**
2103 * Used by TB code to store unsigned 8-bit data w/ flat address.
2104 */
2105IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2106{
2107#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2108 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2109#else
2110 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2111#endif
2112}
2113
2114
2115/**
2116 * Used by TB code to store unsigned 16-bit data w/ flat address.
2117 */
2118IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2119{
2120#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2121 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2122#else
2123 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2124#endif
2125}
2126
2127
2128/**
2129 * Used by TB code to store unsigned 32-bit data w/ flat address.
2130 */
2131IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2132{
2133#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2134 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2135#else
2136 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2137#endif
2138}
2139
2140
2141/**
2142 * Used by TB code to store unsigned 64-bit data w/ flat address.
2143 */
2144IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2145{
2146#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2147 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2148#else
2149 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2150#endif
2151}
2152
2153
2154
2155/**
2156 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2157 */
2158IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2159{
2160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2161 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2162#else
2163 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2164#endif
2165}
2166
2167
2168/**
2169 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2172{
2173#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2174 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2175#else
2176 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2177#endif
2178}
2179
2180
2181/**
2182 * Used by TB code to store a segment selector value onto a flat stack.
2183 *
2184 * Intel CPUs don't write a whole dword, hence the special function.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2189 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2190#else
2191 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2198 */
2199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2200{
2201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2202 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2203#else
2204 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2205#endif
2206}
2207
2208
2209/**
2210 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2211 */
2212IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2213{
2214#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2215 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2216#else
2217 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2218#endif
2219}
2220
2221
2222/**
2223 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2224 */
2225IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2226{
2227#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2228 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2229#else
2230 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2231#endif
2232}
2233
2234
2235/**
2236 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2237 */
2238IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2239{
2240#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2241 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2242#else
2243 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2244#endif
2245}
2246
2247
2248
2249/*********************************************************************************************************************************
2250* Helpers: Segmented memory mapping. *
2251*********************************************************************************************************************************/
2252
2253/**
2254 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2255 * segmentation.
2256 */
2257IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2258 RTGCPTR GCPtrMem, uint8_t iSegReg))
2259{
2260#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2261 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2262#else
2263 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2264#endif
2265}
2266
2267
2268/**
2269 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2270 */
2271IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2272 RTGCPTR GCPtrMem, uint8_t iSegReg))
2273{
2274#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2275 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2276#else
2277 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2278#endif
2279}
2280
2281
2282/**
2283 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2284 */
2285IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2286 RTGCPTR GCPtrMem, uint8_t iSegReg))
2287{
2288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2289 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#else
2291 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2292#endif
2293}
2294
2295
2296/**
2297 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2298 */
2299IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2300 RTGCPTR GCPtrMem, uint8_t iSegReg))
2301{
2302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2303 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#else
2305 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2306#endif
2307}
2308
2309
2310/**
2311 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2312 * segmentation.
2313 */
2314IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2315 RTGCPTR GCPtrMem, uint8_t iSegReg))
2316{
2317#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2318 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2319#else
2320 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2321#endif
2322}
2323
2324
2325/**
2326 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2327 */
2328IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2329 RTGCPTR GCPtrMem, uint8_t iSegReg))
2330{
2331#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2332 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2333#else
2334 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2335#endif
2336}
2337
2338
2339/**
2340 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2341 */
2342IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2343 RTGCPTR GCPtrMem, uint8_t iSegReg))
2344{
2345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2346 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#else
2348 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#endif
2350}
2351
2352
2353/**
2354 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2355 */
2356IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2357 RTGCPTR GCPtrMem, uint8_t iSegReg))
2358{
2359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2360 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#else
2362 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#endif
2364}
2365
2366
2367/**
2368 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2369 * segmentation.
2370 */
2371IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2372 RTGCPTR GCPtrMem, uint8_t iSegReg))
2373{
2374#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2375 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2376#else
2377 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2378#endif
2379}
2380
2381
2382/**
2383 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2384 */
2385IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2386 RTGCPTR GCPtrMem, uint8_t iSegReg))
2387{
2388#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2389 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2390#else
2391 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2392#endif
2393}
2394
2395
2396/**
2397 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2400 RTGCPTR GCPtrMem, uint8_t iSegReg))
2401{
2402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2403 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#else
2405 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2406#endif
2407}
2408
2409
2410/**
2411 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2412 */
2413IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2414 RTGCPTR GCPtrMem, uint8_t iSegReg))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2417 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#else
2419 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2426 * segmentation.
2427 */
2428IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2429 RTGCPTR GCPtrMem, uint8_t iSegReg))
2430{
2431#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2432 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2433#else
2434 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2435#endif
2436}
2437
2438
2439/**
2440 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2443 RTGCPTR GCPtrMem, uint8_t iSegReg))
2444{
2445#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2446 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2447#else
2448 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2449#endif
2450}
2451
2452
2453/**
2454 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2455 */
2456IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2457 RTGCPTR GCPtrMem, uint8_t iSegReg))
2458{
2459#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2460 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#else
2462 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#endif
2464}
2465
2466
2467/**
2468 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2469 */
2470IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2471 RTGCPTR GCPtrMem, uint8_t iSegReg))
2472{
2473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2474 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#else
2476 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2477#endif
2478}
2479
2480
2481/**
2482 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2483 */
2484IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2485 RTGCPTR GCPtrMem, uint8_t iSegReg))
2486{
2487#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2488 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#else
2490 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2491#endif
2492}
2493
2494
2495/**
2496 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2497 */
2498IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2499 RTGCPTR GCPtrMem, uint8_t iSegReg))
2500{
2501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2502 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#else
2504 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#endif
2506}
2507
2508
2509/**
2510 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2511 * segmentation.
2512 */
2513IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2514 RTGCPTR GCPtrMem, uint8_t iSegReg))
2515{
2516#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2517 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2518#else
2519 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2520#endif
2521}
2522
2523
2524/**
2525 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2526 */
2527IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2528 RTGCPTR GCPtrMem, uint8_t iSegReg))
2529{
2530#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2531 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2532#else
2533 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2534#endif
2535}
2536
2537
2538/**
2539 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2540 */
2541IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2542 RTGCPTR GCPtrMem, uint8_t iSegReg))
2543{
2544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2545 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#else
2547 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#endif
2549}
2550
2551
2552/**
2553 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2554 */
2555IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2556 RTGCPTR GCPtrMem, uint8_t iSegReg))
2557{
2558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2559 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#else
2561 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#endif
2563}
2564
2565
2566/*********************************************************************************************************************************
2567* Helpers: Flat memory mapping. *
2568*********************************************************************************************************************************/
2569
2570/**
2571 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2572 * address.
2573 */
2574IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2575{
2576#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2577 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2578#else
2579 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2580#endif
2581}
2582
2583
2584/**
2585 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2586 */
2587IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2588{
2589#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2590 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2591#else
2592 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2593#endif
2594}
2595
2596
2597/**
2598 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2599 */
2600IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2601{
2602#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2603 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2604#else
2605 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2606#endif
2607}
2608
2609
2610/**
2611 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2612 */
2613IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2614{
2615#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2616 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2617#else
2618 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2619#endif
2620}
2621
2622
2623/**
2624 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2625 * address.
2626 */
2627IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2628{
2629#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2630 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2631#else
2632 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2633#endif
2634}
2635
2636
2637/**
2638 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2639 */
2640IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2641{
2642#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2643 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2644#else
2645 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2646#endif
2647}
2648
2649
2650/**
2651 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2652 */
2653IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2654{
2655#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2656 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2657#else
2658 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2659#endif
2660}
2661
2662
2663/**
2664 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2665 */
2666IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2667{
2668#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2669 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2670#else
2671 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2672#endif
2673}
2674
2675
2676/**
2677 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2678 * address.
2679 */
2680IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2681{
2682#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2683 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2684#else
2685 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2686#endif
2687}
2688
2689
2690/**
2691 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2692 */
2693IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2694{
2695#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2696 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2697#else
2698 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2699#endif
2700}
2701
2702
2703/**
2704 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2705 */
2706IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2707{
2708#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2709 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2710#else
2711 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2712#endif
2713}
2714
2715
2716/**
2717 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2718 */
2719IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2720{
2721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2722 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2723#else
2724 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2725#endif
2726}
2727
2728
2729/**
2730 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2731 * address.
2732 */
2733IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2734{
2735#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2736 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2737#else
2738 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2739#endif
2740}
2741
2742
2743/**
2744 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2745 */
2746IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2747{
2748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2749 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2750#else
2751 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2752#endif
2753}
2754
2755
2756/**
2757 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2758 */
2759IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2760{
2761#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2762 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2763#else
2764 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2765#endif
2766}
2767
2768
2769/**
2770 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2771 */
2772IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2773{
2774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2775 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2776#else
2777 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2778#endif
2779}
2780
2781
2782/**
2783 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2784 */
2785IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2786{
2787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2788 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2789#else
2790 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2791#endif
2792}
2793
2794
2795/**
2796 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2797 */
2798IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2799{
2800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2801 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2802#else
2803 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2804#endif
2805}
2806
2807
2808/**
2809 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2810 * address.
2811 */
2812IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2813{
2814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2815 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2816#else
2817 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2818#endif
2819}
2820
2821
2822/**
2823 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2824 */
2825IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2826{
2827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2828 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2829#else
2830 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2831#endif
2832}
2833
2834
2835/**
2836 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2837 */
2838IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2839{
2840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2841 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2842#else
2843 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2844#endif
2845}
2846
2847
2848/**
2849 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2850 */
2851IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2852{
2853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2854 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2855#else
2856 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2857#endif
2858}
2859
2860
2861/*********************************************************************************************************************************
2862* Helpers: Commit, rollback & unmap *
2863*********************************************************************************************************************************/
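/*
 * The bUnmapInfo byte passed to the helpers below is the opaque token that the
 * mapping helpers above returned via pbUnmapInfo; it identifies which of the
 * VCPU's active memory mappings to commit and/or release.
 */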
2864
2865/**
2866 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2867 */
2868IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2869{
2870 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2871}
2872
2873
2874/**
2875 * Used by TB code to commit and unmap a read-write memory mapping.
2876 */
2877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2878{
2879 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2880}
2881
2882
2883/**
2884 * Used by TB code to commit and unmap a write-only memory mapping.
2885 */
2886IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2887{
2888 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2889}
2890
2891
2892/**
2893 * Used by TB code to commit and unmap a read-only memory mapping.
2894 */
2895IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2896{
2897 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2898}
2899
2900
2901/**
2902 * Reinitializes the native recompiler state.
2903 *
2904 * Called before starting a new recompile job.
2905 */
2906static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2907{
2908 pReNative->cLabels = 0;
2909 pReNative->bmLabelTypes = 0;
2910 pReNative->cFixups = 0;
2911#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2912 pReNative->pDbgInfo->cEntries = 0;
2913#endif
2914 pReNative->pTbOrg = pTb;
2915 pReNative->cCondDepth = 0;
2916 pReNative->uCondSeqNo = 0;
2917 pReNative->uCheckIrqSeqNo = 0;
2918 pReNative->uTlbSeqNo = 0;
2919
2920 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2921#if IEMNATIVE_HST_GREG_COUNT < 32
2922 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2923#endif
2924 ;
2925 pReNative->Core.bmHstRegsWithGstShadow = 0;
2926 pReNative->Core.bmGstRegShadows = 0;
2927 pReNative->Core.bmVars = 0;
2928 pReNative->Core.bmStack = 0;
2929 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2930 pReNative->Core.u64ArgVars = UINT64_MAX;
2931
2932 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 11);
2933 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2934 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2935 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2936 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2937 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2938 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2939 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2940 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2941 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2942 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2943 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2944
2945 /* Full host register reinit: */
2946 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2947 {
2948 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2949 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2950 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2951 }
2952
2953 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2954 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2955#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2956 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2957#endif
2958#ifdef IEMNATIVE_REG_FIXED_TMP0
2959 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2960#endif
2961 );
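    /* Mark every remaining register in the fixed mask, i.e. everything except
       the pVCpu, CPUMCTX and TMP0 registers that get their specific
       classification right below, as reserved so the register allocator leaves
       it alone. */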
2962 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2963 {
2964 fRegs &= ~RT_BIT_32(idxReg);
2965 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2966 }
2967
2968 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2969#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2970 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2971#endif
2972#ifdef IEMNATIVE_REG_FIXED_TMP0
2973 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2974#endif
2975 return pReNative;
2976}
2977
2978
2979/**
2980 * Allocates and initializes the native recompiler state.
2981 *
2982 * This is called the first time an EMT wants to recompile something.
2983 *
2984 * @returns Pointer to the new recompiler state.
2985 * @param pVCpu The cross context virtual CPU structure of the calling
2986 * thread.
2987 * @param pTb The TB that's about to be recompiled.
2988 * @thread EMT(pVCpu)
2989 */
2990static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2991{
2992 VMCPU_ASSERT_EMT(pVCpu);
2993
2994 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2995 AssertReturn(pReNative, NULL);
2996
2997 /*
2998 * Try allocate all the buffers and stuff we need.
2999 */
3000 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3001 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3002 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3003#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3004 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3005#endif
3006 if (RT_LIKELY( pReNative->pInstrBuf
3007 && pReNative->paLabels
3008 && pReNative->paFixups)
3009#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3010 && pReNative->pDbgInfo
3011#endif
3012 )
3013 {
3014 /*
3015 * Set the buffer & array sizes on success.
3016 */
3017 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3018 pReNative->cLabelsAlloc = _8K;
3019 pReNative->cFixupsAlloc = _16K;
3020#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3021 pReNative->cDbgInfoAlloc = _16K;
3022#endif
3023
3024 /* Other constant stuff: */
3025 pReNative->pVCpu = pVCpu;
3026
3027 /*
3028 * Done, just need to save it and reinit it.
3029 */
3030 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3031 return iemNativeReInit(pReNative, pTb);
3032 }
3033
3034 /*
3035 * Failed. Cleanup and return.
3036 */
3037 AssertFailed();
3038 RTMemFree(pReNative->pInstrBuf);
3039 RTMemFree(pReNative->paLabels);
3040 RTMemFree(pReNative->paFixups);
3041#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3042 RTMemFree(pReNative->pDbgInfo);
3043#endif
3044 RTMemFree(pReNative);
3045 return NULL;
3046}
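/*
 * The initial capacities used above (a 64 KiB instruction buffer, 8K labels,
 * 16K fixups and, when enabled, 16K debug info entries) are just starting
 * points; the emitters reallocate and grow these on demand, as the doubling
 * logic in iemNativeLabelCreate below illustrates for the label table.
 */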
3047
3048
3049/**
3050 * Creates a label.
3051 *
3052 * If the label does not yet have a defined position,
3053 * call iemNativeLabelDefine() later to set it.
3054 *
3055 * @returns Label ID. Throws VBox status code on failure, so no need to check
3056 * the return value.
3057 * @param pReNative The native recompile state.
3058 * @param enmType The label type.
3059 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3060 * label is not yet defined (default).
3061 * @param uData Data associated with the label. Only applicable to
3062 * certain types of labels. Default is zero.
3063 */
3064DECL_HIDDEN_THROW(uint32_t)
3065iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3066 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3067{
3068 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3069
3070 /*
3071 * Locate existing label definition.
3072 *
3073 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3074 * and uData is zero.
3075 */
3076 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3077 uint32_t const cLabels = pReNative->cLabels;
3078 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3079#ifndef VBOX_STRICT
3080 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3081 && offWhere == UINT32_MAX
3082 && uData == 0
3083#endif
3084 )
3085 {
3086#ifndef VBOX_STRICT
3087 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3088 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3089 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3090 if (idxLabel < pReNative->cLabels)
3091 return idxLabel;
3092#else
3093 for (uint32_t i = 0; i < cLabels; i++)
3094 if ( paLabels[i].enmType == enmType
3095 && paLabels[i].uData == uData)
3096 {
3097 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3098 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3099 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3100 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3101 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3102 return i;
3103 }
3104 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3105 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3106#endif
3107 }
3108
3109 /*
3110 * Make sure we've got room for another label.
3111 */
3112 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3113 { /* likely */ }
3114 else
3115 {
3116 uint32_t cNew = pReNative->cLabelsAlloc;
3117 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3118 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3119 cNew *= 2;
3120         AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3121 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3122 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3123 pReNative->paLabels = paLabels;
3124 pReNative->cLabelsAlloc = cNew;
3125 }
3126
3127 /*
3128 * Define a new label.
3129 */
3130 paLabels[cLabels].off = offWhere;
3131 paLabels[cLabels].enmType = enmType;
3132 paLabels[cLabels].uData = uData;
3133 pReNative->cLabels = cLabels + 1;
3134
3135 Assert((unsigned)enmType < 64);
3136 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3137
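    /* Label types below kIemNativeLabelType_FirstWithMultipleInstances only ever
       have a single instance per TB, so cache their index for the O(1) lookup above. */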
3138 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3139 {
3140 Assert(uData == 0);
3141 pReNative->aidxUniqueLabels[enmType] = cLabels;
3142 }
3143
3144 if (offWhere != UINT32_MAX)
3145 {
3146#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3147 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3148 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3149#endif
3150 }
3151 return cLabels;
3152}
3153
3154
3155/**
3156 * Defines the location of an existing label.
3157 *
3158 * @param pReNative The native recompile state.
3159 * @param idxLabel The label to define.
3160 * @param offWhere The position.
3161 */
3162DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3163{
3164 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3165 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3166 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3167 pLabel->off = offWhere;
3168#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3169 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3170 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3171#endif
3172}
3173
3174
3175/**
3176 * Looks up a label.
3177 *
3178 * @returns Label ID if found, UINT32_MAX if not.
3179 */
3180static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3181 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3182{
3183 Assert((unsigned)enmType < 64);
3184 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3185 {
3186 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3187 return pReNative->aidxUniqueLabels[enmType];
3188
3189 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3190 uint32_t const cLabels = pReNative->cLabels;
3191 for (uint32_t i = 0; i < cLabels; i++)
3192 if ( paLabels[i].enmType == enmType
3193 && paLabels[i].uData == uData
3194 && ( paLabels[i].off == offWhere
3195 || offWhere == UINT32_MAX
3196 || paLabels[i].off == UINT32_MAX))
3197 return i;
3198 }
3199 return UINT32_MAX;
3200}
3201
3202
3203/**
3204 * Adds a fixup.
3205 *
3206 * @throws VBox status code (int) on failure.
3207 * @param pReNative The native recompile state.
3208 * @param offWhere The instruction offset of the fixup location.
3209 * @param idxLabel The target label ID for the fixup.
3210 * @param enmType The fixup type.
3211 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3212 */
3213DECL_HIDDEN_THROW(void)
3214iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3215 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3216{
3217 Assert(idxLabel <= UINT16_MAX);
3218 Assert((unsigned)enmType <= UINT8_MAX);
3219
3220 /*
3221      * Make sure we've got room for another fixup.
3222 */
3223 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3224 uint32_t const cFixups = pReNative->cFixups;
3225 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3226 { /* likely */ }
3227 else
3228 {
3229 uint32_t cNew = pReNative->cFixupsAlloc;
3230 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3231 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3232 cNew *= 2;
3233 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3234 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3235 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3236 pReNative->paFixups = paFixups;
3237 pReNative->cFixupsAlloc = cNew;
3238 }
3239
3240 /*
3241 * Add the fixup.
3242 */
3243 paFixups[cFixups].off = offWhere;
3244 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3245 paFixups[cFixups].enmType = enmType;
3246 paFixups[cFixups].offAddend = offAddend;
3247 pReNative->cFixups = cFixups + 1;
3248}
3249
3250
3251/**
3252 * Slow code path for iemNativeInstrBufEnsure.
3253 */
3254DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3255{
3256 /* Double the buffer size till we meet the request. */
3257 uint32_t cNew = pReNative->cInstrBufAlloc;
3258 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3259 do
3260 cNew *= 2;
3261 while (cNew < off + cInstrReq);
3262
3263 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3264#ifdef RT_ARCH_ARM64
3265 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3266#else
3267 uint32_t const cbMaxInstrBuf = _2M;
3268#endif
3269 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3270
3271 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3272 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3273
3274#ifdef VBOX_STRICT
3275 pReNative->offInstrBufChecked = off + cInstrReq;
3276#endif
3277 pReNative->cInstrBufAlloc = cNew;
3278 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3279}
3280
3281#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3282
3283/**
3284 * Grows the static debug info array used during recompilation.
3285 *
3286 * @returns Pointer to the new debug info block; throws VBox status code on
3287 * failure, so no need to check the return value.
3288 */
3289DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3290{
3291 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3292 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3293 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3294 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3295 pReNative->pDbgInfo = pDbgInfo;
3296 pReNative->cDbgInfoAlloc = cNew;
3297 return pDbgInfo;
3298}
3299
3300
3301/**
3302 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3303 */
3304DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3305{
3306 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3307 { /* likely */ }
3308 else
3309 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3310 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3311}
3312
3313
3314/**
3315 * Debug Info: Adds a native offset record, if necessary.
3316 */
3317static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3318{
3319 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3320
3321 /*
3322 * Search backwards to see if we've got a similar record already.
3323 */
3324 uint32_t idx = pDbgInfo->cEntries;
3325 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3326 while (idx-- > idxStop)
3327 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3328 {
3329 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3330 return;
3331 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3332 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3333 break;
3334 }
3335
3336 /*
3337 * Add it.
3338 */
3339 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3340 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3341 pEntry->NativeOffset.offNative = off;
3342}
3343
3344
3345/**
3346 * Debug Info: Record info about a label.
3347 */
3348static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3349{
3350 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3351 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3352 pEntry->Label.uUnused = 0;
3353 pEntry->Label.enmLabel = (uint8_t)enmType;
3354 pEntry->Label.uData = uData;
3355}
3356
3357
3358/**
3359 * Debug Info: Record info about a threaded call.
3360 */
3361static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3362{
3363 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3364 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3365 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3366 pEntry->ThreadedCall.uUnused = 0;
3367 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3368}
3369
3370
3371/**
3372 * Debug Info: Record info about a new guest instruction.
3373 */
3374static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3375{
3376 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3377 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3378 pEntry->GuestInstruction.uUnused = 0;
3379 pEntry->GuestInstruction.fExec = fExec;
3380}
3381
3382
3383/**
3384 * Debug Info: Record info about guest register shadowing.
3385 */
3386static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3387 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3388{
3389 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3390 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3391 pEntry->GuestRegShadowing.uUnused = 0;
3392 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3393 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3394 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3395}
3396
3397#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3398
3399
3400/*********************************************************************************************************************************
3401* Register Allocator *
3402*********************************************************************************************************************************/
3403
3404/**
3405 * Register parameter indexes (indexed by argument number).
3406 */
3407DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3408{
3409 IEMNATIVE_CALL_ARG0_GREG,
3410 IEMNATIVE_CALL_ARG1_GREG,
3411 IEMNATIVE_CALL_ARG2_GREG,
3412 IEMNATIVE_CALL_ARG3_GREG,
3413#if defined(IEMNATIVE_CALL_ARG4_GREG)
3414 IEMNATIVE_CALL_ARG4_GREG,
3415# if defined(IEMNATIVE_CALL_ARG5_GREG)
3416 IEMNATIVE_CALL_ARG5_GREG,
3417# if defined(IEMNATIVE_CALL_ARG6_GREG)
3418 IEMNATIVE_CALL_ARG6_GREG,
3419# if defined(IEMNATIVE_CALL_ARG7_GREG)
3420 IEMNATIVE_CALL_ARG7_GREG,
3421# endif
3422# endif
3423# endif
3424#endif
3425};
3426
3427/**
3428 * Call register masks indexed by argument count.
3429 */
3430DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3431{
3432 0,
3433 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3434 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3435 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3436 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3437 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3438#if defined(IEMNATIVE_CALL_ARG4_GREG)
3439 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3440 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3441# if defined(IEMNATIVE_CALL_ARG5_GREG)
3442 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3443 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3444# if defined(IEMNATIVE_CALL_ARG6_GREG)
3445 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3446 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3447 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3448# if defined(IEMNATIVE_CALL_ARG7_GREG)
3449 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3450 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3451 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3452# endif
3453# endif
3454# endif
3455#endif
3456};
3457
3458#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3459/**
3460 * BP offset of the stack argument slots.
3461 *
3462 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3463 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3464 */
3465DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3466{
3467 IEMNATIVE_FP_OFF_STACK_ARG0,
3468# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3469 IEMNATIVE_FP_OFF_STACK_ARG1,
3470# endif
3471# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3472 IEMNATIVE_FP_OFF_STACK_ARG2,
3473# endif
3474# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3475 IEMNATIVE_FP_OFF_STACK_ARG3,
3476# endif
3477};
3478AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3479#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3480
3481/**
3482 * Info about shadowed guest register values.
3483 * @see IEMNATIVEGSTREG
3484 */
3485static struct
3486{
3487 /** Offset in VMCPU. */
3488 uint32_t off;
3489 /** The field size. */
3490 uint8_t cb;
3491 /** Name (for logging). */
3492 const char *pszName;
3493} const g_aGstShadowInfo[] =
3494{
3495#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3496 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3497 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3498 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3499 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3500 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3501 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3502 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3503 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3504 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3505 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3506 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3507 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3508 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3509 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3510 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3511 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3512 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3513 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3514 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3515 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3516 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3517 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3518 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3519 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3520 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3521 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3522 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3523 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3524 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3525 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3526 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3527 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3528 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3529 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3530 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3531 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3532 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3533 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3534 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3535 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3536 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3537 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3538 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3539 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3540 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3541 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3542#undef CPUMCTX_OFF_AND_SIZE
3543};
3544AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3545
3546
3547/** Host CPU general purpose register names. */
3548DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3549{
3550#ifdef RT_ARCH_AMD64
3551 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3552#elif defined(RT_ARCH_ARM64)
3553 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3554 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3555#else
3556# error "port me"
3557#endif
3558};
3559
3560
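/**
 * Bookkeeping helper: marks host register @a idxReg as allocated for @a enmWhat
 * (and variable @a idxVar where applicable), resets its guest shadow mask and
 * returns the register index.
 */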
3561DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3562 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3563{
3564 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3565
3566 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3567 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3568 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3569 return (uint8_t)idxReg;
3570}
3571
3572
3573#if 0 /* unused */
3574/**
3575 * Tries to locate a suitable register in the given register mask.
3576 *
3577 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3578 * failed.
3579 *
3580 * @returns Host register number on success, returns UINT8_MAX on failure.
3581 */
3582static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3583{
3584 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3585 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3586 if (fRegs)
3587 {
3588 /** @todo pick better here: */
3589 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3590
3591 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3592 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3593 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3594 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3595
3596 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3597 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3598 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3599 return idxReg;
3600 }
3601 return UINT8_MAX;
3602}
3603#endif /* unused */
3604
3605
3606/**
3607 * Locate a register, possibly freeing one up.
3608 *
3609 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3610 * failed.
3611 *
3612 * @returns Host register number on success. Returns UINT8_MAX if no register
3613 *          was found; the caller is supposed to deal with this and raise an
3614 *          allocation-type specific status code (if desired).
3615 *
3616 * @throws  VBox status code if we run into trouble spilling a variable or
3617 *          recording debug info.  Does NOT throw anything if we're out of
3618 *          registers, though.
3619 */
3620static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3621 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3622{
3623 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3624 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3625 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3626
3627 /*
3628 * Try a freed register that's shadowing a guest register.
3629 */
3630 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3631 if (fRegs)
3632 {
3633 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3634
3635#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3636 /*
3637         * When we have liveness information, we use it to kick out all shadowed
3638         * guest registers that will not be needed any more in this TB.  If we're
3639 * lucky, this may prevent us from ending up here again.
3640 *
3641 * Note! We must consider the previous entry here so we don't free
3642 * anything that the current threaded function requires (current
3643 * entry is produced by the next threaded function).
3644 */
3645 uint32_t const idxCurCall = pReNative->idxCurCall;
3646 if (idxCurCall > 0)
3647 {
3648 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3649
3650# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3651 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3652 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3653             uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state. */
3654# else
3655 /* Construct a mask of the registers not in the read or write state.
3656                Note! We could skip writes, if they aren't from us, as this is just
3657 a hack to prevent trashing registers that have just been written
3658 or will be written when we retire the current instruction. */
3659 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3660 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3661 & IEMLIVENESSBIT_MASK;
3662# endif
3663 /* Merge EFLAGS. */
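            /* EFLAGS is tracked as seven liveness slots: 'other' in the
               kIemNativeGstReg_EFlags bit and CF/PF/AF/ZF/SF/OF in the six bits
               directly above it.  The shift-and-AND sequence below folds all of
               them into the single kIemNativeGstReg_EFlags bit, so the flags
               register is only considered freeable when every sub-flag is. */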
3664 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3665 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3666 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3667 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3668 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3669
3670 /* If it matches any shadowed registers. */
3671 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3672 {
3673 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3674 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3675 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3676
3677 /* See if we've got any unshadowed registers we can return now. */
3678 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3679 if (fUnshadowedRegs)
3680 {
3681 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3682 return (fPreferVolatile
3683 ? ASMBitFirstSetU32(fUnshadowedRegs)
3684 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3685 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3686 - 1;
3687 }
3688 }
3689 }
3690#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3691
3692 unsigned const idxReg = (fPreferVolatile
3693 ? ASMBitFirstSetU32(fRegs)
3694 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3695 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3696 - 1;
3697
3698 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3699 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3700 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3701 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3702
3703 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3704 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3705 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3706 return idxReg;
3707 }
3708
3709 /*
3710      * Try to free up a variable that's in a register.
3711      *
3712      * We do two rounds here: first we evacuate variables that don't need to be
3713      * saved on the stack, then in the second round we move things to the stack.
3714 */
3715 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3716 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3717 {
3718 uint32_t fVars = pReNative->Core.bmVars;
3719 while (fVars)
3720 {
3721 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3722 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3723 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3724 && (RT_BIT_32(idxReg) & fRegMask)
3725 && ( iLoop == 0
3726 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3727 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3728 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3729 {
3730 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3731 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3732 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3733 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3734 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3735 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3736
3737 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3738 {
3739 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3740 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3741 }
3742
3743 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3744 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3745
3746 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3747 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3748 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3749 return idxReg;
3750 }
3751 fVars &= ~RT_BIT_32(idxVar);
3752 }
3753 }
3754
3755 return UINT8_MAX;
3756}
3757
3758
3759/**
3760 * Reassigns a variable to a different register specified by the caller.
3761 *
3762 * @returns The new code buffer position.
3763 * @param pReNative The native recompile state.
3764 * @param off The current code buffer position.
3765 * @param idxVar The variable index.
3766 * @param idxRegOld The old host register number.
3767 * @param idxRegNew The new host register number.
3768 * @param pszCaller The caller for logging.
3769 */
3770static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3771 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3772{
3773 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3774 RT_NOREF(pszCaller);
3775
3776 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3777
3778 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3779 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3780 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3781 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3782
3783 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3784 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3785 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3786 if (fGstRegShadows)
3787 {
3788 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3789 | RT_BIT_32(idxRegNew);
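        /* Re-point every guest register shadowed by the old host register at the new one. */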
3790 while (fGstRegShadows)
3791 {
3792 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3793 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3794
3795 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3796 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3797 }
3798 }
3799
3800 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3801 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3802 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3803 return off;
3804}
3805
3806
3807/**
3808 * Moves a variable to a different register or spills it onto the stack.
3809 *
3810 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3811 * kinds can easily be recreated if needed later.
3812 *
3813 * @returns The new code buffer position.
3814 * @param pReNative The native recompile state.
3815 * @param off The current code buffer position.
3816 * @param idxVar The variable index.
3817 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3818 * call-volatile registers.
3819 */
3820static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3821 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3822{
3823 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3824 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3825 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3826
3827 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3828 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3829 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3830 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3831 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3832 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3833 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3834 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3835 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3836
3837
3838     /** @todo Add statistics on this. */
3839 /** @todo Implement basic variable liveness analysis (python) so variables
3840      * can be freed immediately once no longer used.  Without it we risk
3841      * trashing registers and stack space on dead variables.
3842 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3843
3844 /*
3845 * First try move it to a different register, as that's cheaper.
3846 */
3847 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3848 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3849 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3850 if (fRegs)
3851 {
3852 /* Avoid using shadow registers, if possible. */
3853 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3854 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3855 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3856 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3857 }
3858
3859 /*
3860 * Otherwise we must spill the register onto the stack.
3861 */
3862 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3863 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3864 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3865 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3866
3867 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3868 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3869 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3870 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3871 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3872 return off;
3873}
3874
3875
3876/**
3877 * Allocates a temporary host general purpose register.
3878 *
3879 * This may emit code to save register content onto the stack in order to free
3880 * up a register.
3881 *
3882 * @returns The host register number; throws VBox status code on failure,
3883 * so no need to check the return value.
3884 * @param pReNative The native recompile state.
3885 * @param poff Pointer to the variable with the code buffer position.
3886 *                      This will be updated if we need to move a variable from
3887 * register to stack in order to satisfy the request.
3888 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3889 * registers (@c true, default) or the other way around
3890 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3891 */
3892DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3893{
3894 /*
3895 * Try find a completely unused register, preferably a call-volatile one.
3896 */
3897 uint8_t idxReg;
3898 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3899 & ~pReNative->Core.bmHstRegsWithGstShadow
3900 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3901 if (fRegs)
3902 {
3903 if (fPreferVolatile)
3904 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3905 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3906 else
3907 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3908 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3909 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3910 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3911 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3912 }
3913 else
3914 {
3915 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3916 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3917 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3918 }
3919 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3920}
3921
3922
3923/**
3924 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
3925 * registers.
3926 *
3927 * @returns The host register number; throws VBox status code on failure,
3928 * so no need to check the return value.
3929 * @param pReNative The native recompile state.
3930 * @param poff Pointer to the variable with the code buffer position.
3931 *                      This will be updated if we need to move a variable from
3932 * register to stack in order to satisfy the request.
3933 * @param fRegMask Mask of acceptable registers.
3934 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3935 * registers (@c true, default) or the other way around
3936 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3937 */
3938DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3939 bool fPreferVolatile /*= true*/)
3940{
3941 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3942 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3943
3944 /*
3945 * Try find a completely unused register, preferably a call-volatile one.
3946 */
3947 uint8_t idxReg;
3948 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3949 & ~pReNative->Core.bmHstRegsWithGstShadow
3950 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3951 & fRegMask;
3952 if (fRegs)
3953 {
3954 if (fPreferVolatile)
3955 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3956 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3957 else
3958 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3959 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3960 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3961 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3962 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3963 }
3964 else
3965 {
3966 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3967 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3968 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3969 }
3970 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3971}
3972
3973
3974/**
3975 * Allocates a temporary register for loading an immediate value into.
3976 *
3977 * This will emit code to load the immediate, unless there happens to be an
3978 * unused register with the value already loaded.
3979 *
3980 * The caller will not modify the returned register, it must be considered
3981 * read-only. Free using iemNativeRegFreeTmpImm.
3982 *
3983 * @returns The host register number; throws VBox status code on failure, so no
3984 * need to check the return value.
3985 * @param pReNative The native recompile state.
3986 * @param poff Pointer to the variable with the code buffer position.
3987 * @param uImm The immediate value that the register must hold upon
3988 * return.
3989 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3990 * registers (@c true, default) or the other way around
3991 * (@c false).
3992 *
3993 * @note Reusing immediate values has not been implemented yet.
3994 */
3995DECL_HIDDEN_THROW(uint8_t)
3996iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3997{
3998 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3999 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4000 return idxReg;
4001}
4002
4003#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4004
4005# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4006/**
4007 * Helper for iemNativeLivenessGetStateByGstReg.
4008 *
4009 * @returns IEMLIVENESS_STATE_XXX
4010 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4011 * ORed together.
4012 */
4013DECL_FORCE_INLINE(uint32_t)
4014iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4015{
4016 /* INPUT trumps anything else. */
4017 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4018 return IEMLIVENESS_STATE_INPUT;
4019
4020 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4021 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4022 {
4023 /* If not all sub-fields are clobbered they must be considered INPUT. */
4024 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4025 return IEMLIVENESS_STATE_INPUT;
4026 return IEMLIVENESS_STATE_CLOBBERED;
4027 }
4028
4029 /* XCPT_OR_CALL trumps UNUSED. */
4030 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4031 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4032
4033 return IEMLIVENESS_STATE_UNUSED;
4034}
4035# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4036
4037
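/**
 * Gets the liveness state bits for the given (extended) guest register index by
 * gathering the register's bit from each of the state bitmaps in the entry.
 */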
4038DECL_FORCE_INLINE(uint32_t)
4039iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4040{
4041# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4042 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4043 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4044# else
4045 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4046 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4047 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4048 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
4049# endif
4050}
4051
4052
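/**
 * Gets the liveness state for guest register @a enmGstReg, merging the
 * individual EFLAGS sub-flag states into a single state when queried about
 * kIemNativeGstReg_EFlags.
 */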
4053DECL_FORCE_INLINE(uint32_t)
4054iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4055{
4056 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4057 if (enmGstReg == kIemNativeGstReg_EFlags)
4058 {
4059 /* Merge the eflags states to one. */
4060# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4061 uRet = RT_BIT_32(uRet);
4062 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4063 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4064 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4065 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4066 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4067 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4068 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4069# else
4070 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4071 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4072 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4073 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4074 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4075 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4076 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4077# endif
4078 }
4079 return uRet;
4080}
4081
4082
4083# ifdef VBOX_STRICT
4084/** For assertions only; the caller must ensure that idxCurCall isn't zero. */
4085DECL_FORCE_INLINE(uint32_t)
4086iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4087{
4088 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4089}
4090# endif /* VBOX_STRICT */
4091
4092#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4093
4094/**
4095 * Marks host register @a idxHstReg as containing a shadow copy of guest
4096 * register @a enmGstReg.
4097 *
4098 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4099 * host register before calling.
4100 */
4101DECL_FORCE_INLINE(void)
4102iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4103{
4104 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4105 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4106 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4107
4108 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4109 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4110 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4111 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4112#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4113 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4114 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4115#else
4116 RT_NOREF(off);
4117#endif
4118}
4119
4120
4121/**
4122 * Clear any guest register shadow claims from @a idxHstReg.
4123 *
4124 * The register does not need to be shadowing any guest registers.
4125 */
4126DECL_FORCE_INLINE(void)
4127iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4128{
4129 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4130 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4131 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4132 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4133 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4134
4135#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4136 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4137 if (fGstRegs)
4138 {
4139 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4140 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4141 while (fGstRegs)
4142 {
4143 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4144 fGstRegs &= ~RT_BIT_64(iGstReg);
4145 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4146 }
4147 }
4148#else
4149 RT_NOREF(off);
4150#endif
4151
4152 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4153 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4154 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4155}
4156
4157
4158/**
4159 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4160 * and global overview flags.
4161 */
4162DECL_FORCE_INLINE(void)
4163iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4164{
4165 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4166 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4167 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4168 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4169 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4170 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4171 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4172
4173#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4174 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4175 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4176#else
4177 RT_NOREF(off);
4178#endif
4179
4180 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4181 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4182 if (!fGstRegShadowsNew)
4183 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4184 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4185}
4186
4187
4188#if 0 /* unused */
4189/**
4190 * Clear any guest register shadow claim for @a enmGstReg.
4191 */
4192DECL_FORCE_INLINE(void)
4193iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4194{
4195 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4196 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4197 {
4198 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4199 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4200 }
4201}
4202#endif
4203
4204
4205/**
4206 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4207 * as the new shadow of it.
4208 *
4209 * Unlike the other guest reg shadow helpers, this does the logging for you.
4210 * However, the liveness state is not asserted here; the caller must do
4211 * that.
4212 */
4213DECL_FORCE_INLINE(void)
4214iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4215 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4216{
4217 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4218 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4219 {
4220 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4221 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4222 if (idxHstRegOld == idxHstRegNew)
4223 return;
4224 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4225 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4226 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4227 }
4228 else
4229 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4230 g_aGstShadowInfo[enmGstReg].pszName));
4231 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4232}
4233
4234
4235/**
4236 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4237 * to @a idxRegTo.
4238 */
4239DECL_FORCE_INLINE(void)
4240iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4241 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4242{
4243 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4244 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4245 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4246 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4247 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4248 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4249 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4250 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4251 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4252
4253 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4254 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4255 if (!fGstRegShadowsFrom)
4256 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4257 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4258 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4259 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4260#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4261 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4262 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4263#else
4264 RT_NOREF(off);
4265#endif
4266}
4267
4268
4269/**
4270 * Allocates a temporary host general purpose register for keeping a guest
4271 * register value.
4272 *
4273 * A register may already be holding the guest register value; if not, code
4274 * will be emitted to load it.  Code may also be emitted if we have to free up
4275 * a register to satisfy the request.
4276 *
4277 * @returns The host register number; throws VBox status code on failure, so no
4278 * need to check the return value.
4279 * @param pReNative The native recompile state.
4280 * @param poff Pointer to the variable with the code buffer
4281 *                          position. This will be updated if we need to move a
4282 * variable from register to stack in order to satisfy
4283 * the request.
4284 * @param   enmGstReg       The guest register that is to be updated.
4285 * @param enmIntendedUse How the caller will be using the host register.
4286 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4287 * register is okay (default). The ASSUMPTION here is
4288 * that the caller has already flushed all volatile
4289 * registers, so this is only applied if we allocate a
4290 * new register.
4291 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4292 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4293 */
4294DECL_HIDDEN_THROW(uint8_t)
4295iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4296 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4297 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4298{
4299 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4300#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4301 AssertMsg( fSkipLivenessAssert
4302 || pReNative->idxCurCall == 0
4303 || enmGstReg == kIemNativeGstReg_Pc
4304 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4305 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4306 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4307 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4308 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4309 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4310#endif
4311 RT_NOREF(fSkipLivenessAssert);
4312#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4313 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4314#endif
4315 uint32_t const fRegMask = !fNoVolatileRegs
4316 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4317 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4318
4319 /*
4320 * First check if the guest register value is already in a host register.
4321 */
4322 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4323 {
4324 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4325 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4326 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4327 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4328
4329 /* It's not supposed to be allocated... */
4330 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4331 {
4332 /*
4333 * If the register will trash the guest shadow copy, try find a
4334 * completely unused register we can use instead. If that fails,
4335 * we need to disassociate the host reg from the guest reg.
4336 */
4337 /** @todo would be nice to know if preserving the register is in any way helpful. */
4338 /* If the purpose is calculations, try duplicate the register value as
4339 we'll be clobbering the shadow. */
4340 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4341 && ( ~pReNative->Core.bmHstRegs
4342 & ~pReNative->Core.bmHstRegsWithGstShadow
4343 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4344 {
4345 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4346
4347 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4348
4349 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4350 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4351 g_apszIemNativeHstRegNames[idxRegNew]));
4352 idxReg = idxRegNew;
4353 }
4354 /* If the current register matches the restrictions, go ahead and allocate
4355 it for the caller. */
4356 else if (fRegMask & RT_BIT_32(idxReg))
4357 {
4358 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4359 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4360 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4361 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4362 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4363 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4364 else
4365 {
4366 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4367 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4368 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4369 }
4370 }
4371 /* Otherwise, allocate a register that satisfies the caller and transfer
4372 the shadowing if compatible with the intended use. (This basically
4373                means the caller wants a non-volatile register (RSP push/pop scenario).) */
4374 else
4375 {
4376 Assert(fNoVolatileRegs);
4377 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4378 !fNoVolatileRegs
4379 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4380 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4381 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4382 {
4383 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4384                    Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4385 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4386 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4387 }
4388 else
4389 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4390 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4391 g_apszIemNativeHstRegNames[idxRegNew]));
4392 idxReg = idxRegNew;
4393 }
4394 }
4395 else
4396 {
4397 /*
4398 * Oops. Shadowed guest register already allocated!
4399 *
4400 * Allocate a new register, copy the value and, if updating, the
4401 * guest shadow copy assignment to the new register.
4402 */
4403 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4404 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4405 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4406 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4407
4408 /** @todo share register for readonly access. */
4409 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4410 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4411
4412 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4413 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4414
4415 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4416 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4417 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4418 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4419 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4420 else
4421 {
4422 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4423 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4424 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4425 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4426 }
4427 idxReg = idxRegNew;
4428 }
4429 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4430
4431#ifdef VBOX_STRICT
4432 /* Strict builds: Check that the value is correct. */
4433 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4434#endif
4435
4436 return idxReg;
4437 }
4438
4439 /*
4440     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4441 */
4442 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4443
4444 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4445 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4446
4447 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4448 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4449 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4450 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4451
4452 return idxRegNew;
4453}
4454
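/*
 * Hedged usage sketch (not part of the build): a typical caller of the
 * allocator above grabs a host register shadowing a guest register, emits
 * its code against it and then releases the temporary again.
 *
 *      uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
 *                                                               kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code that updates the value in idxPcReg ...
 *      iemNativeRegFreeTmp(pReNative, idxPcReg);
 */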
4455
4456/**
4457 * Allocates a temporary host general purpose register that already holds the
4458 * given guest register value.
4459 *
4460 * The use case for this function is code paths where the shadowing state cannot be
4461 * modified due to branching and such. This will fail if we don't have a
4462 * current shadow copy handy or if it's incompatible. The only code that will
4463 * be emitted here is value checking code in strict builds.
4464 *
4465 * The intended use can only be readonly!
4466 *
4467 * @returns The host register number, UINT8_MAX if not present.
4468 * @param pReNative The native recompile state.
4469 * @param poff Pointer to the instruction buffer offset.
4470 * Will be updated in strict builds if a register is
4471 * found.
4472 * @param   enmGstReg   The guest register that is to be used (read-only).
4473 * @note In strict builds, this may throw instruction buffer growth failures.
4474 * Non-strict builds will not throw anything.
4475 * @sa iemNativeRegAllocTmpForGuestReg
4476 */
4477DECL_HIDDEN_THROW(uint8_t)
4478iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4479{
4480 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4481#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4482 AssertMsg( pReNative->idxCurCall == 0
4483 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4484 || enmGstReg == kIemNativeGstReg_Pc,
4485 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4486#endif
4487
4488 /*
4489 * First check if the guest register value is already in a host register.
4490 */
4491 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4492 {
4493 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4494 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4495 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4496 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4497
4498 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4499 {
4500 /*
4501 * We only do readonly use here, so easy compared to the other
4502 * variant of this code.
4503 */
4504 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4505 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4506 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4507 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4508 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4509
4510#ifdef VBOX_STRICT
4511 /* Strict builds: Check that the value is correct. */
4512 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4513#else
4514 RT_NOREF(poff);
4515#endif
4516 return idxReg;
4517 }
4518 }
4519
4520 return UINT8_MAX;
4521}
4522
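/*
 * Hedged usage sketch (not part of the build): since the function above must
 * not change the shadowing state, callers test for UINT8_MAX and fall back to
 * some other strategy when no compatible shadow copy exists.
 *
 *      uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
 *                                                                                kIemNativeGstReg_EFlags);
 *      if (idxEflReg != UINT8_MAX)
 *      {
 *          // ... use idxEflReg read-only ...
 *          iemNativeRegFreeTmp(pReNative, idxEflReg);
 *      }
 *      // else: fall back, e.g. read the value from CPUMCTX without shadowing it.
 */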
4523
4524DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
4525
4526
4527/**
4528 * Allocates argument registers for a function call.
4529 *
4530 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4531 * need to check the return value.
4532 * @param pReNative The native recompile state.
4533 * @param off The current code buffer offset.
4534 * @param cArgs The number of arguments the function call takes.
4535 */
4536DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4537{
4538 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4539 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4540 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4541 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4542
4543 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4544 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4545 else if (cArgs == 0)
4546        return off;
4547
4548 /*
4549     * Did we get lucky and all the registers are free and not shadowing anything?
4550 */
4551 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4552 for (uint32_t i = 0; i < cArgs; i++)
4553 {
4554 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4555 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4556 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4557 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4558 }
4559 /*
4560 * Okay, not lucky so we have to free up the registers.
4561 */
4562 else
4563 for (uint32_t i = 0; i < cArgs; i++)
4564 {
4565 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4566 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4567 {
4568 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4569 {
4570 case kIemNativeWhat_Var:
4571 {
4572 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4573 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
4574 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4575 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4576 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4577
4578 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4579 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4580 else
4581 {
4582 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4583 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4584 }
4585 break;
4586 }
4587
4588 case kIemNativeWhat_Tmp:
4589 case kIemNativeWhat_Arg:
4590 case kIemNativeWhat_rc:
4591 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4592 default:
4593 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4594 }
4595
4596 }
4597 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4598 {
4599 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4600 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4601 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4602 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4603 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4604 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4605 }
4606 else
4607 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4608 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4609 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4610 }
4611 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4612    return off;
4613}
4614
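/*
 * Hedged usage sketch (not part of the build): reserving the first couple of
 * call argument registers before loading them manually for a helper call.
 *
 *      off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      // ... load IEMNATIVE_CALL_ARG1_GREG, emit the call, check the status ...
 */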
4615
4616DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4617
4618
4619#if 0
4620/**
4621 * Frees a register assignment of any type.
4622 *
4623 * @param pReNative The native recompile state.
4624 * @param idxHstReg The register to free.
4625 *
4626 * @note Does not update variables.
4627 */
4628DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4629{
4630 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4631 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4632 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4633 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4634 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4635 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4636 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4637 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4638 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4639 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4640 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4641 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4642 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4643 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4644
4645 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4646 /* no flushing, right:
4647 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4648 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4649 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4650 */
4651}
4652#endif
4653
4654
4655/**
4656 * Frees a temporary register.
4657 *
4658 * Any shadow copies of guest registers assigned to the host register will not
4659 * be flushed by this operation.
4660 */
4661DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4662{
4663 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4664 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4665 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4666 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4667 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4668}
4669
4670
4671/**
4672 * Frees a temporary immediate register.
4673 *
4674 * It is assumed that the caller has not modified the register, so it still holds
4675 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4676 */
4677DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4678{
4679 iemNativeRegFreeTmp(pReNative, idxHstReg);
4680}
4681
4682
4683/**
4684 * Frees a register assigned to a variable.
4685 *
4686 * The register will be disassociated from the variable.
4687 */
4688DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4689{
4690 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4691 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4692 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4693 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4694 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4695
4696 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4697 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4698 if (!fFlushShadows)
4699 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
4700 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4701 else
4702 {
4703 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4704 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4705 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4706 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4707 uint64_t fGstRegShadows = fGstRegShadowsOld;
4708 while (fGstRegShadows)
4709 {
4710 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4711 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4712
4713 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4714 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4715 }
4716 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
4717 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4718 }
4719}
4720
4721
4722/**
4723 * Called right before emitting a call instruction to move anything important
4724 * out of call-volatile registers, free and flush the call-volatile registers,
4725 * optionally freeing argument variables.
4726 *
4727 * @returns New code buffer offset, UINT32_MAX on failure.
4728 * @param pReNative The native recompile state.
4729 * @param off The code buffer offset.
4730 * @param cArgs The number of arguments the function call takes.
4731 *                      It is presumed that the host register part of these has
4732 * been allocated as such already and won't need moving,
4733 * just freeing.
4734 * @param fKeepVars Mask of variables that should keep their register
4735 * assignments. Caller must take care to handle these.
4736 */
4737DECL_HIDDEN_THROW(uint32_t)
4738iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4739{
4740 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4741
4742 /* fKeepVars will reduce this mask. */
4743 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4744
4745 /*
4746 * Move anything important out of volatile registers.
4747 */
4748 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4749 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4750 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4751#ifdef IEMNATIVE_REG_FIXED_TMP0
4752 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4753#endif
4754 & ~g_afIemNativeCallRegs[cArgs];
4755
4756 fRegsToMove &= pReNative->Core.bmHstRegs;
4757 if (!fRegsToMove)
4758 { /* likely */ }
4759 else
4760 {
4761 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4762 while (fRegsToMove != 0)
4763 {
4764 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4765 fRegsToMove &= ~RT_BIT_32(idxReg);
4766
4767 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4768 {
4769 case kIemNativeWhat_Var:
4770 {
4771 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4772 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4773 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4774 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4775 if (!(RT_BIT_32(idxVar) & fKeepVars))
4776 {
4777 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4778 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4779 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4780 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4781 else
4782 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4783 }
4784 else
4785 fRegsToFree &= ~RT_BIT_32(idxReg);
4786 continue;
4787 }
4788
4789 case kIemNativeWhat_Arg:
4790 AssertMsgFailed(("What?!?: %u\n", idxReg));
4791 continue;
4792
4793 case kIemNativeWhat_rc:
4794 case kIemNativeWhat_Tmp:
4795 AssertMsgFailed(("Missing free: %u\n", idxReg));
4796 continue;
4797
4798 case kIemNativeWhat_FixedTmp:
4799 case kIemNativeWhat_pVCpuFixed:
4800 case kIemNativeWhat_pCtxFixed:
4801 case kIemNativeWhat_FixedReserved:
4802 case kIemNativeWhat_Invalid:
4803 case kIemNativeWhat_End:
4804 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4805 }
4806 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4807 }
4808 }
4809
4810 /*
4811 * Do the actual freeing.
4812 */
4813 if (pReNative->Core.bmHstRegs & fRegsToFree)
4814 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4815 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4816 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4817
4818    /* If there are guest register shadows in any call-volatile register, we
4819       have to clear the corresponding guest register masks for each register. */
4820 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4821 if (fHstRegsWithGstShadow)
4822 {
4823 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4824 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4825 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4826 do
4827 {
4828 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4829 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4830
4831 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4832 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4833 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4834 } while (fHstRegsWithGstShadow != 0);
4835 }
4836
4837 return off;
4838}
4839
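/*
 * Hedged sketch of the call preparation sequence this function is part of,
 * mirroring what iemNativeEmitThreadedCall/iemNativeEmitCImplCall below do:
 * flush the guest shadows the callee may invalidate, then clear out the
 * call-volatile registers before loading arguments and emitting the call.
 *
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4 /*cArgs*/);
 *      // ... load IEMNATIVE_CALL_ARGn_GREG, iemNativeEmitCallImm(), check the return code ...
 */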
4840
4841/**
4842 * Flushes a set of guest register shadow copies.
4843 *
4844 * This is usually done after calling a threaded function or a C-implementation
4845 * of an instruction.
4846 *
4847 * @param pReNative The native recompile state.
4848 * @param fGstRegs Set of guest registers to flush.
4849 */
4850DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4851{
4852 /*
4853 * Reduce the mask by what's currently shadowed
4854 */
4855 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4856 fGstRegs &= bmGstRegShadowsOld;
4857 if (fGstRegs)
4858 {
4859 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4860 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4861 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4862 if (bmGstRegShadowsNew)
4863 {
4864 /*
4865 * Partial.
4866 */
4867 do
4868 {
4869 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4870 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4871 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4872 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4873 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4874
4875 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4876 fGstRegs &= ~fInThisHstReg;
4877 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4878 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4879 if (!fGstRegShadowsNew)
4880 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4881 } while (fGstRegs != 0);
4882 }
4883 else
4884 {
4885 /*
4886 * Clear all.
4887 */
4888 do
4889 {
4890 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4891 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4892 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4893 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4894 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4895
4896 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4897 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4898 } while (fGstRegs != 0);
4899 pReNative->Core.bmHstRegsWithGstShadow = 0;
4900 }
4901 }
4902}
4903
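/*
 * Hedged sketch: the mask is built from kIemNativeGstReg_Xxx bits, e.g. to
 * drop the shadows of rSP and EFLAGS after a helper that may change them:
 *
 *      iemNativeRegFlushGuestShadows(pReNative,
 *                                      RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
 *                                    | RT_BIT_64(kIemNativeGstReg_EFlags));
 */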
4904
4905/**
4906 * Flushes guest register shadow copies held by a set of host registers.
4907 *
4908 * This is used with the TLB lookup code for ensuring that we don't carry on
4909 * with any guest shadows in volatile registers, as these will get corrupted by
4910 * a TLB miss.
4911 *
4912 * @param pReNative The native recompile state.
4913 * @param fHstRegs Set of host registers to flush guest shadows for.
4914 */
4915DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4916{
4917 /*
4918 * Reduce the mask by what's currently shadowed.
4919 */
4920 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4921 fHstRegs &= bmHstRegsWithGstShadowOld;
4922 if (fHstRegs)
4923 {
4924 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4925 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4926 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4927 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4928 if (bmHstRegsWithGstShadowNew)
4929 {
4930 /*
4931 * Partial (likely).
4932 */
4933 uint64_t fGstShadows = 0;
4934 do
4935 {
4936 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4937 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4938 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4939 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4940
4941 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4942 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4943 fHstRegs &= ~RT_BIT_32(idxHstReg);
4944 } while (fHstRegs != 0);
4945 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4946 }
4947 else
4948 {
4949 /*
4950 * Clear all.
4951 */
4952 do
4953 {
4954 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4955 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4956 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4957 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4958
4959 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4960 fHstRegs &= ~RT_BIT_32(idxHstReg);
4961 } while (fHstRegs != 0);
4962 pReNative->Core.bmGstRegShadows = 0;
4963 }
4964 }
4965}
4966
4967
4968/**
4969 * Restores guest shadow copies in volatile registers.
4970 *
4971 * This is used after calling a helper function (think TLB miss) to restore the
4972 * register state of volatile registers.
4973 *
4974 * @param pReNative The native recompile state.
4975 * @param off The code buffer offset.
4976 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4977 * be active (allocated) w/o asserting. Hack.
4978 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4979 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4980 */
4981DECL_HIDDEN_THROW(uint32_t)
4982iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4983{
4984 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4985 if (fHstRegs)
4986 {
4987 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4988 do
4989 {
4990 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4991
4992 /* It's not fatal if a register is active holding a variable that
4993                is shadowing a guest register, ASSUMING all pending guest register
4994 writes were flushed prior to the helper call. However, we'll be
4995                emitting duplicate restores, so it wastes code space. */
4996 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4997 RT_NOREF(fHstRegsActiveShadows);
4998
4999 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5000 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5001 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5002 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5003
5004 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5005 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5006
5007 fHstRegs &= ~RT_BIT_32(idxHstReg);
5008 } while (fHstRegs != 0);
5009 }
5010 return off;
5011}
5012
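/*
 * Hedged sketch (not part of the build): after emitting a call to a helper
 * that clobbers the volatile host registers at runtime, the shadow copies
 * recorded for them are reloaded so the straight-line code that follows can
 * keep using them.  pfnHelper is a placeholder here.
 *
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
 */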
5013
5014/**
5015 * Flushes delayed write of a specific guest register.
5016 *
5017 * This must be called prior to calling CImpl functions and any helpers that use
5018 * the guest state (like raising exceptions) and such.
5019 *
5020 * This optimization has not yet been implemented. The first target would be
5021 * RIP updates, since these are the most common ones.
5022 */
5023DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5024 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
5025{
5026 RT_NOREF(pReNative, enmClass, idxReg);
5027 return off;
5028}
5029
5030
5031/**
5032 * Flushes any delayed guest register writes.
5033 *
5034 * This must be called prior to calling CImpl functions and any helpers that use
5035 * the guest state (like raising exceptions) and such.
5036 *
5037 * This optimization has not yet been implemented. The first target would be
5038 * RIP updates, since these are the most common ones.
5039 */
5040DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5041{
5042 RT_NOREF(pReNative, off);
5043 return off;
5044}
5045
5046
5047#ifdef VBOX_STRICT
5048/**
5049 * Does internal register allocator sanity checks.
5050 */
5051static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5052{
5053 /*
5054 * Iterate host registers building a guest shadowing set.
5055 */
5056 uint64_t bmGstRegShadows = 0;
5057 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5058 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5059 while (bmHstRegsWithGstShadow)
5060 {
5061 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5062 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5063 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5064
5065 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5066 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5067 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5068 bmGstRegShadows |= fThisGstRegShadows;
5069 while (fThisGstRegShadows)
5070 {
5071 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5072 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5073 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5074 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5075 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5076 }
5077 }
5078 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5079 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5080 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5081
5082 /*
5083 * Now the other way around, checking the guest to host index array.
5084 */
5085 bmHstRegsWithGstShadow = 0;
5086 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5087 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5088 while (bmGstRegShadows)
5089 {
5090 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5091 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5092 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5093
5094 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5095 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5096 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5097 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5098 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5099 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5100 }
5101 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5102              ("%RX32 vs %RX32; diff %RX32\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5103 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5104}
5105#endif
5106
5107
5108/*********************************************************************************************************************************
5109* Code Emitters (larger snippets) *
5110*********************************************************************************************************************************/
5111
5112/**
5113 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5114 * extending to 64-bit width.
5115 *
5116 * @returns New code buffer offset on success, UINT32_MAX on failure.
5117 * @param   pReNative   The native recompile state.
5118 * @param off The current code buffer position.
5119 * @param idxHstReg The host register to load the guest register value into.
5120 * @param enmGstReg The guest register to load.
5121 *
5122 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5123 * that is something the caller needs to do if applicable.
5124 */
5125DECL_HIDDEN_THROW(uint32_t)
5126iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5127{
5128 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
5129 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5130
5131 switch (g_aGstShadowInfo[enmGstReg].cb)
5132 {
5133 case sizeof(uint64_t):
5134 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5135 case sizeof(uint32_t):
5136 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5137 case sizeof(uint16_t):
5138 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5139#if 0 /* not present in the table. */
5140 case sizeof(uint8_t):
5141 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5142#endif
5143 default:
5144 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5145 }
5146}
5147
5148
5149#ifdef VBOX_STRICT
5150/**
5151 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5152 *
5153 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5154 * Trashes EFLAGS on AMD64.
5155 */
5156static uint32_t
5157iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5158{
5159# ifdef RT_ARCH_AMD64
5160 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5161
5162 /* rol reg64, 32 */
5163 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5164 pbCodeBuf[off++] = 0xc1;
5165 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5166 pbCodeBuf[off++] = 32;
5167
5168 /* test reg32, ffffffffh */
5169 if (idxReg >= 8)
5170 pbCodeBuf[off++] = X86_OP_REX_B;
5171 pbCodeBuf[off++] = 0xf7;
5172 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5173 pbCodeBuf[off++] = 0xff;
5174 pbCodeBuf[off++] = 0xff;
5175 pbCodeBuf[off++] = 0xff;
5176 pbCodeBuf[off++] = 0xff;
5177
5178 /* je/jz +1 */
5179 pbCodeBuf[off++] = 0x74;
5180 pbCodeBuf[off++] = 0x01;
5181
5182 /* int3 */
5183 pbCodeBuf[off++] = 0xcc;
5184
5185 /* rol reg64, 32 */
5186 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5187 pbCodeBuf[off++] = 0xc1;
5188 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5189 pbCodeBuf[off++] = 32;
5190
5191# elif defined(RT_ARCH_ARM64)
5192 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5193 /* lsr tmp0, reg64, #32 */
5194 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5195 /* cbz tmp0, +1 */
5196 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5197 /* brk #0x1100 */
5198 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5199
5200# else
5201# error "Port me!"
5202# endif
5203 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5204 return off;
5205}
5206#endif /* VBOX_STRICT */
5207
5208
5209#ifdef VBOX_STRICT
5210/**
5211 * Emitting code that checks that the content of register @a idxReg is the same
5212 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5213 * instruction if that's not the case.
5214 *
5215 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5216 * Trashes EFLAGS on AMD64.
5217 */
5218static uint32_t
5219iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5220{
5221# ifdef RT_ARCH_AMD64
5222 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5223
5224 /* cmp reg, [mem] */
5225 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5226 {
5227 if (idxReg >= 8)
5228 pbCodeBuf[off++] = X86_OP_REX_R;
5229 pbCodeBuf[off++] = 0x38;
5230 }
5231 else
5232 {
5233 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5234 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5235 else
5236 {
5237 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5238 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5239 else
5240 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5241 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5242 if (idxReg >= 8)
5243 pbCodeBuf[off++] = X86_OP_REX_R;
5244 }
5245 pbCodeBuf[off++] = 0x39;
5246 }
5247 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5248
5249 /* je/jz +1 */
5250 pbCodeBuf[off++] = 0x74;
5251 pbCodeBuf[off++] = 0x01;
5252
5253 /* int3 */
5254 pbCodeBuf[off++] = 0xcc;
5255
5256 /* For values smaller than the register size, we must check that the rest
5257 of the register is all zeros. */
5258 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5259 {
5260 /* test reg64, imm32 */
5261 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5262 pbCodeBuf[off++] = 0xf7;
5263 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5264 pbCodeBuf[off++] = 0;
5265 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5266 pbCodeBuf[off++] = 0xff;
5267 pbCodeBuf[off++] = 0xff;
5268
5269 /* je/jz +1 */
5270 pbCodeBuf[off++] = 0x74;
5271 pbCodeBuf[off++] = 0x01;
5272
5273 /* int3 */
5274 pbCodeBuf[off++] = 0xcc;
5275 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5276 }
5277 else
5278 {
5279 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5280 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5281 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5282 }
5283
5284# elif defined(RT_ARCH_ARM64)
5285 /* mov TMP0, [gstreg] */
5286 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5287
5288 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5289 /* sub tmp0, tmp0, idxReg */
5290 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5291 /* cbz tmp0, +1 */
5292 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5293 /* brk #0x1000+enmGstReg */
5294 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5295 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5296
5297# else
5298# error "Port me!"
5299# endif
5300 return off;
5301}
5302#endif /* VBOX_STRICT */
5303
5304
5305#ifdef VBOX_STRICT
5306/**
5307 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
5308 * important bits.
5309 *
5310 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5311 * Trashes EFLAGS on AMD64.
5312 */
5313static uint32_t
5314iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5315{
5316 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5317 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5318 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5319 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5320
5321# ifdef RT_ARCH_AMD64
5322 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5323
5324 /* je/jz +1 */
5325 pbCodeBuf[off++] = 0x74;
5326 pbCodeBuf[off++] = 0x01;
5327
5328 /* int3 */
5329 pbCodeBuf[off++] = 0xcc;
5330
5331# elif defined(RT_ARCH_ARM64)
5332 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5333
5334 /* b.eq +1 */
5335 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5336 /* brk #0x2000 */
5337 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5338
5339# else
5340# error "Port me!"
5341# endif
5342 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5343
5344 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5345 return off;
5346}
5347#endif /* VBOX_STRICT */
5348
5349
5350/**
5351 * Emits a code for checking the return code of a call and rcPassUp, returning
5352 * Emits code for checking the return code of a call and rcPassUp, returning
5353 * from the code if either is non-zero.
5354DECL_HIDDEN_THROW(uint32_t)
5355iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5356{
5357#ifdef RT_ARCH_AMD64
5358 /*
5359 * AMD64: eax = call status code.
5360 */
5361
5362 /* edx = rcPassUp */
5363 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5364# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5365 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5366# endif
5367
5368 /* edx = eax | rcPassUp */
5369 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5370 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5371 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5372 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5373
5374 /* Jump to non-zero status return path. */
5375 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5376
5377 /* done. */
5378
5379#elif RT_ARCH_ARM64
5380 /*
5381 * ARM64: w0 = call status code.
5382 */
5383# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5384 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5385# endif
5386 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5387
5388 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5389
5390 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5391
5392 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5393 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5394 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5395
5396#else
5397# error "port me"
5398#endif
5399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5400 RT_NOREF_PV(idxInstr);
5401 return off;
5402}
5403
5404
5405/**
5406 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5407 * raising a \#GP(0) if it isn't.
5408 *
5409 * @returns New code buffer offset, UINT32_MAX on failure.
5410 * @param pReNative The native recompile state.
5411 * @param off The code buffer offset.
5412 * @param idxAddrReg The host register with the address to check.
5413 * @param idxInstr The current instruction.
5414 */
5415DECL_HIDDEN_THROW(uint32_t)
5416iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5417{
5418 /*
5419 * Make sure we don't have any outstanding guest register writes as we may
5420     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5421 */
5422 off = iemNativeRegFlushPendingWrites(pReNative, off);
5423
5424#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5425 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5426#else
5427 RT_NOREF(idxInstr);
5428#endif
5429
5430#ifdef RT_ARCH_AMD64
5431 /*
5432 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5433 * return raisexcpt();
5434     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5435 */
5436 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5437
5438 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5439 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5440 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5441 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5442 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5443
5444 iemNativeRegFreeTmp(pReNative, iTmpReg);
5445
5446#elif defined(RT_ARCH_ARM64)
5447 /*
5448 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5449 * return raisexcpt();
5450 * ----
5451 * mov x1, 0x800000000000
5452 * add x1, x0, x1
5453 * cmp xzr, x1, lsr 48
5454 * b.ne .Lraisexcpt
5455 */
5456 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5457
5458 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5459 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5460 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5461 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5462
5463 iemNativeRegFreeTmp(pReNative, iTmpReg);
5464
5465#else
5466# error "Port me"
5467#endif
5468 return off;
5469}
5470
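/*
 * Sketch of the arithmetic behind both emitter variants above (plain host C,
 * not emitted code): a 64-bit address is canonical exactly when bits 63:47
 * are all zero or all one, which is what the +0x8000 trick tests:
 *
 *      bool const fCanonical = (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
 *
 * When this is false the emitted code branches to the RaiseGp0 label.
 */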
5471
5472 * Emits code to check that the content of @a idxAddrReg is within the limit
5473 * Emits code to check if that the content of @a idxAddrReg is within the limit
5474 * of CS, raising a \#GP(0) if it isn't.
5475 *
5476 * @returns New code buffer offset; throws VBox status code on error.
5477 * @param pReNative The native recompile state.
5478 * @param off The code buffer offset.
5479 * @param idxAddrReg The host register (32-bit) with the address to
5480 * check.
5481 * @param idxInstr The current instruction.
5482 */
5483DECL_HIDDEN_THROW(uint32_t)
5484iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5485 uint8_t idxAddrReg, uint8_t idxInstr)
5486{
5487 /*
5488 * Make sure we don't have any outstanding guest register writes as we may
5489     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5490 */
5491 off = iemNativeRegFlushPendingWrites(pReNative, off);
5492
5493#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5494 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5495#else
5496 RT_NOREF(idxInstr);
5497#endif
5498
5499 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5500 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
5501 kIemNativeGstRegUse_ReadOnly);
5502
5503 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
5504 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5505
5506 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
5507 return off;
5508}
5509
5510
5511/**
5512 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5513 *
5514 * @returns The flush mask.
5515 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5516 * @param fGstShwFlush The starting flush mask.
5517 */
5518DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5519{
5520 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5521 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5522 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5523 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5524 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5525 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5526 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5527 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5528 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5529 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5530 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5531 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5532 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5533 return fGstShwFlush;
5534}
5535
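/*
 * Hedged example: a far-return style instruction flagged with
 * IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_BRANCH_STACK_FAR | IEM_CIMPL_F_RFLAGS
 * ends up flushing CS.sel/base/limit, SS.sel/base/limit, rSP and EFLAGS on
 * top of whatever the caller passed in via fGstShwFlush:
 *
 *      uint64_t const fFlush = iemNativeCImplFlagsToGuestShadowFlushMask(  IEM_CIMPL_F_BRANCH_FAR
 *                                                                        | IEM_CIMPL_F_BRANCH_STACK_FAR
 *                                                                        | IEM_CIMPL_F_RFLAGS,
 *                                                                        0 /*fGstShwFlush*/);
 */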
5536
5537/**
5538 * Emits a call to a CImpl function or something similar.
5539 */
5540DECL_HIDDEN_THROW(uint32_t)
5541iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5542 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5543{
5544 /*
5545     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5546 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5547 */
5548 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5549 fGstShwFlush
5550 | RT_BIT_64(kIemNativeGstReg_Pc)
5551 | RT_BIT_64(kIemNativeGstReg_EFlags));
5552 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5553
5554 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5555
5556 /*
5557 * Load the parameters.
5558 */
5559#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
5560    /* Special case: the hidden VBOXSTRICTRC pointer. */
5561 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5562 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5563 if (cAddParams > 0)
5564 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5565 if (cAddParams > 1)
5566 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5567 if (cAddParams > 2)
5568 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5569 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5570
5571#else
5572 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5573 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5574 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5575 if (cAddParams > 0)
5576 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5577 if (cAddParams > 1)
5578 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5579 if (cAddParams > 2)
5580# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5581 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5582# else
5583 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5584# endif
5585#endif
5586
5587 /*
5588 * Make the call.
5589 */
5590 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5591
5592#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5593 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5594#endif
5595
5596 /*
5597 * Check the status code.
5598 */
5599 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5600}
5601
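/*
 * Hedged usage sketch (not part of the build): deferring an instruction to a
 * C fallback that takes one extra parameter; idxInstr, cbInstr, pfnCImpl and
 * uArg0 are caller-side placeholders here.
 *
 *      off = iemNativeEmitCImplCall(pReNative, off, idxInstr, 0 /*fGstShwFlush*/, (uintptr_t)pfnCImpl,
 *                                   cbInstr, 1 /*cAddParams*/, uArg0, 0, 0);
 */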
5602
5603/**
5604 * Emits a call to a threaded worker function.
5605 */
5606DECL_HIDDEN_THROW(uint32_t)
5607iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5608{
5609 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5610 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5611
5612#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5613 /* The threaded function may throw / long jmp, so set current instruction
5614 number if we're counting. */
5615 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5616#endif
5617
5618 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
5619
5620#ifdef RT_ARCH_AMD64
5621 /* Load the parameters and emit the call. */
5622# ifdef RT_OS_WINDOWS
5623# ifndef VBOXSTRICTRC_STRICT_ENABLED
5624 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5625 if (cParams > 0)
5626 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5627 if (cParams > 1)
5628 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5629 if (cParams > 2)
5630 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5631# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5632 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5633 if (cParams > 0)
5634 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5635 if (cParams > 1)
5636 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5637 if (cParams > 2)
5638 {
5639 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5640 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5641 }
5642 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5643# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5644# else
5645 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5646 if (cParams > 0)
5647 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5648 if (cParams > 1)
5649 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5650 if (cParams > 2)
5651 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5652# endif
5653
5654 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5655
5656# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5657 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5658# endif
5659
5660#elif RT_ARCH_ARM64
5661 /*
5662 * ARM64:
5663 */
5664 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5665 if (cParams > 0)
5666 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5667 if (cParams > 1)
5668 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5669 if (cParams > 2)
5670 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5671
5672 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5673
5674#else
5675# error "port me"
5676#endif
5677
5678 /*
5679 * Check the status code.
5680 */
5681 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5682
5683 return off;
5684}
5685
5686#ifdef VBOX_WITH_STATISTICS
5687/**
5688 * Emits code to update the threaded call statistics.
5689 */
5690DECL_INLINE_THROW(uint32_t)
5691iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5692{
5693 /*
5694 * Update threaded function stats.
5695 */
5696 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
5697 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
5698# if defined(RT_ARCH_ARM64)
5699 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
5700 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
5701 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
5702 iemNativeRegFreeTmp(pReNative, idxTmp1);
5703 iemNativeRegFreeTmp(pReNative, idxTmp2);
5704# else
5705 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
5706# endif
5707 return off;
5708}
5709#endif /* VBOX_WITH_STATISTICS */
5710
5711
5712/**
5713 * Emits the code at the CheckBranchMiss label.
5714 */
5715static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5716{
5717 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5718 if (idxLabel != UINT32_MAX)
5719 {
5720 iemNativeLabelDefine(pReNative, idxLabel, off);
5721
5722 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5723 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5724 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5725
5726 /* jump back to the return sequence. */
5727 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5728 }
5729 return off;
5730}
5731
5732
5733/**
5734 * Emits the code at the NeedCsLimChecking label.
5735 */
5736static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5737{
5738 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5739 if (idxLabel != UINT32_MAX)
5740 {
5741 iemNativeLabelDefine(pReNative, idxLabel, off);
5742
5743 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5745 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5746
5747 /* jump back to the return sequence. */
5748 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5749 }
5750 return off;
5751}
5752
5753
5754/**
5755 * Emits the code at the ObsoleteTb label.
5756 */
5757static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5758{
5759 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5760 if (idxLabel != UINT32_MAX)
5761 {
5762 iemNativeLabelDefine(pReNative, idxLabel, off);
5763
5764 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5765 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5766 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5767
5768 /* jump back to the return sequence. */
5769 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5770 }
5771 return off;
5772}
5773
5774
5775/**
5776 * Emits the code at the RaiseGP0 label.
5777 */
5778static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5779{
5780 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5781 if (idxLabel != UINT32_MAX)
5782 {
5783 iemNativeLabelDefine(pReNative, idxLabel, off);
5784
5785 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5786 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5787 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5788
5789 /* jump back to the return sequence. */
5790 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5791 }
5792 return off;
5793}
5794
5795
5796/**
5797 * Emits the code at the RaiseNm label.
5798 */
5799static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5800{
5801 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
5802 if (idxLabel != UINT32_MAX)
5803 {
5804 iemNativeLabelDefine(pReNative, idxLabel, off);
5805
5806 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
5807 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5808 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
5809
5810 /* jump back to the return sequence. */
5811 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5812 }
5813 return off;
5814}
5815
5816
5817/**
5818 * Emits the code at the RaiseUd label.
5819 */
5820static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5821{
5822    uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
5823 if (idxLabel != UINT32_MAX)
5824 {
5825 iemNativeLabelDefine(pReNative, idxLabel, off);
5826
5827 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
5828 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5829 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
5830
5831 /* jump back to the return sequence. */
5832 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5833 }
5834 return off;
5835}
5836
5837
5838/**
5839 * Emits the code at the ReturnWithFlags label (returns
5840 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5841 */
5842static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5843{
5844 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5845 if (idxLabel != UINT32_MAX)
5846 {
5847 iemNativeLabelDefine(pReNative, idxLabel, off);
5848
5849 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5850
5851 /* jump back to the return sequence. */
5852 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5853 }
5854 return off;
5855}
5856
5857
5858/**
5859 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
5860 */
5861static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5862{
5863 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
5864 if (idxLabel != UINT32_MAX)
5865 {
5866 iemNativeLabelDefine(pReNative, idxLabel, off);
5867
5868 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5869
5870 /* jump back to the return sequence. */
5871 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5872 }
5873 return off;
5874}
5875
5876
5877/**
5878 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5879 */
5880static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5881{
5882 /*
5883 * Generate the rc + rcPassUp fiddling code if needed.
5884 */
5885 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5886 if (idxLabel != UINT32_MAX)
5887 {
5888 iemNativeLabelDefine(pReNative, idxLabel, off);
5889
5890 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
5891#ifdef RT_ARCH_AMD64
5892# ifdef RT_OS_WINDOWS
5893# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5894 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5895# endif
5896 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5897 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5898# else
5899 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5900 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5901# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5902 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5903# endif
5904# endif
5905# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5906 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5907# endif
5908
5909#else
5910 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5911 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5912 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5913#endif
5914
5915 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5916 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5917 }
5918 return off;
5919}
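
/* Conceptually the above sets up and performs (sketch):
 *      rc = iemNativeHlpExecStatusCodeFiddling(pVCpu, rc, idxInstr);
 * with the instruction number only being passed along when instruction counting
 * is enabled, before jumping to the common return sequence. */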
5920
5921
5922/**
5923 * Emits a standard epilog.
5924 */
5925static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5926{
5927 *pidxReturnLabel = UINT32_MAX;
5928
5929 /*
5930 * Successful return, so clear the return register (eax, w0).
5931 */
5932    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
5933
5934 /*
5935 * Define label for common return point.
5936 */
5937 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5938 *pidxReturnLabel = idxReturn;
5939
5940 /*
5941 * Restore registers and return.
5942 */
5943#ifdef RT_ARCH_AMD64
5944 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5945
5946    /* Reposition rsp at the r15 restore point. */
5947 pbCodeBuf[off++] = X86_OP_REX_W;
5948 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5949 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5950 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5951
5952 /* Pop non-volatile registers and return */
5953 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5954 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5955 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5956 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5957 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5958 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5959 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5960 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5961# ifdef RT_OS_WINDOWS
5962 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5963 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5964# endif
5965 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5966 pbCodeBuf[off++] = 0xc9; /* leave */
5967 pbCodeBuf[off++] = 0xc3; /* ret */
5968 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5969
5970#elif defined(RT_ARCH_ARM64)
5971 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5972
5973    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
5974 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5975 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5976 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5977 IEMNATIVE_FRAME_VAR_SIZE / 8);
5978 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5979 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5980 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5981 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5982 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5983 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5984 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5985 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5986 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5988 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5989 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5990
5991 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5992 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5993 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5994 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5995
5996 /* retab / ret */
5997# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5998 if (1)
5999 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6000 else
6001# endif
6002 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6003
6004#else
6005# error "port me"
6006#endif
6007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6008
6009 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6010}
6011
6012
6013/**
6014 * Emits a standard prolog.
6015 */
6016static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6017{
6018#ifdef RT_ARCH_AMD64
6019 /*
6020 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6021 * reserving 64 bytes for stack variables plus 4 non-register argument
6022     * slots.  Fixed register assignment: xBX = pVCpu.
6023 *
6024 * Since we always do the same register spilling, we can use the same
6025 * unwind description for all the code.
6026 */
6027 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6028 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6029 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6030 pbCodeBuf[off++] = 0x8b;
6031 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6032 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6033 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6034# ifdef RT_OS_WINDOWS
6035 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6036 pbCodeBuf[off++] = 0x8b;
6037 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6038 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6039 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6040# else
6041 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6042 pbCodeBuf[off++] = 0x8b;
6043 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6044# endif
6045 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6046 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6047 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6048 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6049 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6050 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6051 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6052 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6053
6054# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6055 /* Save the frame pointer. */
6056 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6057# endif
6058
6059 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6060 X86_GREG_xSP,
6061 IEMNATIVE_FRAME_ALIGN_SIZE
6062 + IEMNATIVE_FRAME_VAR_SIZE
6063 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6064 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6065 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6066 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6067 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6068
6069#elif defined(RT_ARCH_ARM64)
6070 /*
6071 * We set up a stack frame exactly like on x86, only we have to push the
6072     * return address ourselves here.  We save all non-volatile registers.
6073 */
6074 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6075
6076# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been
6077                      * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6078                      * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether
6079                      * it's in any way conditional, so we just emit this instruction now and hope for the best... */
6080 /* pacibsp */
6081 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6082# endif
6083
6084 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6085 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6086 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6087 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6088 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6089 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6090 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6091 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6092 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6093 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6094 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6095 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6096 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6097 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6098 /* Save the BP and LR (ret address) registers at the top of the frame. */
6099 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6100 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6101 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6102 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6103 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6104 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6105
6106 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6107 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6108
6109 /* mov r28, r0 */
6110 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6111 /* mov r27, r1 */
6112 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6113
6114# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6115 /* Save the frame pointer. */
6116 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6117 ARMV8_A64_REG_X2);
6118# endif
6119
6120#else
6121# error "port me"
6122#endif
6123 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6124 return off;
6125}
6126
6127
6128
6129
6130/*********************************************************************************************************************************
6131* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
6132*********************************************************************************************************************************/
6133
6134#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
6135 { \
6136 Assert(pReNative->Core.bmVars == 0); \
6137 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
6138 Assert(pReNative->Core.bmStack == 0); \
6139 pReNative->fMc = (a_fMcFlags); \
6140 pReNative->fCImpl = (a_fCImplFlags); \
6141 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
6142
6143/** We have to get to the end in recompilation mode, as otherwise we won't
6144 * generate code for all the IEM_MC_IF_XXX branches. */
6145#define IEM_MC_END() \
6146 iemNativeVarFreeAll(pReNative); \
6147 } return off
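
/* Rough usage sketch (a hypothetical MC block body; the exact flags and
 * statements are just an example, not taken from generated code):
 *
 *      IEM_MC_BEGIN(0, 0, IEM_MC_F_MIN_386, 0);
 *          IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(2, VINF_SUCCESS);
 *      IEM_MC_END();
 *
 * i.e. IEM_MC_BEGIN/IEM_MC_END bracket the recompiled block and the statement
 * wrappers in between expand to calls of the iemNativeEmitXxxx helpers. */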
6148
6149
6150
6151/*********************************************************************************************************************************
6152* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
6153*********************************************************************************************************************************/
6154
6155#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
6156 pReNative->fMc = 0; \
6157 pReNative->fCImpl = (a_fFlags); \
6158 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
6159
6160
6161#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6162 pReNative->fMc = 0; \
6163 pReNative->fCImpl = (a_fFlags); \
6164 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
6165
6166DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6167 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6168 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
6169{
6170 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
6171}
6172
6173
6174#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6175 pReNative->fMc = 0; \
6176 pReNative->fCImpl = (a_fFlags); \
6177 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6178 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
6179
6180DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6181 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6182 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
6183{
6184 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
6185}
6186
6187
6188#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6189 pReNative->fMc = 0; \
6190 pReNative->fCImpl = (a_fFlags); \
6191 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6192 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
6193
6194DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6195 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6196 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
6197 uint64_t uArg2)
6198{
6199 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
6200}
6201
6202
6203
6204/*********************************************************************************************************************************
6205* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
6206*********************************************************************************************************************************/
6207
6208/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
6209 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
6210DECL_INLINE_THROW(uint32_t)
6211iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6212{
6213 /*
6214     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
6215     * return with a special status code and make the execution loop deal with
6216     * this.  If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
6217     * exception and won't continue execution.  While CPUMCTX_DBG_DBGF_MASK
6218     * could continue w/o interruption, it probably will drop into the
6219     * debugger, so it is not worth the effort of trying to service it here;
6220     * we just lump it in with the handling of the others.
6221     *
6222     * To simplify the code and the register state management even more (wrt the
6223     * immediate in the AND operation), we always update the flags and skip the
6224     * extra check and its associated conditional jump.
6225 */
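    /* In C terms the emitted sequence is roughly (sketch):
     *      uint32_t fEfl = cpum.GstCtx.eflags;
     *      if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
     *          return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;
     *      fEfl &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
     *      cpum.GstCtx.eflags = fEfl;
     */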
6226 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
6227 <= UINT32_MAX);
6228#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6229 AssertMsg( pReNative->idxCurCall == 0
6230 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
6231 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
6232#endif
6233
6234 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6235 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
6236 true /*fSkipLivenessAssert*/);
6237 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
6238 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
6239 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
6240 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
6241 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6242
6243 /* Free but don't flush the EFLAGS register. */
6244 iemNativeRegFreeTmp(pReNative, idxEflReg);
6245
6246 return off;
6247}
6248
6249
6250/** The a_rcNormal tail emitter: a no-op for VINF_SUCCESS, otherwise (VINF_IEM_REEXEC_BREAK) it emits a jump to the ReturnBreak label. */
6251template<int const a_rcNormal>
6252DECL_FORCE_INLINE(uint32_t)
6253iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6254{
6255 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
6256 if (a_rcNormal != VINF_SUCCESS)
6257 {
6258#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6259 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6260#else
6261 RT_NOREF_PV(idxInstr);
6262#endif
6263 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
6264 }
6265 return off;
6266}
6267
6268
6269#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
6270 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6271 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6272
6273#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6274 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6275 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6276 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6277
6278/** Same as iemRegAddToRip64AndFinishingNoFlags. */
6279DECL_INLINE_THROW(uint32_t)
6280iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6281{
6282 /* Allocate a temporary PC register. */
6283 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6284
6285 /* Perform the addition and store the result. */
6286 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
6287 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6288
6289 /* Free but don't flush the PC register. */
6290 iemNativeRegFreeTmp(pReNative, idxPcReg);
6291
6292 return off;
6293}
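
/* I.e. the emitted code simply amounts to: cpum.GstCtx.rip += cbInstr; */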
6294
6295
6296#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
6297 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6298 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6299
6300#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6301 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6302 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6303 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6304
6305/** Same as iemRegAddToEip32AndFinishingNoFlags. */
6306DECL_INLINE_THROW(uint32_t)
6307iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6308{
6309 /* Allocate a temporary PC register. */
6310 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6311
6312 /* Perform the addition and store the result. */
6313 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6314 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6315
6316 /* Free but don't flush the PC register. */
6317 iemNativeRegFreeTmp(pReNative, idxPcReg);
6318
6319 return off;
6320}
6321
6322
6323#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
6324 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6325 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6326
6327#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6328 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6329 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6330 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6331
6332/** Same as iemRegAddToIp16AndFinishingNoFlags. */
6333DECL_INLINE_THROW(uint32_t)
6334iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6335{
6336 /* Allocate a temporary PC register. */
6337 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6338
6339 /* Perform the addition and store the result. */
6340 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6341 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6342 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6343
6344 /* Free but don't flush the PC register. */
6345 iemNativeRegFreeTmp(pReNative, idxPcReg);
6346
6347 return off;
6348}
6349
6350
6351
6352/*********************************************************************************************************************************
6353* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
6354*********************************************************************************************************************************/
6355
6356#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6357 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6358 (a_enmEffOpSize), pCallEntry->idxInstr); \
6359 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6360
6361#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6362 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6363 (a_enmEffOpSize), pCallEntry->idxInstr); \
6364 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6365 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6366
6367#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
6368 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6369 IEMMODE_16BIT, pCallEntry->idxInstr); \
6370 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6371
6372#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6373 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6374 IEMMODE_16BIT, pCallEntry->idxInstr); \
6375 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6376 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6377
6378#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
6379 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6380 IEMMODE_64BIT, pCallEntry->idxInstr); \
6381 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6382
6383#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6384 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6385 IEMMODE_64BIT, pCallEntry->idxInstr); \
6386 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6387 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6388
6389/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
6390 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
6391 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
6392DECL_INLINE_THROW(uint32_t)
6393iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6394 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6395{
6396 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
6397
6398 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6399 off = iemNativeRegFlushPendingWrites(pReNative, off);
6400
6401 /* Allocate a temporary PC register. */
6402 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6403
6404 /* Perform the addition. */
6405 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
6406
6407 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
6408 {
6409 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6410 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6411 }
6412 else
6413 {
6414 /* Just truncate the result to 16-bit IP. */
6415 Assert(enmEffOpSize == IEMMODE_16BIT);
6416 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6417 }
6418 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6419
6420 /* Free but don't flush the PC register. */
6421 iemNativeRegFreeTmp(pReNative, idxPcReg);
6422
6423 return off;
6424}
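
/* Sketch of what the emitted code does for the relative jump above:
 *      uint64_t uNewRip = cpum.GstCtx.rip + cbInstr + offDisp;
 *      if (enmEffOpSize == IEMMODE_64BIT)
 *      {
 *          if (uNewRip is not canonical)
 *              raise #GP(0) and exit the TB;
 *      }
 *      else
 *          uNewRip = (uint16_t)uNewRip;
 *      cpum.GstCtx.rip = uNewRip;
 */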
6425
6426
6427#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6428 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6429 (a_enmEffOpSize), pCallEntry->idxInstr); \
6430 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6431
6432#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6433 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6434 (a_enmEffOpSize), pCallEntry->idxInstr); \
6435 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6436 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6437
6438#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
6439 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6440 IEMMODE_16BIT, pCallEntry->idxInstr); \
6441 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6442
6443#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6444 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6445 IEMMODE_16BIT, pCallEntry->idxInstr); \
6446 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6447 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6448
6449#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
6450 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6451 IEMMODE_32BIT, pCallEntry->idxInstr); \
6452 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6453
6454#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6455 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6456 IEMMODE_32BIT, pCallEntry->idxInstr); \
6457 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6458 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6459
6460/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
6461 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
6462 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
6463DECL_INLINE_THROW(uint32_t)
6464iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6465 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6466{
6467 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
6468
6469 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6470 off = iemNativeRegFlushPendingWrites(pReNative, off);
6471
6472 /* Allocate a temporary PC register. */
6473 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6474
6475 /* Perform the addition. */
6476 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6477
6478 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6479 if (enmEffOpSize == IEMMODE_16BIT)
6480 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6481
6482 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
6483/** @todo we can skip this in 32-bit FLAT mode. */
6484 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6485
6486 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6487
6488 /* Free but don't flush the PC register. */
6489 iemNativeRegFreeTmp(pReNative, idxPcReg);
6490
6491 return off;
6492}
6493
6494
6495#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
6496 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6497 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6498
6499#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
6500 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6501 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6502 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6503
6504#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
6505 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6506 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6507
6508#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6509 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6510 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6511 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6512
6513#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
6514 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6515 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6516
6517#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6518 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6519 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6520 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6521
6522/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6523DECL_INLINE_THROW(uint32_t)
6524iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6525 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6526{
6527 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6528 off = iemNativeRegFlushPendingWrites(pReNative, off);
6529
6530 /* Allocate a temporary PC register. */
6531 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6532
6533 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6534 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6535 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6536 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6537 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6538
6539 /* Free but don't flush the PC register. */
6540 iemNativeRegFreeTmp(pReNative, idxPcReg);
6541
6542 return off;
6543}
6544
6545
6546
6547/*********************************************************************************************************************************
6548* Emitters for changing PC/RIP/EIP/IP with a indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
6549*********************************************************************************************************************************/
6550
6551/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6552#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6553 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6554
6555/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6556#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6557 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6558
6559/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6560#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6561 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6562
6563/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6564 * clears flags. */
6565#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6566 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6567 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6568
6569/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6570 * clears flags. */
6571#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6572 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6573 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6574
6575/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6576 * clears flags. */
6577#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6578 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6579 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6580
6581#undef IEM_MC_SET_RIP_U16_AND_FINISH
6582
6583
6584/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6585#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6586 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6587
6588/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6589#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6590 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6591
6592/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6593 * clears flags. */
6594#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6595 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6596 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6597
6598/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6599 * and clears flags. */
6600#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6601 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6602 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6603
6604#undef IEM_MC_SET_RIP_U32_AND_FINISH
6605
6606
6607/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6608#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6609 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6610
6611/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6612 * and clears flags. */
6613#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6614 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6615 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6616
6617#undef IEM_MC_SET_RIP_U64_AND_FINISH
6618
6619
6620/** Same as iemRegRipJumpU16AndFinishNoFlags,
6621 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6622DECL_INLINE_THROW(uint32_t)
6623iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6624 uint8_t idxInstr, uint8_t cbVar)
6625{
6626 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6627 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
6628
6629 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6630 off = iemNativeRegFlushPendingWrites(pReNative, off);
6631
6632 /* Get a register with the new PC loaded from idxVarPc.
6633       Note! This ASSUMES that the high bits of the GPR are zeroed. */
6634 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6635
6636 /* Check limit (may #GP(0) + exit TB). */
6637 if (!f64Bit)
6638/** @todo we can skip this test in FLAT 32-bit mode. */
6639 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6640 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6641 else if (cbVar > sizeof(uint32_t))
6642 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6643
6644 /* Store the result. */
6645 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6646
6647 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6648    /** @todo implicitly free the variable? */
6649
6650 return off;
6651}
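
/* In C terms (sketch):
 *      uint64_t uNewPc = <value of idxVarPc>;      // high bits assumed zero
 *      if (!f64Bit)
 *          check uNewPc against the CS limit, #GP(0) + TB exit on failure;
 *      else if (cbVar > sizeof(uint32_t))
 *          check that uNewPc is canonical, #GP(0) + TB exit on failure;
 *      cpum.GstCtx.rip = uNewPc;
 */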
6652
6653
6654
6655/*********************************************************************************************************************************
6656* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
6657*********************************************************************************************************************************/
6658
6659#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
6660 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
6661
6662/**
6663 * Emits code to check if a \#NM exception should be raised.
6664 *
6665 * @returns New code buffer offset, UINT32_MAX on failure.
6666 * @param pReNative The native recompile state.
6667 * @param off The code buffer offset.
6668 * @param idxInstr The current instruction.
6669 */
6670DECL_INLINE_THROW(uint32_t)
6671iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6672{
6673 /*
6674 * Make sure we don't have any outstanding guest register writes as we may
6675     * raise an #NM and all guest registers must be up to date in CPUMCTX.
6676 *
6677 * @todo r=aeichner Can we postpone this to the RaiseNm path?
6678 */
6679 off = iemNativeRegFlushPendingWrites(pReNative, off);
6680
6681#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6682 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6683#else
6684 RT_NOREF(idxInstr);
6685#endif
6686
6687 /* Allocate a temporary CR0 register. */
6688 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
6689 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
6690
6691 /*
6692     * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
6693 * return raisexcpt();
6694 */
6695 /* Test and jump. */
6696 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
6697
6698 /* Free but don't flush the CR0 register. */
6699 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
6700
6701 return off;
6702}
6703
6704
6705#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
6706 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
6707
6708/**
6709 * Emits code to check if a SSE exception (either \#UD or \#NM) should be raised.
6710 *
6711 * @returns New code buffer offset, UINT32_MAX on failure.
6712 * @param pReNative The native recompile state.
6713 * @param off The code buffer offset.
6714 * @param idxInstr The current instruction.
6715 */
6716DECL_INLINE_THROW(uint32_t)
6717iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6718{
6719 /*
6720 * Make sure we don't have any outstanding guest register writes as we may
6721     * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
6722 *
6723 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
6724 */
6725 off = iemNativeRegFlushPendingWrites(pReNative, off);
6726
6727#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6728 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6729#else
6730 RT_NOREF(idxInstr);
6731#endif
6732
6733 /* Allocate a temporary CR0 and CR4 register. */
6734 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
6735 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
6736 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
6737 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
6738
6739 /** @todo r=aeichner Optimize this more later to have less compares and branches,
6740 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
6741 * actual performance benefit first). */
6742 /*
6743 * if (cr0 & X86_CR0_EM)
6744 * return raisexcpt();
6745 */
6746 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM, idxLabelRaiseUd);
6747 /*
6748 * if (!(cr4 & X86_CR4_OSFXSR))
6749 * return raisexcpt();
6750 */
6751 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR, idxLabelRaiseUd);
6752 /*
6753 * if (cr0 & X86_CR0_TS)
6754 * return raisexcpt();
6755 */
6756 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_TS, idxLabelRaiseNm);
6757
6758 /* Free but don't flush the CR0 and CR4 register. */
6759 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
6760 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
6761
6762 return off;
6763}
6764
6765
6766
6767/*********************************************************************************************************************************
6768* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
6769*********************************************************************************************************************************/
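
/*
 * Overview (sketch): each IEM_MC_IF_XXX wrapper pushes an entry onto the
 * condition stack, emits the test together with a conditional jump to the
 * 'else' label, and snapshots the register/variable state.  The generated
 * code thus has the following shape (label names are illustrative only):
 *
 *          test/cmp ...
 *          jcc      .Lelse_N           ; condition is false
 *          ...                         ; if-block
 *          jmp      .Lendif_N          ; emitted by IEM_MC_ELSE
 *      .Lelse_N:
 *          ...                         ; optional else-block
 *      .Lendif_N:
 *
 * IEM_MC_ENDIF then reconciles the register/variable state of the two paths.
 */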
6770
6771/**
6772 * Pushes an IEM_MC_IF_XXX onto the condition stack.
6773 *
6774 * @returns Pointer to the condition stack entry on success, NULL on failure
6775 * (too many nestings)
6776 */
6777DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
6778{
6779 uint32_t const idxStack = pReNative->cCondDepth;
6780 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
6781
6782 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
6783 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
6784
6785 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
6786 pEntry->fInElse = false;
6787 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
6788 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
6789
6790 return pEntry;
6791}
6792
6793
6794/**
6795 * Start of the if-block, snapshotting the register and variable state.
6796 */
6797DECL_INLINE_THROW(void)
6798iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
6799{
6800 Assert(offIfBlock != UINT32_MAX);
6801 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6802 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6803 Assert(!pEntry->fInElse);
6804
6805    /* Define the start of the IF block if requested or for disassembly purposes. */
6806 if (idxLabelIf != UINT32_MAX)
6807 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
6808#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6809 else
6810 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
6811#else
6812 RT_NOREF(offIfBlock);
6813#endif
6814
6815 /* Copy the initial state so we can restore it in the 'else' block. */
6816 pEntry->InitialState = pReNative->Core;
6817}
6818
6819
6820#define IEM_MC_ELSE() } while (0); \
6821 off = iemNativeEmitElse(pReNative, off); \
6822 do {
6823
6824/** Emits code related to IEM_MC_ELSE. */
6825DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6826{
6827 /* Check sanity and get the conditional stack entry. */
6828 Assert(off != UINT32_MAX);
6829 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6830 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6831 Assert(!pEntry->fInElse);
6832
6833 /* Jump to the endif */
6834 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
6835
6836 /* Define the else label and enter the else part of the condition. */
6837 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6838 pEntry->fInElse = true;
6839
6840 /* Snapshot the core state so we can do a merge at the endif and restore
6841 the snapshot we took at the start of the if-block. */
6842 pEntry->IfFinalState = pReNative->Core;
6843 pReNative->Core = pEntry->InitialState;
6844
6845 return off;
6846}
6847
6848
6849#define IEM_MC_ENDIF() } while (0); \
6850 off = iemNativeEmitEndIf(pReNative, off)
6851
6852/** Emits code related to IEM_MC_ENDIF. */
6853DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6854{
6855 /* Check sanity and get the conditional stack entry. */
6856 Assert(off != UINT32_MAX);
6857 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6858 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6859
6860 /*
6861     * Now we have to find common ground with the other state (the end of the
6862     * if-block when we're in an else-block, otherwise the initial state).  Use the
6863     * smallest common denominator and just drop anything that isn't the same in both states.
6864 */
6865 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
6866 * which is why we're doing this at the end of the else-block.
6867     * But we'd need more info about the future for that to be worth the effort. */
6868 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
6869 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
6870 {
6871 /* shadow guest stuff first. */
6872 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
6873 if (fGstRegs)
6874 {
6875 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
6876 do
6877 {
6878 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
6879 fGstRegs &= ~RT_BIT_64(idxGstReg);
6880
6881 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6882 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
6883 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
6884 {
6885 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
6886 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
6887 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
6888 }
6889 } while (fGstRegs);
6890 }
6891 else
6892 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
6893
6894 /* Check variables next. For now we must require them to be identical
6895 or stuff we can recreate. */
6896 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
6897 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
6898 if (fVars)
6899 {
6900 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
6901 do
6902 {
6903 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
6904 fVars &= ~RT_BIT_32(idxVar);
6905
6906 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
6907 {
6908 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
6909 continue;
6910 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6911 {
6912 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6913 if (idxHstReg != UINT8_MAX)
6914 {
6915 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6916 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6917 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
6918 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6919 }
6920 continue;
6921 }
6922 }
6923 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
6924 continue;
6925
6926 /* Irreconcilable, so drop it. */
6927 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6928 if (idxHstReg != UINT8_MAX)
6929 {
6930 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6931 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6932 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
6933 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6934 }
6935 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
6936 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6937 } while (fVars);
6938 }
6939
6940 /* Finally, check that the host register allocations matches. */
6941 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
6942 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
6943 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
6944 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
6945 }
6946
6947 /*
6948 * Define the endif label and maybe the else one if we're still in the 'if' part.
6949 */
6950 if (!pEntry->fInElse)
6951 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6952 else
6953 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
6954 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
6955
6956 /* Pop the conditional stack. */
6957 pReNative->cCondDepth -= 1;
6958
6959 return off;
6960}
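
/*
 * To illustrate how the pieces fit together (a rough sketch only; the
 * statements inside the block are placeholders), an MC fragment like
 *
 * @code
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF)
 *          ...
 *      IEM_MC_ENDIF();
 * @endcode
 *
 * expands into
 *
 * @code
 *      off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF); // pushes an IEMNATIVECOND entry, snapshots the
 *      do {                                                           //   core state and branches to idxLabelElse
 *          ...                                                        //   when ZF is clear.
 *      } while (0);
 *      off = iemNativeEmitEndIf(pReNative, off);                      // reconciles the core states, defines the
 *                                                                     //   else/endif labels and pops the entry.
 * @endcode
 */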
6961
6962
6963#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
6964 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
6965 do {
6966
6967/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
6968DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6969{
6970 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6971
6972 /* Get the eflags. */
6973 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6974 kIemNativeGstRegUse_ReadOnly);
6975
6976 /* Test and jump. */
6977 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6978
6979 /* Free but don't flush the EFlags register. */
6980 iemNativeRegFreeTmp(pReNative, idxEflReg);
6981
6982 /* Make a copy of the core state now as we start the if-block. */
6983 iemNativeCondStartIfBlock(pReNative, off);
6984
6985 return off;
6986}
6987
6988
6989#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
6990 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
6991 do {
6992
6993/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
6994DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6995{
6996 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6997
6998 /* Get the eflags. */
6999 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7000 kIemNativeGstRegUse_ReadOnly);
7001
7002 /* Test and jump. */
7003 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
7004
7005 /* Free but don't flush the EFlags register. */
7006 iemNativeRegFreeTmp(pReNative, idxEflReg);
7007
7008 /* Make a copy of the core state now as we start the if-block. */
7009 iemNativeCondStartIfBlock(pReNative, off);
7010
7011 return off;
7012}
7013
7014
7015#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
7016 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
7017 do {
7018
7019/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
7020DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7021{
7022 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7023
7024 /* Get the eflags. */
7025 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7026 kIemNativeGstRegUse_ReadOnly);
7027
7028 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7029 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7030
7031 /* Test and jump. */
7032 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7033
7034 /* Free but don't flush the EFlags register. */
7035 iemNativeRegFreeTmp(pReNative, idxEflReg);
7036
7037 /* Make a copy of the core state now as we start the if-block. */
7038 iemNativeCondStartIfBlock(pReNative, off);
7039
7040 return off;
7041}
7042
7043
7044#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
7045 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
7046 do {
7047
7048/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
7049DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7050{
7051 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7052
7053 /* Get the eflags. */
7054 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7055 kIemNativeGstRegUse_ReadOnly);
7056
7057 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7058 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7059
7060 /* Test and jump. */
7061 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7062
7063 /* Free but don't flush the EFlags register. */
7064 iemNativeRegFreeTmp(pReNative, idxEflReg);
7065
7066 /* Make a copy of the core state now as we start the if-block. */
7067 iemNativeCondStartIfBlock(pReNative, off);
7068
7069 return off;
7070}
7071
7072
7073#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
7074 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
7075 do {
7076
7077#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
7078 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
7079 do {
7080
7081/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
7082DECL_INLINE_THROW(uint32_t)
7083iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7084 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7085{
7086 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7087
7088 /* Get the eflags. */
7089 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7090 kIemNativeGstRegUse_ReadOnly);
7091
7092 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7093 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7094
7095 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7096 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7097 Assert(iBitNo1 != iBitNo2);
7098
7099#ifdef RT_ARCH_AMD64
7100 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
7101
7102 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7103 if (iBitNo1 > iBitNo2)
7104 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7105 else
7106 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7107 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7108
7109#elif defined(RT_ARCH_ARM64)
7110 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7111 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7112
7113 /* and tmpreg, eflreg, #1<<iBitNo1 */
7114 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7115
7116 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7117 if (iBitNo1 > iBitNo2)
7118 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7119 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7120 else
7121 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7122 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7123
7124 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7125
7126#else
7127# error "Port me"
7128#endif
7129
7130 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7131 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7132 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7133
7134 /* Free but don't flush the EFlags and tmp registers. */
7135 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7136 iemNativeRegFreeTmp(pReNative, idxEflReg);
7137
7138 /* Make a copy of the core state now as we start the if-block. */
7139 iemNativeCondStartIfBlock(pReNative, off);
7140
7141 return off;
7142}
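
/*
 * Worked example of the isolate-shift-xor trick above (plain arithmetic, not
 * generated code; the flag choice is just for illustration): with
 * fBit1InEfl=X86_EFL_SF (bit 7), fBit2InEfl=X86_EFL_OF (bit 11) and an EFLAGS
 * value where SF=1 and OF=0:
 *
 * @code
 *      tmp  = efl & RT_BIT_32(7);      // isolate SF                  -> 0x00000080
 *      tmp <<= 11 - 7;                 // move it to the OF position  -> 0x00000800
 *      tmp ^= efl;                     // bit 11 is now SF ^ OF       -> bit 11 set
 * @endcode
 *
 * So bit iBitNo2 of tmpreg is set exactly when the two flags differ, and the
 * final bit test decides whether to branch to the else label.
 */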
7143
7144
7145#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
7146 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
7147 do {
7148
7149#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
7150 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
7151 do {
7152
7153/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
7154 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
7155DECL_INLINE_THROW(uint32_t)
7156iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
7157 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7158{
7159 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7160
7161 /* We need an if-block label for the inverted variant. */
7162 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
7163 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
7164
7165 /* Get the eflags. */
7166 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7167 kIemNativeGstRegUse_ReadOnly);
7168
7169 /* Translate the flag masks to bit numbers. */
7170 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7171 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7172
7173 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7174 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7175 Assert(iBitNo1 != iBitNo);
7176
7177 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7178 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7179 Assert(iBitNo2 != iBitNo);
7180 Assert(iBitNo2 != iBitNo1);
7181
7182#ifdef RT_ARCH_AMD64
7183 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
7184#elif defined(RT_ARCH_ARM64)
7185 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7186#endif
7187
7188 /* Check for the lone bit first. */
7189 if (!fInverted)
7190 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7191 else
7192 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
7193
7194 /* Then extract and compare the other two bits. */
7195#ifdef RT_ARCH_AMD64
7196 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7197 if (iBitNo1 > iBitNo2)
7198 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7199 else
7200 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7201 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7202
7203#elif defined(RT_ARCH_ARM64)
7204 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7205
7206 /* and tmpreg, eflreg, #1<<iBitNo1 */
7207 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7208
7209 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7210 if (iBitNo1 > iBitNo2)
7211 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7212 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7213 else
7214 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7215 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7216
7217 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7218
7219#else
7220# error "Port me"
7221#endif
7222
7223 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7224 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7225 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7226
7227 /* Free but don't flush the EFlags and tmp registers. */
7228 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7229 iemNativeRegFreeTmp(pReNative, idxEflReg);
7230
7231 /* Make a copy of the core state now as we start the if-block. */
7232 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
7233
7234 return off;
7235}
7236
7237
7238#define IEM_MC_IF_CX_IS_NZ() \
7239 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
7240 do {
7241
7242/** Emits code for IEM_MC_IF_CX_IS_NZ. */
7243DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7244{
7245 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7246
7247 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7248 kIemNativeGstRegUse_ReadOnly);
7249 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
7250 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7251
7252 iemNativeCondStartIfBlock(pReNative, off);
7253 return off;
7254}
7255
7256
7257#define IEM_MC_IF_ECX_IS_NZ() \
7258 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
7259 do {
7260
7261#define IEM_MC_IF_RCX_IS_NZ() \
7262 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
7263 do {
7264
7265/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
7266DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7267{
7268 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7269
7270 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7271 kIemNativeGstRegUse_ReadOnly);
7272 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
7273 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7274
7275 iemNativeCondStartIfBlock(pReNative, off);
7276 return off;
7277}
7278
7279
7280#define IEM_MC_IF_CX_IS_NOT_ONE() \
7281 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
7282 do {
7283
7284/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
7285DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7286{
7287 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7288
7289 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7290 kIemNativeGstRegUse_ReadOnly);
7291#ifdef RT_ARCH_AMD64
7292 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7293#else
7294 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7295 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7296 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7297#endif
7298 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7299
7300 iemNativeCondStartIfBlock(pReNative, off);
7301 return off;
7302}
7303
7304
7305#define IEM_MC_IF_ECX_IS_NOT_ONE() \
7306 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
7307 do {
7308
7309#define IEM_MC_IF_RCX_IS_NOT_ONE() \
7310 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
7311 do {
7312
7313/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
7314DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7315{
7316 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7317
7318 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7319 kIemNativeGstRegUse_ReadOnly);
7320 if (f64Bit)
7321 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7322 else
7323 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7324 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7325
7326 iemNativeCondStartIfBlock(pReNative, off);
7327 return off;
7328}
7329
7330
7331#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7332 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
7333 do {
7334
7335#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7336 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
7337 do {
7338
7339/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
7340 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7341DECL_INLINE_THROW(uint32_t)
7342iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
7343{
7344 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7345
7346 /* We have to load both RCX and EFLAGS before we can start branching,
7347 otherwise we'll end up in the else-block with an inconsistent
7348 register allocator state.
7349 Doing EFLAGS first as it's more likely to be loaded, right? */
7350 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7351 kIemNativeGstRegUse_ReadOnly);
7352 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7353 kIemNativeGstRegUse_ReadOnly);
7354
7355 /** @todo we could reduce this to a single branch instruction by spending a
7356 * temporary register and some setnz stuff. Not sure if loops are
7357 * worth it. */
7358 /* Check CX. */
7359#ifdef RT_ARCH_AMD64
7360 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7361#else
7362 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7363 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7364 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7365#endif
7366
7367 /* Check the EFlags bit. */
7368 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7369 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7370 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7371 !fCheckIfSet /*fJmpIfSet*/);
7372
7373 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7374 iemNativeRegFreeTmp(pReNative, idxEflReg);
7375
7376 iemNativeCondStartIfBlock(pReNative, off);
7377 return off;
7378}
7379
7380
7381#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7382 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
7383 do {
7384
7385#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7386 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
7387 do {
7388
7389#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7390 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
7391 do {
7392
7393#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7394 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
7395 do {
7396
7397/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
7398 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
7399 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
7400 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7401DECL_INLINE_THROW(uint32_t)
7402iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7403 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
7404{
7405 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7406
7407 /* We have to load both RCX and EFLAGS before we can start branching,
7408 otherwise we'll end up in the else-block with an inconsistent
7409 register allocator state.
7410 Doing EFLAGS first as it's more likely to be loaded, right? */
7411 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7412 kIemNativeGstRegUse_ReadOnly);
7413 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7414 kIemNativeGstRegUse_ReadOnly);
7415
7416 /** @todo we could reduce this to a single branch instruction by spending a
7417 * temporary register and some setnz stuff. Not sure if loops are
7418 * worth it. */
7419 /* Check RCX/ECX. */
7420 if (f64Bit)
7421 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7422 else
7423 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7424
7425 /* Check the EFlags bit. */
7426 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7427 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7428 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7429 !fCheckIfSet /*fJmpIfSet*/);
7430
7431 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7432 iemNativeRegFreeTmp(pReNative, idxEflReg);
7433
7434 iemNativeCondStartIfBlock(pReNative, off);
7435 return off;
7436}
7437
7438
7439
7440/*********************************************************************************************************************************
7441* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7442*********************************************************************************************************************************/
7443/** Number of hidden arguments for CIMPL calls.
7444 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
7445#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7446# define IEM_CIMPL_HIDDEN_ARGS 3
7447#else
7448# define IEM_CIMPL_HIDDEN_ARGS 2
7449#endif
7450
7451#define IEM_MC_NOREF(a_Name) \
7452 RT_NOREF_PV(a_Name)
7453
7454#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
7455 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
7456
7457#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
7458 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
7459
7460#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
7461 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
7462
7463#define IEM_MC_LOCAL(a_Type, a_Name) \
7464 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
7465
7466#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
7467 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
7468
7469
7470/**
7471 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
7472 */
7473DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
7474{
7475 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
7476 return IEM_CIMPL_HIDDEN_ARGS;
7477 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
7478 return 1;
7479 return 0;
7480}
7481
7482
7483/**
7484 * Internal work that allocates a variable with kind set to
7485 * kIemNativeVarKind_Invalid and no current stack allocation.
7486 *
7487 * The kind will either be set by the caller or later when the variable is first
7488 * assigned a value.
7489 */
7490static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7491{
7492 Assert(cbType > 0 && cbType <= 64);
7493 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7494 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7495 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7496 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7497 pReNative->Core.aVars[idxVar].cbVar = cbType;
7498 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7499 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7500 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7501 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7502 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7503 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7504 pReNative->Core.aVars[idxVar].u.uValue = 0;
7505 return idxVar;
7506}
7507
7508
7509/**
7510 * Internal work that allocates an argument variable w/o setting enmKind.
7511 */
7512static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7513{
7514 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7515 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7516 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7517
7518 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7519 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
7520 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7521 return idxVar;
7522}
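
/*
 * For example (a sketch; the variable name is made up): in an MC block flagged
 * with IEM_CIMPL_F_CALLS_CIMPL, the statement
 *
 * @code
 *      IEM_MC_ARG_CONST(uint8_t, bImmArg, 0x42, 0);
 * @endcode
 *
 * ends up with uArgNo = 0 + IEM_CIMPL_HIDDEN_ARGS, so the explicit MC
 * arguments never collide with the hidden call arguments counted by
 * iemNativeArgGetHiddenArgCount() above.
 */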
7523
7524
7525/**
7526 * Gets the stack slot for a stack variable, allocating one if necessary.
7527 *
7528 * Calling this function implies that the stack slot will contain a valid
7529 * variable value. The caller deals with any register currently assigned to the
7530 * variable, typically by spilling it into the stack slot.
7531 *
7532 * @returns The stack slot number.
7533 * @param pReNative The recompiler state.
7534 * @param idxVar The variable.
7535 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7536 */
7537DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7538{
7539 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7540 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
7541
7542 /* Already got a slot? */
7543 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7544 if (idxStackSlot != UINT8_MAX)
7545 {
7546 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7547 return idxStackSlot;
7548 }
7549
7550 /*
7551 * A single slot is easy to allocate.
7552 * Allocate them from the top end, closest to BP, to reduce the displacement.
7553 */
7554 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
7555 {
7556 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7557 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7558 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7559 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
7560 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
7561 return (uint8_t)iSlot;
7562 }
7563
7564 /*
7565 * We need more than one stack slot.
7566 *
7567 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7568 */
7569 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7570 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
7571 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
7572 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
7573 uint32_t bmStack = ~pReNative->Core.bmStack;
7574 while (bmStack != UINT32_MAX)
7575 {
7576/** @todo allocate from the top to reduce BP displacement. */
7577 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7578 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7579 if (!(iSlot & fBitAlignMask))
7580 {
7581 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7582 {
7583 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7584 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
7585 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
7586 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
7587 return (uint8_t)iSlot;
7588 }
7589 }
7590 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7591 }
7592 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7593}
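
/*
 * A quick arithmetic check of the multi-slot masks above (no code, just
 * numbers): cbVar=16 gives fBitAlignMask=1 and fBitAllocMask=3, i.e. two
 * consecutive free slots starting at an even slot index; cbVar=32 gives
 * fBitAlignMask=3 and fBitAllocMask=0xf, i.e. four consecutive free slots
 * starting at a multiple of four; cbVar=64 likewise needs eight slots at an
 * eight-aligned index.
 */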
7594
7595
7596/**
7597 * Changes the variable to a stack variable.
7598 *
7599 * Currently this is only possible to do the first time the variable is used;
7600 * switching later could be implemented but isn't done.
7601 *
7602 * @param pReNative The recompiler state.
7603 * @param idxVar The variable.
7604 * @throws VERR_IEM_VAR_IPE_2
7605 */
7606static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7607{
7608 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7609 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
7610 {
7611 /* We could in theory transition from immediate to stack as well, but it
7612 would involve the caller doing work storing the value on the stack. So,
7613 till that's required we only allow transition from invalid. */
7614 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7615 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7616 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7617 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
7618
7619 /* Note! We don't allocate a stack slot here, that's only done when a
7620 slot is actually needed to hold a variable value. */
7621 }
7622}
7623
7624
7625/**
7626 * Sets the variable to a constant (immediate) value.
7627 *
7628 * This does not require stack storage as we know the value and can always
7629 * reload it, unless of course it's referenced.
7630 *
7631 * @param pReNative The recompiler state.
7632 * @param idxVar The variable.
7633 * @param uValue The immediate value.
7634 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7635 */
7636static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7637{
7638 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7639 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
7640 {
7641 /* Only simple transitions for now. */
7642 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7643 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7644 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
7645 }
7646 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7647
7648 pReNative->Core.aVars[idxVar].u.uValue = uValue;
7649 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
7650 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
7651 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
7652}
7653
7654
7655/**
7656 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7657 *
7658 * This does not require stack storage as we know the value and can always
7659 * reload it. Loading is postponed till needed.
7660 *
7661 * @param pReNative The recompiler state.
7662 * @param idxVar The variable.
7663 * @param idxOtherVar The variable to take the (stack) address of.
7664 *
7665 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7666 */
7667static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7668{
7669 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7670 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7671
7672 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7673 {
7674 /* Only simple transitions for now. */
7675 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7676 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7677 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7678 }
7679 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7680
7681 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
7682
7683 /* Update the other variable, ensure it's a stack variable. */
7684 /** @todo handle variables with const values... that'll go boom now. */
7685 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7686 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
7687}
7688
7689
7690/**
7691 * Sets the variable to a reference (pointer) to a guest register reference.
7692 *
7693 * This does not require stack storage as we know the value and can always
7694 * reload it. Loading is postponed till needed.
7695 *
7696 * @param pReNative The recompiler state.
7697 * @param idxVar The variable.
7698 * @param enmRegClass The class of guest registers to reference.
7699 * @param idxReg The register within @a enmRegClass to reference.
7700 *
7701 * @throws VERR_IEM_VAR_IPE_2
7702 */
7703static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7704 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7705{
7706 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7707
7708 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
7709 {
7710 /* Only simple transitions for now. */
7711 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7712 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7713 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
7714 }
7715 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7716
7717 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
7718 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
7719}
7720
7721
7722DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7723{
7724 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7725}
7726
7727
7728DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7729{
7730 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7731
7732 /* Since we're using a generic uint64_t value type, we must truncate it if
7733 the variable is smaller, otherwise we may end up with too large a value
7734 when scaling up an imm8 w/ sign-extension.
7735
7736 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7737 in the bios, bx=1) when running on arm, because clang expects 16-bit
7738 register parameters to have bits 16 and up set to zero. Instead of
7739 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7740 CF value in the result. */
7741 switch (cbType)
7742 {
7743 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7744 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7745 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7746 }
7747 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7748 return idxVar;
7749}
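
/*
 * Truncation example (made-up values): a call like
 *
 * @code
 *      iemNativeArgAllocConst(pReNative, 1, sizeof(uint16_t), UINT64_C(0xffffffffffffffff)); // iArgNo=1
 * @endcode
 *
 * records u.uValue = 0xffff rather than the full 64-bit pattern, which is what
 * the 16-bit clang/arm64 parameter expectation described above requires.
 */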
7750
7751
7752DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7753{
7754 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7755 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7756 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7757 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7758
7759 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7760 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
7761 return idxArgVar;
7762}
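
/*
 * IEM_MC_ARG_LOCAL_REF sketch (made-up names): a local is allocated and an
 * argument referring to it is created, which forces the local to be a stack
 * variable so that its address can be handed to the helper:
 *
 * @code
 *      IEM_MC_LOCAL(uint16_t, u16Value);
 *      IEM_MC_ARG_LOCAL_REF(uint16_t *, pu16Value, u16Value, 1);
 * @endcode
 */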
7763
7764
7765DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7766{
7767 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7768 /* Don't set to stack now, leave that to the first use as for instance
7769 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7770 return idxVar;
7771}
7772
7773
7774DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7775{
7776 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7777
7778 /* Since we're using a generic uint64_t value type, we must truncate it if
7779 the variable is smaller, otherwise we may end up with too large a value
7780 when scaling up an imm8 w/ sign-extension. */
7781 switch (cbType)
7782 {
7783 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7784 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7785 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7786 }
7787 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7788 return idxVar;
7789}
7790
7791
7792/**
7793 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7794 * fixed till we call iemNativeVarRegisterRelease.
7795 *
7796 * @returns The host register number.
7797 * @param pReNative The recompiler state.
7798 * @param idxVar The variable.
7799 * @param poff Pointer to the instruction buffer offset.
7800 * In case a register needs to be freed up or the value
7801 * loaded off the stack.
7802 * @param fInitialized Set if the variable must already have been initialized.
7803 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7804 * the case.
7805 * @param idxRegPref Preferred register number or UINT8_MAX.
7806 */
7807DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7808 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7809{
7810 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7811 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
7812 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7813
7814 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7815 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7816 {
7817 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
7818 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7819 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7820 return idxReg;
7821 }
7822
7823 /*
7824 * If the kind of variable has not yet been set, default to 'stack'.
7825 */
7826 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
7827 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7828 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
7829 iemNativeVarSetKindToStack(pReNative, idxVar);
7830
7831 /*
7832 * We have to allocate a register for the variable, even if it's a stack one,
7833 * as we don't know if there are modifications being made to it before it's
7834 * finalized (todo: analyze and insert hints about that?).
7835 *
7836 * If we can, we try to get the correct register for argument variables. This
7837 * assumes that most argument variables are fetched as close as possible
7838 * to the actual call, so that there aren't any interfering hidden calls
7839 * (memory accesses, etc.) in between.
7840 *
7841 * If we cannot, or it's not an argument variable, we make sure no argument
7842 * registers that will be used by this MC block are allocated here, and we
7843 * always prefer non-volatile registers to avoid needing to spill stuff for
7844 * internal calls.
7845 */
7846 /** @todo Detect too-early argument value fetches in the python script and
7847 * warn about hidden calls causing less optimal code to be generated. */
7848
7849 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7850 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7851 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7852 {
7853 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7854 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7855 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7856 }
7857 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7858 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7859 {
7860 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7861 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7862 & ~pReNative->Core.bmHstRegsWithGstShadow
7863 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7864 & fNotArgsMask;
7865 if (fRegs)
7866 {
7867 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7868 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7869 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7870 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7871 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7872 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7873 }
7874 else
7875 {
7876 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7877 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7878 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7879 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7880 }
7881 }
7882 else
7883 {
7884 idxReg = idxRegPref;
7885 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7886 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
7887 }
7888 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7889 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7890
7891 /*
7892 * Load it off the stack if we've got a stack slot.
7893 */
7894 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7895 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7896 {
7897 Assert(fInitialized);
7898 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7899 switch (pReNative->Core.aVars[idxVar].cbVar)
7900 {
7901 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7902 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7903 case 3: AssertFailed(); RT_FALL_THRU();
7904 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7905 default: AssertFailed(); RT_FALL_THRU();
7906 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7907 }
7908 }
7909 else
7910 {
7911 Assert(idxStackSlot == UINT8_MAX);
7912 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7913 }
7914 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7915 return idxReg;
7916}
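
/*
 * Usage sketch (the middle step is a placeholder; the release call is the
 * counterpart referred to in the doc comment above):
 *
 * @code
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true); // fInitialized=true
 *      // ... emit whatever operates on idxVarReg here ...
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 * @endcode
 *
 * Between the two calls the register stays pinned to the variable
 * (fRegAcquired), so other allocations won't steal or spill it.
 */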
7917
7918
7919/**
7920 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7921 * guest register.
7922 *
7923 * This function makes sure there is a register for it and sets it to be the
7924 * current shadow copy of @a enmGstReg.
7925 *
7926 * @returns The host register number.
7927 * @param pReNative The recompiler state.
7928 * @param idxVar The variable.
7929 * @param enmGstReg The guest register this variable will be written to
7930 * after this call.
7931 * @param poff Pointer to the instruction buffer offset.
7932 * In case a register needs to be freed up or if the
7933 * variable content needs to be loaded off the stack.
7934 *
7935 * @note We DO NOT expect @a idxVar to be an argument variable, because we
7936 * can only be in the commit stage of an instruction when this
7937 * function is used.
7938 */
7939DECL_HIDDEN_THROW(uint8_t)
7940iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7941{
7942 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7943 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7944 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
7945 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
7946 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
7947 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
7948 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7949 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7950
7951 /*
7952 * This shouldn't ever be used for arguments, unless it's in a weird else
7953 * branch that doesn't do any calling and even then it's questionable.
7954 *
7955 * However, in case someone writes crazy wrong MC code and does register
7956 * updates before making calls, just use the regular register allocator to
7957 * ensure we get a register suitable for the intended argument number.
7958 */
7959 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7960
7961 /*
7962 * If there is already a register for the variable, we transfer/set the
7963 * guest shadow copy assignment to it.
7964 */
7965 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7966 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7967 {
7968 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7969 {
7970 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7971 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7972 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7973 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7974 }
7975 else
7976 {
7977 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7978 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7979 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7980 }
7981 /** @todo figure this one out. We need some way of making sure the register isn't
7982 * modified after this point, just in case we start writing crappy MC code. */
7983 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
7984 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7985 return idxReg;
7986 }
7987 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7988
7989 /*
7990 * Because this is supposed to be the commit stage, we just tag along with the
7991 * temporary register allocator and upgrade the allocation to a variable register.
7992 */
7993 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7994 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7995 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7996 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7997 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7998 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7999
8000 /*
8001 * Now we need to load the register value.
8002 */
8003 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
8004 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
8005 else
8006 {
8007 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8008 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8009 switch (pReNative->Core.aVars[idxVar].cbVar)
8010 {
8011 case sizeof(uint64_t):
8012 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
8013 break;
8014 case sizeof(uint32_t):
8015 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
8016 break;
8017 case sizeof(uint16_t):
8018 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
8019 break;
8020 case sizeof(uint8_t):
8021 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
8022 break;
8023 default:
8024 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8025 }
8026 }
8027
8028 pReNative->Core.aVars[idxVar].fRegAcquired = true;
8029 return idxReg;
8030}
8031
8032
8033/**
8034 * Sets the host register for @a idxVar to @a idxReg.
8035 *
8036 * The register must not be allocated. Any guest register shadowing will be
8037 * implicitly dropped by this call.
8038 *
8039 * The variable must not have any register associated with it (causes
8040 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
8041 * implied.
8042 *
8043 * @returns idxReg
8044 * @param pReNative The recompiler state.
8045 * @param idxVar The variable.
8046 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
8047 * @param off For recording in debug info.
8048 *
8049 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
8050 */
8051DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
8052{
8053 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8054 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8055 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
8056 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
8057 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
8058
8059 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
8060 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8061
8062 iemNativeVarSetKindToStack(pReNative, idxVar);
8063 pReNative->Core.aVars[idxVar].idxReg = idxReg;
8064
8065 return idxReg;
8066}
8067
8068
8069/**
8070 * Convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
8071 */
8072DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8073 uint8_t idxReg, uint32_t *poff)
8074{
8075 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
8076 pReNative->Core.aVars[idxVar].fRegAcquired = true;
8077 return idxReg;
8078}
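
/*
 * Intended usage sketch (names and the call emission are placeholders): after
 * emitting a helper call whose return value should become the variable's
 * content, bind IEMNATIVE_CALL_RET_GREG to the variable instead of copying it:
 *
 * @code
 *      // off = ... emit the helper call ... (placeholder)
 *      uint8_t const idxVarReg = iemNativeVarRegisterSetAndAcquire(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, &off);
 *      // idxVarReg is now IEMNATIVE_CALL_RET_GREG and the variable is a stack variable holding the result.
 * @endcode
 */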
8079
8080
8081/**
8082 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
8083 *
8084 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
8085 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
8086 * requirement of flushing anything in volatile host registers when making a
8087 * call.
8088 *
8089 * @returns New @a off value.
8090 * @param pReNative The recompiler state.
8091 * @param off The code buffer position.
8092 * @param fHstRegsNotToSave Set of registers not to save & restore.
8093 */
8094DECL_HIDDEN_THROW(uint32_t)
8095iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8096{
8097 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8098 if (fHstRegs)
8099 {
8100 do
8101 {
8102 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8103 fHstRegs &= ~RT_BIT_32(idxHstReg);
8104
8105 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8106 {
8107 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8108 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
8109 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
8110 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
8111 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8112 switch (pReNative->Core.aVars[idxVar].enmKind)
8113 {
8114 case kIemNativeVarKind_Stack:
8115 {
8116 /* Temporarily spill the variable register. */
8117 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8118 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8119 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8120 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8121 continue;
8122 }
8123
8124 case kIemNativeVarKind_Immediate:
8125 case kIemNativeVarKind_VarRef:
8126 case kIemNativeVarKind_GstRegRef:
8127 /* It is weird to have any of these loaded at this point. */
8128 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8129 continue;
8130
8131 case kIemNativeVarKind_End:
8132 case kIemNativeVarKind_Invalid:
8133 break;
8134 }
8135 AssertFailed();
8136 }
8137 else
8138 {
8139 /*
8140 * Allocate a temporary stack slot and spill the register to it.
8141 */
8142 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8143 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
8144 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8145 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
8146 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
8147 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8148 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8149 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8150 }
8151 } while (fHstRegs);
8152 }
8153 return off;
8154}
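
/*
 * Bracketing sketch for a helper call on the TLB-miss path (the call emission
 * in the middle is a placeholder; fHstRegsNotToSave holds registers the caller
 * manages itself):
 *
 * @code
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      // off = ... emit the helper call ... (placeholder)
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 * @endcode
 */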
8155
8156
8157/**
8158 * Emit code to restore volatile registers after a call to a helper.
8159 *
8160 * @returns New @a off value.
8161 * @param pReNative The recompiler state.
8162 * @param off The code buffer position.
8163 * @param fHstRegsNotToSave Set of registers not to save & restore.
8164 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8165 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8166 */
8167DECL_HIDDEN_THROW(uint32_t)
8168iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8169{
8170 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8171 if (fHstRegs)
8172 {
8173 do
8174 {
8175 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8176 fHstRegs &= ~RT_BIT_32(idxHstReg);
8177
8178 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8179 {
8180 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8181 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
8182 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
8183 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
8184 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8185 switch (pReNative->Core.aVars[idxVar].enmKind)
8186 {
8187 case kIemNativeVarKind_Stack:
8188 {
8189 /* Unspill the variable register. */
8190 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8191 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%d/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8192 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8193 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8194 continue;
8195 }
8196
8197 case kIemNativeVarKind_Immediate:
8198 case kIemNativeVarKind_VarRef:
8199 case kIemNativeVarKind_GstRegRef:
8200 /* It is weird to have any of these loaded at this point. */
8201 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8202 continue;
8203
8204 case kIemNativeVarKind_End:
8205 case kIemNativeVarKind_Invalid:
8206 break;
8207 }
8208 AssertFailed();
8209 }
8210 else
8211 {
8212 /*
8213 * Restore from temporary stack slot.
8214 */
8215 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8216 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8217 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8218 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8219
8220 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8221 }
8222 } while (fHstRegs);
8223 }
8224 return off;
8225}
8226
8227
8228/**
8229 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
8230 *
8231 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8232 */
8233DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8234{
8235 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8236 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8237 {
8238 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8239 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8240 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8241 Assert(cSlots > 0);
8242 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8243 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
8244 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8245 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8246 }
8247 else
8248 Assert(idxStackSlot == UINT8_MAX);
8249}
8250
8251
8252/**
8253 * Worker that frees a single variable.
8254 *
8255 * ASSUMES that @a idxVar is valid.
8256 */
8257DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8258{
8259 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8260 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8261 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8262
8263 /* Free the host register first if any assigned. */
8264 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8265 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8266 {
8267 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
8268 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8269 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8270 }
8271
8272 /* Free argument mapping. */
8273 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8274 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8275 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8276
8277 /* Free the stack slots. */
8278 iemNativeVarFreeStackSlots(pReNative, idxVar);
8279
8280 /* Free the actual variable. */
8281 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8282 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8283}
8284
8285
8286/**
8287 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8288 */
8289DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8290{
8291 while (bmVars != 0)
8292 {
8293 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8294 bmVars &= ~RT_BIT_32(idxVar);
8295
8296#if 1 /** @todo optimize by simplifying this later... */
8297 iemNativeVarFreeOneWorker(pReNative, idxVar);
8298#else
8299 /* Only need to free the host register, the rest is done as bulk updates below. */
8300 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8301 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8302 {
8303 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
8304 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8305 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8306 }
8307#endif
8308 }
8309#if 0 /** @todo optimize by simplifying this later... */
8310 pReNative->Core.bmVars = 0;
8311 pReNative->Core.bmStack = 0;
8312 pReNative->Core.u64ArgVars = UINT64_MAX;
8313#endif
8314}
8315
8316
8317/**
8318 * This is called by IEM_MC_END() to clean up all variables.
8319 */
8320DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
8321{
8322 uint32_t const bmVars = pReNative->Core.bmVars;
8323 if (bmVars != 0)
8324 iemNativeVarFreeAllSlow(pReNative, bmVars);
8325 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8326 Assert(pReNative->Core.bmStack == 0);
8327}
8328
8329
8330#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
8331
8332/**
8333 * This is called by IEM_MC_FREE_LOCAL.
8334 */
8335DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8336{
8337 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8338 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
8339 iemNativeVarFreeOneWorker(pReNative, idxVar);
8340}
8341
8342
8343#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
8344
8345/**
8346 * This is called by IEM_MC_FREE_ARG.
8347 */
8348DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8349{
8350 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8351 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
8352 iemNativeVarFreeOneWorker(pReNative, idxVar);
8353}
8354
8355
8356#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
8357
8358/**
8359 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
8360 */
8361DECL_INLINE_THROW(uint32_t)
8362iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
8363{
8364 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
8365 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
8366 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8367 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
8368 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
8369
8370 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
8371 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
8372 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
8373 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8374
8375 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
8376
8377 /*
8378 * Special case for immediates.
8379 */
8380 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
8381 {
8382 switch (pReNative->Core.aVars[idxVarDst].cbVar)
8383 {
8384 case sizeof(uint16_t):
8385 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
8386 break;
8387 case sizeof(uint32_t):
8388 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
8389 break;
8390 default: AssertFailed(); break;
8391 }
8392 }
8393 else
8394 {
8395 /*
8396 * The generic solution for now.
8397 */
8398 /** @todo optimize this by having the python script make sure the source
8399 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
8400 * statement. Then we could just transfer the register assignments. */
8401 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
8402 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
8403 switch (pReNative->Core.aVars[idxVarDst].cbVar)
8404 {
8405 case sizeof(uint16_t):
8406 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
8407 break;
8408 case sizeof(uint32_t):
8409 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
8410 break;
8411 default: AssertFailed(); break;
8412 }
8413 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
8414 iemNativeVarRegisterRelease(pReNative, idxVarDst);
8415 }
8416 return off;
8417}
8418
8419
8420
8421/*********************************************************************************************************************************
8422* Emitters for IEM_MC_CALL_CIMPL_XXX *
8423*********************************************************************************************************************************/
8424
8425/**
8426 * Emits code to load a reference to the given guest register into @a idxGprDst.
8427 */
8428DECL_INLINE_THROW(uint32_t)
8429iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8430 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8431{
8432 /*
8433 * Get the offset relative to the CPUMCTX structure.
8434 */
8435 uint32_t offCpumCtx;
8436 switch (enmClass)
8437 {
8438 case kIemNativeGstRegRef_Gpr:
8439 Assert(idxRegInClass < 16);
8440 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8441 break;
8442
8443        case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
8444 Assert(idxRegInClass < 4);
8445 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8446 break;
8447
8448 case kIemNativeGstRegRef_EFlags:
8449 Assert(idxRegInClass == 0);
8450 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8451 break;
8452
8453 case kIemNativeGstRegRef_MxCsr:
8454 Assert(idxRegInClass == 0);
8455 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8456 break;
8457
8458 case kIemNativeGstRegRef_FpuReg:
8459 Assert(idxRegInClass < 8);
8460 AssertFailed(); /** @todo what kind of indexing? */
8461 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8462 break;
8463
8464 case kIemNativeGstRegRef_MReg:
8465 Assert(idxRegInClass < 8);
8466 AssertFailed(); /** @todo what kind of indexing? */
8467 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8468 break;
8469
8470 case kIemNativeGstRegRef_XReg:
8471 Assert(idxRegInClass < 16);
8472 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8473 break;
8474
8475 default:
8476 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8477 }
8478
8479 /*
8480     * Load the address into the destination register.
8481 */
8482#ifdef RT_ARCH_AMD64
8483 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8484
8485#elif defined(RT_ARCH_ARM64)
8486 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8487 Assert(offCpumCtx < 4096);
8488 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8489
8490#else
8491# error "Port me!"
8492#endif
8493
8494 return off;
8495}
8496
8497
8498/**
8499 * Common code for CIMPL and AIMPL calls.
8500 *
8501 * These are calls that use argument variables and such. They should not be
8502 * confused with internal calls required to implement an MC operation,
8503 * like a TLB load and similar.
8504 *
8505 * Upon return all that is left to do is to load any hidden arguments and
8506 * perform the call. All argument variables are freed.
8507 *
8508 * @returns New code buffer offset; throws VBox status code on error.
8509 * @param pReNative The native recompile state.
8510 * @param off The code buffer offset.
8511 * @param   cArgs           The total number of arguments (includes hidden
8512 * count).
8513 * @param cHiddenArgs The number of hidden arguments. The hidden
8514 * arguments must not have any variable declared for
8515 * them, whereas all the regular arguments must
8516 * (tstIEMCheckMc ensures this).
8517 */
8518DECL_HIDDEN_THROW(uint32_t)
8519iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8520{
8521#ifdef VBOX_STRICT
8522 /*
8523 * Assert sanity.
8524 */
8525 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8526 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8527 for (unsigned i = 0; i < cHiddenArgs; i++)
8528 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8529 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8530 {
8531 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8532 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8533 }
8534 iemNativeRegAssertSanity(pReNative);
8535#endif
8536
8537 /*
8538 * Before we do anything else, go over variables that are referenced and
8539 * make sure they are not in a register.
8540 */
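    /* A reference is materialized as the address of the referenced variable's stack slot
       (see the kIemNativeVarKind_VarRef handling further down), so the current value must
       live on the stack rather than in a register. */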
8541 uint32_t bmVars = pReNative->Core.bmVars;
8542 if (bmVars)
8543 {
8544 do
8545 {
8546 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8547 bmVars &= ~RT_BIT_32(idxVar);
8548
8549 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8550 {
8551 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8552 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8553 {
8554 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8555 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8556 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8557 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8558 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8559
8560 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8561 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8562 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8563 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8564 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8565 }
8566 }
8567 } while (bmVars != 0);
8568#if 0 //def VBOX_STRICT
8569 iemNativeRegAssertSanity(pReNative);
8570#endif
8571 }
8572
8573 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
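    /* Number of arguments passed in registers; any remaining ones go on the stack
       (only when IEMNATIVE_FP_OFF_STACK_ARG0 is defined). */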
8574
8575 /*
8576 * First, go over the host registers that will be used for arguments and make
8577 * sure they either hold the desired argument or are free.
8578 */
8579 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8580 {
8581 for (uint32_t i = 0; i < cRegArgs; i++)
8582 {
8583 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8584 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8585 {
8586 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8587 {
8588 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8589 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8590 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
8591 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8592 if (uArgNo == i)
8593                    { /* perfect */ }
8594 /* The variable allocator logic should make sure this is impossible,
8595 except for when the return register is used as a parameter (ARM,
8596 but not x86). */
8597#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8598 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8599 {
8600# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8601# error "Implement this"
8602# endif
8603 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8604 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8605 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8606 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8607 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8608 }
8609#endif
8610 else
8611 {
8612 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8613
8614 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
8615 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8616 else
8617 {
8618 /* just free it, can be reloaded if used again */
8619 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8620 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8621 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8622 }
8623 }
8624 }
8625 else
8626 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8627 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8628 }
8629 }
8630#if 0 //def VBOX_STRICT
8631 iemNativeRegAssertSanity(pReNative);
8632#endif
8633 }
8634
8635 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8636
8637#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8638 /*
8639 * If there are any stack arguments, make sure they are in their place as well.
8640 *
8641     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8642     * the caller) will be loading it later and it must be free (see the first loop).
8643 */
8644 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8645 {
8646 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8647 {
8648 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8649 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8650 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8651 {
8652 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8653 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
8654 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
8655 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8656 }
8657 else
8658 {
8659 /* Use ARG0 as temp for stuff we need registers for. */
8660 switch (pReNative->Core.aVars[idxVar].enmKind)
8661 {
8662 case kIemNativeVarKind_Stack:
8663 {
8664 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8665 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8666 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8667 iemNativeStackCalcBpDisp(idxStackSlot));
8668 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8669 continue;
8670 }
8671
8672 case kIemNativeVarKind_Immediate:
8673 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
8674 continue;
8675
8676 case kIemNativeVarKind_VarRef:
8677 {
8678 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8679 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8680 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8681 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8682 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8683 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8684 {
8685 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8686 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8687 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8688 }
8689 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8690 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8691 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8692 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8693 continue;
8694 }
8695
8696 case kIemNativeVarKind_GstRegRef:
8697 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8698 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8699 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8700 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8701 continue;
8702
8703 case kIemNativeVarKind_Invalid:
8704 case kIemNativeVarKind_End:
8705 break;
8706 }
8707 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8708 }
8709 }
8710# if 0 //def VBOX_STRICT
8711 iemNativeRegAssertSanity(pReNative);
8712# endif
8713 }
8714#else
8715 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8716#endif
8717
8718 /*
8719 * Make sure the argument variables are loaded into their respective registers.
8720 *
8721 * We can optimize this by ASSUMING that any register allocations are for
8722     * registers that have already been loaded and are ready. The previous step
8723 * saw to that.
8724 */
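    /* Only needed when at least one (non-hidden) argument register is still unallocated;
       otherwise the strict-build branch below merely asserts everything is already in place. */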
8725 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8726 {
8727 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8728 {
8729 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8730 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8731 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
8732 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8733 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8734 else
8735 {
8736 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8737 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8738 {
8739 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
8740 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
8741 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
8742 | RT_BIT_32(idxArgReg);
8743 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
8744 }
8745 else
8746 {
8747 /* Use ARG0 as temp for stuff we need registers for. */
8748 switch (pReNative->Core.aVars[idxVar].enmKind)
8749 {
8750 case kIemNativeVarKind_Stack:
8751 {
8752 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8753 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8754 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8755 continue;
8756 }
8757
8758 case kIemNativeVarKind_Immediate:
8759 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
8760 continue;
8761
8762 case kIemNativeVarKind_VarRef:
8763 {
8764 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8765 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8766 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8767 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8768 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8769 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8770 {
8771 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8772 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8773 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8774 }
8775 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8776 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8777 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8778 continue;
8779 }
8780
8781 case kIemNativeVarKind_GstRegRef:
8782 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8783 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8784 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8785 continue;
8786
8787 case kIemNativeVarKind_Invalid:
8788 case kIemNativeVarKind_End:
8789 break;
8790 }
8791 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8792 }
8793 }
8794 }
8795#if 0 //def VBOX_STRICT
8796 iemNativeRegAssertSanity(pReNative);
8797#endif
8798 }
8799#ifdef VBOX_STRICT
8800 else
8801 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8802 {
8803 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8804 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8805 }
8806#endif
8807
8808 /*
8809 * Free all argument variables (simplified).
8810 * Their lifetime always expires with the call they are for.
8811 */
8812 /** @todo Make the python script check that arguments aren't used after
8813 * IEM_MC_CALL_XXXX. */
8814    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8815     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8816     *        an argument value. There is also some FPU stuff. */
8817 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8818 {
8819 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8820 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8821
8822 /* no need to free registers: */
8823 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8824 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8825 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8826 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8827 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8828 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8829
8830 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8831 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8832 iemNativeVarFreeStackSlots(pReNative, idxVar);
8833 }
8834 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8835
8836 /*
8837 * Flush volatile registers as we make the call.
8838 */
8839 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8840
8841 return off;
8842}
8843
8844
8845/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
8846DECL_HIDDEN_THROW(uint32_t)
8847iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
8848 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
8849
8850{
8851 /*
8852 * Do all the call setup and cleanup.
8853 */
8854 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
8855
8856 /*
8857     * Load the two or three hidden arguments (pVCpu, cbInstr, and on Windows/AMD64 strict builds the address of the VBOXSTRICTRC return buffer).
8858 */
8859#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8860 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
8861 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8862 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
8863#else
8864 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8865 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
8866#endif
8867
8868 /*
8869 * Make the call and check the return code.
8870 *
8871 * Shadow PC copies are always flushed here, other stuff depends on flags.
8872     * Segment and general purpose registers are explicitly flushed via the
8873 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
8874 * macros.
8875 */
8876 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
8877#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8878 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
8879#endif
8880 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
8881 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
8882 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
8883 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
8884
8885 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
8886}
8887
8888
8889#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
8890 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
8891
8892/** Emits code for IEM_MC_CALL_CIMPL_1. */
8893DECL_INLINE_THROW(uint32_t)
8894iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8895 uintptr_t pfnCImpl, uint8_t idxArg0)
8896{
8897 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8898 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
8899}
8900
8901
8902#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
8903 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
8904
8905/** Emits code for IEM_MC_CALL_CIMPL_2. */
8906DECL_INLINE_THROW(uint32_t)
8907iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8908 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
8909{
8910 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8911 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8912 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
8913}
8914
8915
8916#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
8917 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8918 (uintptr_t)a_pfnCImpl, a0, a1, a2)
8919
8920/** Emits code for IEM_MC_CALL_CIMPL_3. */
8921DECL_INLINE_THROW(uint32_t)
8922iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8923 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8924{
8925 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8926 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8927 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8928 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
8929}
8930
8931
8932#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
8933 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8934 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
8935
8936/** Emits code for IEM_MC_CALL_CIMPL_4. */
8937DECL_INLINE_THROW(uint32_t)
8938iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8939 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8940{
8941 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8942 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8943 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8944 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8945 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
8946}
8947
8948
8949#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
8950 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8951 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
8952
8953/** Emits code for IEM_MC_CALL_CIMPL_5. */
8954DECL_INLINE_THROW(uint32_t)
8955iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8956 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
8957{
8958 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8959 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8960 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8961 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8962 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
8963 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
8964}
8965
8966
8967/** Recompiler debugging: Flush guest register shadow copies. */
8968#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
8969
8970
8971
8972/*********************************************************************************************************************************
8973* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
8974*********************************************************************************************************************************/
8975
8976/**
8977 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
8978 */
8979DECL_INLINE_THROW(uint32_t)
8980iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8981 uintptr_t pfnAImpl, uint8_t cArgs)
8982{
8983 if (idxVarRc != UINT8_MAX)
8984 {
8985 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
8986 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
8987 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
8988 }
8989
8990 /*
8991 * Do all the call setup and cleanup.
8992 */
8993 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
8994
8995 /*
8996 * Make the call and update the return code variable if we've got one.
8997 */
8998 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8999 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
9000 {
9001pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
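        /* Let the result variable take over the call's return register (IEMNATIVE_CALL_RET_GREG)
           so no extra move is needed to capture the return value. */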
9002 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
9003 }
9004
9005 return off;
9006}
9007
9008
9009
9010#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
9011 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
9012
9013#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
9014 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
9015
9016/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
9017DECL_INLINE_THROW(uint32_t)
9018iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
9019{
9020 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
9021}
9022
9023
9024#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
9025 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
9026
9027#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
9028 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
9029
9030/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
9031DECL_INLINE_THROW(uint32_t)
9032iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
9033{
9034 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9035 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
9036}
9037
9038
9039#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
9040 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
9041
9042#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
9043 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
9044
9045/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
9046DECL_INLINE_THROW(uint32_t)
9047iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9048 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9049{
9050 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9051 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9052 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
9053}
9054
9055
9056#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
9057 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
9058
9059#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
9060 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
9061
9062/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
9063DECL_INLINE_THROW(uint32_t)
9064iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9065 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9066{
9067 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9068 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9069 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9070 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
9071}
9072
9073
9074#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
9075 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9076
9077#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
9078 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9079
9080/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
9081DECL_INLINE_THROW(uint32_t)
9082iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9083 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
9084{
9085 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9086 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9087 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9088 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
9089 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
9090}
9091
9092
9093
9094/*********************************************************************************************************************************
9095* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
9096*********************************************************************************************************************************/
9097
9098#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
9099 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
9100
9101#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9102 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
9103
9104#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9105 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
9106
9107#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9108 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
9109
9110
9111/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
9112 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
9113DECL_INLINE_THROW(uint32_t)
9114iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
9115{
9116 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9117 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
9118 Assert(iGRegEx < 20);
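    /* iGRegEx 0..15 selects the low byte of GPR0..15, while 16..19 selects the high byte
       (AH, CH, DH, BH) of GPR0..3. */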
9119
9120 /* Same discussion as in iemNativeEmitFetchGregU16 */
9121 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9122 kIemNativeGstRegUse_ReadOnly);
9123
9124 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9125 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9126
9127 /* The value is zero-extended to the full 64-bit host register width. */
9128 if (iGRegEx < 16)
9129 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9130 else
9131 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9132
9133 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9134 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9135 return off;
9136}
9137
9138
9139#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9140 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
9141
9142#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9143 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
9144
9145#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9146 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
9147
9148/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
9149DECL_INLINE_THROW(uint32_t)
9150iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
9151{
9152 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9153 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
9154 Assert(iGRegEx < 20);
9155
9156 /* Same discussion as in iemNativeEmitFetchGregU16 */
9157 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9158 kIemNativeGstRegUse_ReadOnly);
9159
9160 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9161 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9162
9163 if (iGRegEx < 16)
9164 {
9165 switch (cbSignExtended)
9166 {
9167 case sizeof(uint16_t):
9168 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9169 break;
9170 case sizeof(uint32_t):
9171 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9172 break;
9173 case sizeof(uint64_t):
9174 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9175 break;
9176 default: AssertFailed(); break;
9177 }
9178 }
9179 else
9180 {
9181 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9182 switch (cbSignExtended)
9183 {
9184 case sizeof(uint16_t):
9185 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9186 break;
9187 case sizeof(uint32_t):
9188 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9189 break;
9190 case sizeof(uint64_t):
9191 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9192 break;
9193 default: AssertFailed(); break;
9194 }
9195 }
9196
9197 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9198 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9199 return off;
9200}
9201
9202
9203
9204#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
9205 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
9206
9207#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
9208 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9209
9210#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
9211 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9212
9213/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
9214DECL_INLINE_THROW(uint32_t)
9215iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9216{
9217 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9218 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
9219 Assert(iGReg < 16);
9220
9221 /*
9222 * We can either just load the low 16-bit of the GPR into a host register
9223 * for the variable, or we can do so via a shadow copy host register. The
9224 * latter will avoid having to reload it if it's being stored later, but
9225 * will waste a host register if it isn't touched again. Since we don't
9226     * know what's going to happen, we choose the latter for now.
9227 */
9228 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9229 kIemNativeGstRegUse_ReadOnly);
9230
9231 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9232 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9233 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9234 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9235
9236 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9237 return off;
9238}
9239
9240
9241#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
9242 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9243
9244#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
9245 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9246
9247/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
9248DECL_INLINE_THROW(uint32_t)
9249iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
9250{
9251 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9252 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
9253 Assert(iGReg < 16);
9254
9255 /*
9256 * We can either just load the low 16-bit of the GPR into a host register
9257 * for the variable, or we can do so via a shadow copy host register. The
9258 * latter will avoid having to reload it if it's being stored later, but
9259 * will waste a host register if it isn't touched again. Since we don't
9260     * know what's going to happen, we choose the latter for now.
9261 */
9262 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9263 kIemNativeGstRegUse_ReadOnly);
9264
9265 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9266 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9267 if (cbSignExtended == sizeof(uint32_t))
9268 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9269 else
9270 {
9271 Assert(cbSignExtended == sizeof(uint64_t));
9272 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9273 }
9274 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9275
9276 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9277 return off;
9278}
9279
9280
9281#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
9282 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
9283
9284#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
9285 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
9286
9287/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
9288DECL_INLINE_THROW(uint32_t)
9289iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9290{
9291 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9292 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
9293 Assert(iGReg < 16);
9294
9295 /*
9296     * We can either just load the low 32-bit of the GPR into a host register
9297 * for the variable, or we can do so via a shadow copy host register. The
9298 * latter will avoid having to reload it if it's being stored later, but
9299 * will waste a host register if it isn't touched again. Since we don't
9300     * know what's going to happen, we choose the latter for now.
9301 */
9302 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9303 kIemNativeGstRegUse_ReadOnly);
9304
9305 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9306 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9307 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9308 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9309
9310 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9311 return off;
9312}
9313
9314
9315#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
9316 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
9317
9318/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
9319DECL_INLINE_THROW(uint32_t)
9320iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9321{
9322 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9323 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
9324 Assert(iGReg < 16);
9325
9326 /*
9327 * We can either just load the low 32-bit of the GPR into a host register
9328 * for the variable, or we can do so via a shadow copy host register. The
9329 * latter will avoid having to reload it if it's being stored later, but
9330 * will waste a host register if it isn't touched again. Since we don't
9331     * know what's going to happen, we choose the latter for now.
9332 */
9333 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9334 kIemNativeGstRegUse_ReadOnly);
9335
9336 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9337 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9338 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9339 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9340
9341 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9342 return off;
9343}
9344
9345
9346#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
9347 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9348
9349#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
9350 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9351
9352/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
9353 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
9354DECL_INLINE_THROW(uint32_t)
9355iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9356{
9357 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9358 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
9359 Assert(iGReg < 16);
9360
9361 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9362 kIemNativeGstRegUse_ReadOnly);
9363
9364 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9365 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9366 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
9367 /** @todo name the register a shadow one already? */
9368 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9369
9370 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9371 return off;
9372}
9373
9374
9375
9376/*********************************************************************************************************************************
9377* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
9378*********************************************************************************************************************************/
9379
9380#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
9381 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
9382
9383/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
9384DECL_INLINE_THROW(uint32_t)
9385iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
9386{
9387 Assert(iGRegEx < 20);
9388 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9389 kIemNativeGstRegUse_ForUpdate);
9390#ifdef RT_ARCH_AMD64
9391 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9392
9393 /* To the lowest byte of the register: mov r8, imm8 */
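    /* Note: host registers 4..7 need a bare REX prefix so the encoding selects SPL/BPL/SIL/DIL
       rather than AH/CH/DH/BH. */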
9394 if (iGRegEx < 16)
9395 {
9396 if (idxGstTmpReg >= 8)
9397 pbCodeBuf[off++] = X86_OP_REX_B;
9398 else if (idxGstTmpReg >= 4)
9399 pbCodeBuf[off++] = X86_OP_REX;
9400 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9401 pbCodeBuf[off++] = u8Value;
9402 }
9403 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
9404 else if (idxGstTmpReg < 4)
9405 {
9406 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
9407 pbCodeBuf[off++] = u8Value;
9408 }
9409 else
9410 {
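        /* Bits 15:8 of host registers other than the first four cannot be encoded directly
           (only AH/CH/DH/BH exist), so rotate them into the low byte, patch it and rotate back. */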
9411 /* ror reg64, 8 */
9412 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9413 pbCodeBuf[off++] = 0xc1;
9414 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9415 pbCodeBuf[off++] = 8;
9416
9417 /* mov reg8, imm8 */
9418 if (idxGstTmpReg >= 8)
9419 pbCodeBuf[off++] = X86_OP_REX_B;
9420 else if (idxGstTmpReg >= 4)
9421 pbCodeBuf[off++] = X86_OP_REX;
9422 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9423 pbCodeBuf[off++] = u8Value;
9424
9425 /* rol reg64, 8 */
9426 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9427 pbCodeBuf[off++] = 0xc1;
9428 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9429 pbCodeBuf[off++] = 8;
9430 }
9431
9432#elif defined(RT_ARCH_ARM64)
9433 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
9434 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9435 if (iGRegEx < 16)
9436 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
9437 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
9438 else
9439 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
9440 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
9441 iemNativeRegFreeTmp(pReNative, idxImmReg);
9442
9443#else
9444# error "Port me!"
9445#endif
9446
9447 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9448
9449 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9450
9451 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9452 return off;
9453}
9454
9455
9456#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
9457 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
9458
9459/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
9460DECL_INLINE_THROW(uint32_t)
9461iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
9462{
9463 Assert(iGRegEx < 20);
9464 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9465
9466 /*
9467     * If it's a constant value (unlikely) we treat this as an
9468     * IEM_MC_STORE_GREG_U8_CONST statement.
9469 */
9470 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9471 { /* likely */ }
9472 else
9473 {
9474 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9475 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9476 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9477 }
9478
9479 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9480 kIemNativeGstRegUse_ForUpdate);
9481 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9482
9483#ifdef RT_ARCH_AMD64
9484 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
9485 if (iGRegEx < 16)
9486 {
9487 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
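    /* The A64 ADD (immediate) encoding only holds a 12-bit unsigned offset, hence the assertion above. */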
9488 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9489 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9490 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9491 pbCodeBuf[off++] = X86_OP_REX;
9492 pbCodeBuf[off++] = 0x8a;
9493 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9494 }
9495 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
9496 else if (idxGstTmpReg < 4 && idxVarReg < 4)
9497 {
9498 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
9499 pbCodeBuf[off++] = 0x8a;
9500 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
9501 }
9502 else
9503 {
9504 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
9505
9506 /* ror reg64, 8 */
9507 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9508 pbCodeBuf[off++] = 0xc1;
9509 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9510 pbCodeBuf[off++] = 8;
9511
9512 /* mov reg8, reg8(r/m) */
9513 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9514 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9515 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9516 pbCodeBuf[off++] = X86_OP_REX;
9517 pbCodeBuf[off++] = 0x8a;
9518 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9519
9520 /* rol reg64, 8 */
9521 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9522 pbCodeBuf[off++] = 0xc1;
9523 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9524 pbCodeBuf[off++] = 8;
9525 }
9526
9527#elif defined(RT_ARCH_ARM64)
9528 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
9529 or
9530 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
9531 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9532 if (iGRegEx < 16)
9533 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
9534 else
9535 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
9536
9537#else
9538# error "Port me!"
9539#endif
9540 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9541
9542 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9543
9544 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9545 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9546 return off;
9547}
9548
9549
9550
9551#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
9552 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
9553
9554/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
9555DECL_INLINE_THROW(uint32_t)
9556iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
9557{
9558 Assert(iGReg < 16);
9559 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9560 kIemNativeGstRegUse_ForUpdate);
9561#ifdef RT_ARCH_AMD64
9562 /* mov reg16, imm16 */
9563 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9564 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9565 if (idxGstTmpReg >= 8)
9566 pbCodeBuf[off++] = X86_OP_REX_B;
9567 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
9568 pbCodeBuf[off++] = RT_BYTE1(uValue);
9569 pbCodeBuf[off++] = RT_BYTE2(uValue);
9570
9571#elif defined(RT_ARCH_ARM64)
9572 /* movk xdst, #uValue, lsl #0 */
9573 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9574 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
9575
9576#else
9577# error "Port me!"
9578#endif
9579
9580 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9581
9582 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9583 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9584 return off;
9585}
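/* For instance, assuming the guest register is shadowed in host rCX, storing the
   constant 0x1234 yields roughly 66 b9 34 12 (mov cx, 0x1234) on AMD64, or a single
   movk on ARM64, followed by the 64-bit write-back to the CPUMCTX register array. */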
9586
9587
9588#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
9589 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
9590
9591/** Emits code for IEM_MC_STORE_GREG_U16. */
9592DECL_INLINE_THROW(uint32_t)
9593iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9594{
9595 Assert(iGReg < 16);
9596 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9597
9598 /*
9599     * If it's a constant value (unlikely) we treat this as an
9600 * IEM_MC_STORE_GREG_U16_CONST statement.
9601 */
9602 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9603 { /* likely */ }
9604 else
9605 {
9606 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9607 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9608 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9609 }
9610
9611 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9612 kIemNativeGstRegUse_ForUpdate);
9613
9614#ifdef RT_ARCH_AMD64
9615 /* mov reg16, reg16 or [mem16] */
9616 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9617 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9618 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9619 {
9620 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
9621 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
9622 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
9623 pbCodeBuf[off++] = 0x8b;
9624 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
9625 }
9626 else
9627 {
9628 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
9629 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9630 if (idxGstTmpReg >= 8)
9631 pbCodeBuf[off++] = X86_OP_REX_R;
9632 pbCodeBuf[off++] = 0x8b;
9633 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9634 }
9635
9636#elif defined(RT_ARCH_ARM64)
9637 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
9638 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9639 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9640 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
9641 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9642
9643#else
9644# error "Port me!"
9645#endif
9646
9647 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9648
9649 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9650 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9651 return off;
9652}
9653
9654
9655#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
9656 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
9657
9658/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
9659DECL_INLINE_THROW(uint32_t)
9660iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
9661{
9662 Assert(iGReg < 16);
9663 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9664 kIemNativeGstRegUse_ForFullWrite);
9665 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9666 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9667 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9668 return off;
9669}
9670
9671
9672#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
9673 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
9674
9675/** Emits code for IEM_MC_STORE_GREG_U32. */
9676DECL_INLINE_THROW(uint32_t)
9677iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9678{
9679 Assert(iGReg < 16);
9680 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9681
9682 /*
9683     * If it's a constant value (unlikely) we treat this as an
9684 * IEM_MC_STORE_GREG_U32_CONST statement.
9685 */
9686 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9687 { /* likely */ }
9688 else
9689 {
9690 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9691 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9692 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9693 }
9694
9695 /*
9696     * For the rest we allocate a guest register for the variable and write
9697 * it to the CPUMCTX structure.
9698 */
9699 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9700 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9701#ifdef VBOX_STRICT
9702 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
9703#endif
9704 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9705 return off;
9706}
9707
9708
9709#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
9710 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
9711
9712/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
9713DECL_INLINE_THROW(uint32_t)
9714iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
9715{
9716 Assert(iGReg < 16);
9717 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9718 kIemNativeGstRegUse_ForFullWrite);
9719 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9720 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9721 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9722 return off;
9723}
9724
9725
9726#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
9727 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
9728
9729/** Emits code for IEM_MC_STORE_GREG_U64. */
9730DECL_INLINE_THROW(uint32_t)
9731iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9732{
9733 Assert(iGReg < 16);
9734 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9735
9736 /*
9737     * If it's a constant value (unlikely) we treat this as an
9738 * IEM_MC_STORE_GREG_U64_CONST statement.
9739 */
9740 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9741 { /* likely */ }
9742 else
9743 {
9744 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9745 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9746 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
9747 }
9748
9749 /*
9750     * For the rest we allocate a guest register for the variable and write
9751 * it to the CPUMCTX structure.
9752 */
9753 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9754 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9755 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9756 return off;
9757}
9758
9759
9760#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
9761 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
9762
9763/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
9764DECL_INLINE_THROW(uint32_t)
9765iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
9766{
9767 Assert(iGReg < 16);
9768 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9769 kIemNativeGstRegUse_ForUpdate);
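    /* A 32-bit register-to-register move zero-extends to 64 bits on both AMD64 and
       ARM64, which is exactly the semantics IEM_MC_CLEAR_HIGH_GREG_U64 needs. */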
9770 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
9771 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9772 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9773 return off;
9774}
9775
9776
9777/*********************************************************************************************************************************
9778* General purpose register manipulation (add, sub). *
9779*********************************************************************************************************************************/
9780
9781#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
9782    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
9783
9784/** Emits code for IEM_MC_ADD_GREG_U16. */
9785DECL_INLINE_THROW(uint32_t)
9786iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
9787{
9788 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9789 kIemNativeGstRegUse_ForUpdate);
9790
9791#ifdef RT_ARCH_AMD64
9792 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9793 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9794 if (idxGstTmpReg >= 8)
9795 pbCodeBuf[off++] = X86_OP_REX_B;
9796 if (uAddend == 1)
9797 {
9798 pbCodeBuf[off++] = 0xff; /* inc */
9799 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9800 }
9801 else
9802 {
9803 pbCodeBuf[off++] = 0x81;
9804 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9805 pbCodeBuf[off++] = uAddend;
9806 pbCodeBuf[off++] = 0;
9807 }
9808
9809#else
9810 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9811 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9812
9813    /* add tmp, gstgrp, uAddend */
9814 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
9815
9816 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
9817 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9818
9819 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9820#endif
9821
9822 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9823
9824 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9825
9826 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9827 return off;
9828}
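/* As a rough example, with the guest register shadowed in host rAX, an addend of 2
   comes out as 66 81 c0 02 00 (add ax, 2) on AMD64, while an addend of 1 uses the
   shorter inc encoding; the ARM64 path adds into a temp and bfi's the low 16 bits back. */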
9829
9830
9831#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
9832 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9833
9834#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
9835 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9836
9837/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
9838DECL_INLINE_THROW(uint32_t)
9839iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
9840{
9841 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9842 kIemNativeGstRegUse_ForUpdate);
9843
9844#ifdef RT_ARCH_AMD64
9845 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9846 if (f64Bit)
9847 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9848 else if (idxGstTmpReg >= 8)
9849 pbCodeBuf[off++] = X86_OP_REX_B;
9850 if (uAddend == 1)
9851 {
9852 pbCodeBuf[off++] = 0xff; /* inc */
9853 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9854 }
9855 else if (uAddend < 128)
9856 {
9857 pbCodeBuf[off++] = 0x83; /* add */
9858 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9859 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9860 }
9861 else
9862 {
9863 pbCodeBuf[off++] = 0x81; /* add */
9864 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9865 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9866 pbCodeBuf[off++] = 0;
9867 pbCodeBuf[off++] = 0;
9868 pbCodeBuf[off++] = 0;
9869 }
9870
9871#else
9872    /* add gstgrp, gstgrp, uAddend */
9873 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9874 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
9875
9876#endif
9877
9878 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9879
9880 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9881
9882 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9883 return off;
9884}
9885
9886
9887
9888#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
9889 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
9890
9891/** Emits code for IEM_MC_SUB_GREG_U16. */
9892DECL_INLINE_THROW(uint32_t)
9893iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
9894{
9895 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9896 kIemNativeGstRegUse_ForUpdate);
9897
9898#ifdef RT_ARCH_AMD64
9899 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9900 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9901 if (idxGstTmpReg >= 8)
9902 pbCodeBuf[off++] = X86_OP_REX_B;
9903 if (uSubtrahend == 1)
9904 {
9905 pbCodeBuf[off++] = 0xff; /* dec */
9906 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9907 }
9908 else
9909 {
9910 pbCodeBuf[off++] = 0x81;
9911 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9912 pbCodeBuf[off++] = uSubtrahend;
9913 pbCodeBuf[off++] = 0;
9914 }
9915
9916#else
9917 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9918 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9919
9920 /* sub tmp, gstgrp, uSubtrahend */
9921 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
9922
9923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
9924 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9925
9926 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9927#endif
9928
9929 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9930
9931 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9932
9933 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9934 return off;
9935}
9936
9937
9938#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
9939 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9940
9941#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
9942 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9943
9944/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
9945DECL_INLINE_THROW(uint32_t)
9946iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
9947{
9948 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9949 kIemNativeGstRegUse_ForUpdate);
9950
9951#ifdef RT_ARCH_AMD64
9952 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9953 if (f64Bit)
9954 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9955 else if (idxGstTmpReg >= 8)
9956 pbCodeBuf[off++] = X86_OP_REX_B;
9957 if (uSubtrahend == 1)
9958 {
9959 pbCodeBuf[off++] = 0xff; /* dec */
9960 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9961 }
9962 else if (uSubtrahend < 128)
9963 {
9964 pbCodeBuf[off++] = 0x83; /* sub */
9965 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9966 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9967 }
9968 else
9969 {
9970 pbCodeBuf[off++] = 0x81; /* sub */
9971 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9972 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9973 pbCodeBuf[off++] = 0;
9974 pbCodeBuf[off++] = 0;
9975 pbCodeBuf[off++] = 0;
9976 }
9977
9978#else
9979 /* sub tmp, gstgrp, uSubtrahend */
9980 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9981 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
9982
9983#endif
9984
9985 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9986
9987 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9988
9989 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9990 return off;
9991}
9992
9993
9994/*********************************************************************************************************************************
9995* Local variable manipulation (add, sub, and, or). *
9996*********************************************************************************************************************************/
9997
9998#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
9999 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10000
10001#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
10002 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10003
10004#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
10005 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10006
10007#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
10008 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10009
10010/** Emits code for AND'ing a local and a constant value. */
10011DECL_INLINE_THROW(uint32_t)
10012iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10013{
10014 Assert(pReNative->Core.aVars[idxVar].cbVar == cbMask);
10015#ifdef VBOX_STRICT
10016 switch (cbMask)
10017 {
10018 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10019 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10020 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10021 case sizeof(uint64_t): break;
10022 default: AssertFailedBreak();
10023 }
10024#endif
10025
10026 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10027 if (cbMask <= sizeof(uint32_t))
10028 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
10029 else
10030 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
10031 iemNativeVarRegisterRelease(pReNative, idxVar);
10032 return off;
10033}
10034
10035
10036#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
10037 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10038
10039#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
10040 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10041
10042#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
10043 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10044
10045#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
10046 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10047
10048/** Emits code for OR'ing a local and a constant value. */
10049DECL_INLINE_THROW(uint32_t)
10050iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10051{
10052 Assert(pReNative->Core.aVars[idxVar].cbVar == cbMask);
10053#ifdef VBOX_STRICT
10054 switch (cbMask)
10055 {
10056 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10057 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10058 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10059 case sizeof(uint64_t): break;
10060 default: AssertFailedBreak();
10061 }
10062#endif
10063
10064 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10065 if (cbMask <= sizeof(uint32_t))
10066 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
10067 else
10068 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
10069 iemNativeVarRegisterRelease(pReNative, idxVar);
10070 return off;
10071}
10072
10073
10074#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
10075 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
10076
10077#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
10078 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
10079
10080#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
10081 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
10082
10083/** Emits code for reversing the byte order in a local value. */
10084DECL_INLINE_THROW(uint32_t)
10085iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
10086{
10087 Assert(pReNative->Core.aVars[idxVar].cbVar == cbLocal);
10088
10089 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10090
10091 switch (cbLocal)
10092 {
10093 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
10094 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
10095 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
10096 default: AssertFailedBreak();
10097 }
10098
10099 iemNativeVarRegisterRelease(pReNative, idxVar);
10100 return off;
10101}
10102
10103
10104
10105/*********************************************************************************************************************************
10106* EFLAGS *
10107*********************************************************************************************************************************/
10108
10109#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10110# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
10111#else
10112# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
10113 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
10114
10115DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
10116{
10117 if (fEflOutput)
10118 {
10119 PVMCPUCC const pVCpu = pReNative->pVCpu;
10120# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10121 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
10122 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
10123 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
10124# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10125 if (fEflOutput & (a_fEfl)) \
10126 { \
10127 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
10128 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10129 else \
10130 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10131 } else do { } while (0)
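/* With the compressed layout both liveness bits being zero means the flag is in the
   CLOBBERED state (IEMLIVENESS_STATE_CLOBBERED == 0), i.e. its calculation could have
   been skipped; any other combination is counted as required. */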
10132# else
10133 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
10134 IEMLIVENESSBIT const LivenessClobbered =
10135 {
10136 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10137 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10138 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10139 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10140 };
10141 IEMLIVENESSBIT const LivenessDelayable =
10142 {
10143 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10144 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10145 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10146 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10147 };
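        /* I.e. a flag counts as clobbered when it is only written (neither read nor
           needed across a potential exception/call), and as delayable when the only
           additional interest in it is such a potential exception/call. */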
10148# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10149 if (fEflOutput & (a_fEfl)) \
10150 { \
10151 if (LivenessClobbered.a_fLivenessMember) \
10152 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10153 else if (LivenessDelayable.a_fLivenessMember) \
10154 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
10155 else \
10156 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10157 } else do { } while (0)
10158# endif
10159 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
10160 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
10161 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
10162 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
10163 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
10164 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
10165 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
10166# undef CHECK_FLAG_AND_UPDATE_STATS
10167 }
10168 RT_NOREF(fEflInput);
10169}
10170#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
10171
10172#undef IEM_MC_FETCH_EFLAGS /* should not be used */
10173#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10174 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
10175
10176/** Handles IEM_MC_FETCH_EFLAGS_EX. */
10177DECL_INLINE_THROW(uint32_t)
10178iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
10179 uint32_t fEflInput, uint32_t fEflOutput)
10180{
10181 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
10182 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
10183 RT_NOREF(fEflInput, fEflOutput);
10184
10185#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10186# ifdef VBOX_STRICT
10187 if ( pReNative->idxCurCall != 0
10188 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
10189 {
10190 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
10191 uint32_t const fBoth = fEflInput | fEflOutput;
10192#  define ASSERT_ONE_EFL(a_fEflConst, a_idxField) \
10193        AssertMsg(   !(fBoth & (a_fEflConst)) \
10194                  || (!(fEflInput & (a_fEflConst)) \
10195                      ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10196                      : !(fEflOutput & (a_fEflConst)) \
10197                      ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED(  iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10198                      : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
10199                  ("%s - %u\n", #a_fEflConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
10200 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
10201 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
10202 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
10203 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
10204 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
10205 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
10206 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
10207# undef ASSERT_ONE_EFL
10208 }
10209# endif
10210#endif
10211
10212    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
10213 * the existing shadow copy. */
10214 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
10215 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10216 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
10217 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10218 return off;
10219}
10220
10221
10222
10223/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
10224 * start using it with custom native code emission (inlining assembly
10225 * instruction helpers). */
10226#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
10227#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10228 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10229 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
10230
10231/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
10232DECL_INLINE_THROW(uint32_t)
10233iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
10234{
10235 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
10236 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
10237 RT_NOREF(fEflOutput);
10238
10239 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
10240
10241#ifdef VBOX_STRICT
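    /* Sanity-check the committed value: hit a breakpoint (0x2001) if the
       reserved-must-be-one bit (X86_EFL_RA1_MASK) is clear, and (0x2002) if any
       reserved-must-be-zero bits within the hardware mask are set. */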
10242 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
10243 uint32_t offFixup = off;
10244 off = iemNativeEmitJnzToFixed(pReNative, off, off);
10245 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
10246 iemNativeFixupFixedJump(pReNative, offFixup, off);
10247
10248 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
10249 offFixup = off;
10250 off = iemNativeEmitJzToFixed(pReNative, off, off);
10251 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
10252 iemNativeFixupFixedJump(pReNative, offFixup, off);
10253
10254    /** @todo validate that only bits in the fEflOutput mask changed. */
10255#endif
10256
10257 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10258 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
10259 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10260 return off;
10261}
10262
10263
10264
10265/*********************************************************************************************************************************
10266* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
10267*********************************************************************************************************************************/
10268
10269#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
10270 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
10271
10272#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
10273 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
10274
10275#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
10276 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
10277
10278
10279/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
10280 * IEM_MC_FETCH_SREG_ZX_U64. */
10281DECL_INLINE_THROW(uint32_t)
10282iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
10283{
10284 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10285 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
10286 Assert(iSReg < X86_SREG_COUNT);
10287
10288 /*
10289     * For now, we will not create a shadow copy of a selector. The rationale
10290     * is that since we do not recompile the popping and loading of segment
10291     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
10292     * pushing and moving to registers, there is only a small chance that the
10293     * shadow copy will be accessed again before the register is reloaded. One
10294     * scenario would be nested calls in 16-bit code, but I doubt it's worth
10295 * the extra register pressure atm.
10296 *
10297 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
10298 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
10299     * store scenario covered at present (r160730).
10300 */
10301 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10302 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10303 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
10304 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10305 return off;
10306}
10307
10308
10309
10310/*********************************************************************************************************************************
10311* Register references. *
10312*********************************************************************************************************************************/
10313
10314#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
10315 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
10316
10317#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
10318 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
10319
10320/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
10321DECL_INLINE_THROW(uint32_t)
10322iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
10323{
10324 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10325 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10326 Assert(iGRegEx < 20);
10327
10328 if (iGRegEx < 16)
10329 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10330 else
10331 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
10332
10333 /* If we've delayed writing back the register value, flush it now. */
10334 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10335
10336 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10337 if (!fConst)
10338 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
10339
10340 return off;
10341}
10342
10343#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
10344 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
10345
10346#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
10347 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
10348
10349#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
10350 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
10351
10352#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
10353 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
10354
10355#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
10356 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
10357
10358#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
10359 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
10360
10361#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
10362 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
10363
10364#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
10365 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
10366
10367#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
10368 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
10369
10370#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
10371 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
10372
10373/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
10374DECL_INLINE_THROW(uint32_t)
10375iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
10376{
10377 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10378 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10379 Assert(iGReg < 16);
10380
10381 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
10382
10383 /* If we've delayed writing back the register value, flush it now. */
10384 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
10385
10386 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10387 if (!fConst)
10388 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
10389
10390 return off;
10391}
10392
10393
10394#undef IEM_MC_REF_EFLAGS /* should not be used. */
10395#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
10396 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10397 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
10398
10399/** Handles IEM_MC_REF_EFLAGS. */
10400DECL_INLINE_THROW(uint32_t)
10401iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
10402{
10403 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10404 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10405
10406 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
10407
10408 /* If we've delayed writing back the register value, flush it now. */
10409 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
10410
10411 /* If there is a shadow copy of guest EFLAGS, flush it now. */
10412 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
10413
10414 return off;
10415}
10416
10417
10418/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
10419 * different code from the threaded recompiler, maybe it would be helpful. For now
10420 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
10421#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
10422
10423
10424
10425/*********************************************************************************************************************************
10426* Effective Address Calculation *
10427*********************************************************************************************************************************/
10428#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
10429 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
10430
10431/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
10432 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
10433DECL_INLINE_THROW(uint32_t)
10434iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10435 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
10436{
10437 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10438
10439 /*
10440 * Handle the disp16 form with no registers first.
10441 *
10442 * Convert to an immediate value, as that'll delay the register allocation
10443 * and assignment till the memory access / call / whatever and we can use
10444 * a more appropriate register (or none at all).
10445 */
10446 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
10447 {
10448 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
10449 return off;
10450 }
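    /* E.g. 'mov ax, [0x1234]' (mod=0, r/m=6) just turns the result variable into the
       constant 0x1234; no native code needs to be emitted here. */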
10451
10452    /* Determine the displacement. */
10453 uint16_t u16EffAddr;
10454 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10455 {
10456 case 0: u16EffAddr = 0; break;
10457 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
10458 case 2: u16EffAddr = u16Disp; break;
10459 default: AssertFailedStmt(u16EffAddr = 0);
10460 }
10461
10462 /* Determine the registers involved. */
10463 uint8_t idxGstRegBase;
10464 uint8_t idxGstRegIndex;
10465 switch (bRm & X86_MODRM_RM_MASK)
10466 {
10467 case 0:
10468 idxGstRegBase = X86_GREG_xBX;
10469 idxGstRegIndex = X86_GREG_xSI;
10470 break;
10471 case 1:
10472 idxGstRegBase = X86_GREG_xBX;
10473 idxGstRegIndex = X86_GREG_xDI;
10474 break;
10475 case 2:
10476 idxGstRegBase = X86_GREG_xBP;
10477 idxGstRegIndex = X86_GREG_xSI;
10478 break;
10479 case 3:
10480 idxGstRegBase = X86_GREG_xBP;
10481 idxGstRegIndex = X86_GREG_xDI;
10482 break;
10483 case 4:
10484 idxGstRegBase = X86_GREG_xSI;
10485 idxGstRegIndex = UINT8_MAX;
10486 break;
10487 case 5:
10488 idxGstRegBase = X86_GREG_xDI;
10489 idxGstRegIndex = UINT8_MAX;
10490 break;
10491 case 6:
10492 idxGstRegBase = X86_GREG_xBP;
10493 idxGstRegIndex = UINT8_MAX;
10494 break;
10495#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
10496 default:
10497#endif
10498 case 7:
10499 idxGstRegBase = X86_GREG_xBX;
10500 idxGstRegIndex = UINT8_MAX;
10501 break;
10502 }
10503
10504 /*
10505 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
10506 */
10507 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10508 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10509 kIemNativeGstRegUse_ReadOnly);
10510 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
10511 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10512 kIemNativeGstRegUse_ReadOnly)
10513 : UINT8_MAX;
10514#ifdef RT_ARCH_AMD64
10515 if (idxRegIndex == UINT8_MAX)
10516 {
10517 if (u16EffAddr == 0)
10518 {
10519            /* movzx ret, base */
10520 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
10521 }
10522 else
10523 {
10524 /* lea ret32, [base64 + disp32] */
10525 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10526 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10527 if (idxRegRet >= 8 || idxRegBase >= 8)
10528 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10529 pbCodeBuf[off++] = 0x8d;
10530 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10531 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
10532 else
10533 {
10534 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
10535 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10536 }
10537 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10538 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10539 pbCodeBuf[off++] = 0;
10540 pbCodeBuf[off++] = 0;
10541 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10542
10543 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10544 }
10545 }
10546 else
10547 {
10548 /* lea ret32, [index64 + base64 (+ disp32)] */
10549 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10550 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10551 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10552 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10553 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10554 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10555 pbCodeBuf[off++] = 0x8d;
10556 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
10557 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10558 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
10559 if (bMod == X86_MOD_MEM4)
10560 {
10561 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10562 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10563 pbCodeBuf[off++] = 0;
10564 pbCodeBuf[off++] = 0;
10565 }
10566 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10567 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10568 }
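    /* So, assuming the return, base and index variables land in eax, rbx and rsi, the
       base+index path comes out as something like:
           lea eax, [rbx + rsi + disp32]
           movzx eax, ax
       with the final zero-extension provided by iemNativeEmitClear16UpGpr. */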
10569
10570#elif defined(RT_ARCH_ARM64)
10571 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
10572 if (u16EffAddr == 0)
10573 {
10574 if (idxRegIndex == UINT8_MAX)
10575 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
10576 else
10577 {
10578 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
10579 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10580 }
10581 }
10582 else
10583 {
10584 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
10585 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
10586 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
10587 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10588 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
10589 else
10590 {
10591 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
10592 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10593 }
10594 if (idxRegIndex != UINT8_MAX)
10595 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
10596 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10597 }
10598
10599#else
10600# error "port me"
10601#endif
10602
10603 if (idxRegIndex != UINT8_MAX)
10604 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10605 iemNativeRegFreeTmp(pReNative, idxRegBase);
10606 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10607 return off;
10608}
10609
10610
10611#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
10612 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
10613
10614/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
10615 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
10616DECL_INLINE_THROW(uint32_t)
10617iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10618 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
10619{
10620 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10621
10622 /*
10623 * Handle the disp32 form with no registers first.
10624 *
10625 * Convert to an immediate value, as that'll delay the register allocation
10626 * and assignment till the memory access / call / whatever and we can use
10627 * a more appropriate register (or none at all).
10628 */
10629 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10630 {
10631 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
10632 return off;
10633 }
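    /* E.g. 'mov eax, [0x12345678]' (mod=0, r/m=5) simply makes the result variable the
       constant 0x12345678 without emitting any code at this point. */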
10634
10635    /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
10636 uint32_t u32EffAddr = 0;
10637 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10638 {
10639 case 0: break;
10640 case 1: u32EffAddr = (int8_t)u32Disp; break;
10641 case 2: u32EffAddr = u32Disp; break;
10642 default: AssertFailed();
10643 }
10644
10645 /* Get the register (or SIB) value. */
10646 uint8_t idxGstRegBase = UINT8_MAX;
10647 uint8_t idxGstRegIndex = UINT8_MAX;
10648 uint8_t cShiftIndex = 0;
10649 switch (bRm & X86_MODRM_RM_MASK)
10650 {
10651 case 0: idxGstRegBase = X86_GREG_xAX; break;
10652 case 1: idxGstRegBase = X86_GREG_xCX; break;
10653 case 2: idxGstRegBase = X86_GREG_xDX; break;
10654 case 3: idxGstRegBase = X86_GREG_xBX; break;
10655 case 4: /* SIB */
10656 {
10657            /* index w/ scaling. */
10658 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10659 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10660 {
10661 case 0: idxGstRegIndex = X86_GREG_xAX; break;
10662 case 1: idxGstRegIndex = X86_GREG_xCX; break;
10663 case 2: idxGstRegIndex = X86_GREG_xDX; break;
10664 case 3: idxGstRegIndex = X86_GREG_xBX; break;
10665 case 4: cShiftIndex = 0; /*no index*/ break;
10666 case 5: idxGstRegIndex = X86_GREG_xBP; break;
10667 case 6: idxGstRegIndex = X86_GREG_xSI; break;
10668 case 7: idxGstRegIndex = X86_GREG_xDI; break;
10669 }
10670
10671 /* base */
10672 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
10673 {
10674 case 0: idxGstRegBase = X86_GREG_xAX; break;
10675 case 1: idxGstRegBase = X86_GREG_xCX; break;
10676 case 2: idxGstRegBase = X86_GREG_xDX; break;
10677 case 3: idxGstRegBase = X86_GREG_xBX; break;
10678 case 4:
10679 idxGstRegBase = X86_GREG_xSP;
10680 u32EffAddr += uSibAndRspOffset >> 8;
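                    /* (Bits 8 and up of uSibAndRspOffset hold the fixed RSP/ESP bias
                       applied above, used for the 'pop [xSP]' style operands.) */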
10681 break;
10682 case 5:
10683 if ((bRm & X86_MODRM_MOD_MASK) != 0)
10684 idxGstRegBase = X86_GREG_xBP;
10685 else
10686 {
10687 Assert(u32EffAddr == 0);
10688 u32EffAddr = u32Disp;
10689 }
10690 break;
10691 case 6: idxGstRegBase = X86_GREG_xSI; break;
10692 case 7: idxGstRegBase = X86_GREG_xDI; break;
10693 }
10694 break;
10695 }
10696 case 5: idxGstRegBase = X86_GREG_xBP; break;
10697 case 6: idxGstRegBase = X86_GREG_xSI; break;
10698 case 7: idxGstRegBase = X86_GREG_xDI; break;
10699 }
10700
10701 /*
10702 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
10703 * the start of the function.
10704 */
10705 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10706 {
10707 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
10708 return off;
10709 }
10710
10711 /*
10712 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10713 */
10714 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10715 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10716 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10717 kIemNativeGstRegUse_ReadOnly);
10718 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10719 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10720 kIemNativeGstRegUse_ReadOnly);
10721
10722 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10723 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10724 {
10725 idxRegBase = idxRegIndex;
10726 idxRegIndex = UINT8_MAX;
10727 }
10728
10729#ifdef RT_ARCH_AMD64
10730 if (idxRegIndex == UINT8_MAX)
10731 {
10732 if (u32EffAddr == 0)
10733 {
10734 /* mov ret, base */
10735 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10736 }
10737 else
10738 {
10739 /* lea ret32, [base64 + disp32] */
10740 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10741 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10742 if (idxRegRet >= 8 || idxRegBase >= 8)
10743 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10744 pbCodeBuf[off++] = 0x8d;
10745 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10746 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10747 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10748 else
10749 {
10750 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10751 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10752 }
10753 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10754 if (bMod == X86_MOD_MEM4)
10755 {
10756 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10757 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10758 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10759 }
10760 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10761 }
10762 }
10763 else
10764 {
10765 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10766 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10767 if (idxRegBase == UINT8_MAX)
10768 {
10769 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
10770 if (idxRegRet >= 8 || idxRegIndex >= 8)
10771 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10772 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10773 pbCodeBuf[off++] = 0x8d;
10774 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10775 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10776 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10777 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10778 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10779 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10780 }
10781 else
10782 {
10783 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10784 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10785 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10786 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10787 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10788 pbCodeBuf[off++] = 0x8d;
10789 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10790 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10791 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10792 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10793 if (bMod != X86_MOD_MEM0)
10794 {
10795 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10796 if (bMod == X86_MOD_MEM4)
10797 {
10798 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10799 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10800 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10801 }
10802 }
10803 }
10804 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10805 }
10806
10807#elif defined(RT_ARCH_ARM64)
10808 if (u32EffAddr == 0)
10809 {
10810 if (idxRegIndex == UINT8_MAX)
10811 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10812 else if (idxRegBase == UINT8_MAX)
10813 {
10814 if (cShiftIndex == 0)
10815 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
10816 else
10817 {
10818 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10819 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
10820 }
10821 }
10822 else
10823 {
10824 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10825 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10826 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10827 }
10828 }
10829 else
10830 {
10831 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
10832 {
10833 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10834 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
10835 }
10836 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
10837 {
10838 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10839 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10840 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
10841 }
10842 else
10843 {
10844 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
10845 if (idxRegBase != UINT8_MAX)
10846 {
10847 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10848 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10849 }
10850 }
10851 if (idxRegIndex != UINT8_MAX)
10852 {
10853 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10854 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10855 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10856 }
10857 }
10858
10859#else
10860# error "port me"
10861#endif
10862
10863 if (idxRegIndex != UINT8_MAX)
10864 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10865 if (idxRegBase != UINT8_MAX)
10866 iemNativeRegFreeTmp(pReNative, idxRegBase);
10867 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10868 return off;
10869}
10870
10871
10872#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10873 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10874 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10875
10876#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10877 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10878 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10879
10880#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10881 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10882 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
10883
10884/**
10885 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
10886 *
10887 * @returns New off.
10888 * @param   pReNative           The native recompile state.
10889 * @param   off                 The code buffer offset.
10890 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
10891 * bit 4 to REX.X. The two bits are part of the
10892 * REG sub-field, which isn't needed in this
10893 * function.
10894 * @param uSibAndRspOffset Two parts:
10895 * - The first 8 bits make up the SIB byte.
10896 * - The next 8 bits are the fixed RSP/ESP offset
10897 * in case of a pop [xSP].
10898 * @param u32Disp The displacement byte/word/dword, if any.
10899 * @param cbInstr The size of the fully decoded instruction. Used
10900 * for RIP relative addressing.
10901 * @param idxVarRet The result variable number.
10902 * @param f64Bit Whether to use a 64-bit or 32-bit address size
10903 * when calculating the address.
10904 *
10905 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
10906 */
10907DECL_INLINE_THROW(uint32_t)
10908iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
10909 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
10910{
10911 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10912
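    /* Illustration of the inputs (hypothetical instruction): for 'mov rax, [rbx + rsi*4 + 0x20]'
       the decoder hands us bRmEx with mod=01b and rm=100b (SIB follows, REX.B/REX.X folded into
       bits 3/4), the low byte of uSibAndRspOffset holding scale=10b/index=rsi/base=rbx, and
       u32Disp = 0x20. */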
10913 /*
10914 * Special case the rip + disp32 form first.
10915 */
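    /* E.g. 'lea rax, [rip + 0x1000]' (mod=00, rm=101): the effective address is the RIP of
       the *next* instruction plus disp32, which is why cbInstr is added in below. */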
10916 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10917 {
10918 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10919 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
10920 kIemNativeGstRegUse_ReadOnly);
10921#ifdef RT_ARCH_AMD64
10922 if (f64Bit)
10923 {
10924 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
10925 if ((int32_t)offFinalDisp == offFinalDisp)
10926 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
10927 else
10928 {
10929 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
10930 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
10931 }
10932 }
10933 else
10934 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
10935
10936#elif defined(RT_ARCH_ARM64)
10937 if (f64Bit)
10938 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10939 (int64_t)(int32_t)u32Disp + cbInstr);
10940 else
10941 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10942 (int32_t)u32Disp + cbInstr);
10943
10944#else
10945# error "Port me!"
10946#endif
10947 iemNativeRegFreeTmp(pReNative, idxRegPc);
10948 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10949 return off;
10950 }
10951
10952    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
10953 int64_t i64EffAddr = 0;
10954 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10955 {
10956 case 0: break;
10957 case 1: i64EffAddr = (int8_t)u32Disp; break;
10958 case 2: i64EffAddr = (int32_t)u32Disp; break;
10959 default: AssertFailed();
10960 }
10961
10962 /* Get the register (or SIB) value. */
10963 uint8_t idxGstRegBase = UINT8_MAX;
10964 uint8_t idxGstRegIndex = UINT8_MAX;
10965 uint8_t cShiftIndex = 0;
10966 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
10967 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
10968 else /* SIB: */
10969 {
10970        /* index w/ scaling. */
10971 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10972 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10973 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
10974 if (idxGstRegIndex == 4)
10975 {
10976 /* no index */
10977 cShiftIndex = 0;
10978 idxGstRegIndex = UINT8_MAX;
10979 }
10980
10981 /* base */
10982 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
10983 if (idxGstRegBase == 4)
10984 {
10985 /* pop [rsp] hack */
10986 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
10987 }
10988 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
10989 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
10990 {
10991 /* mod=0 and base=5 -> disp32, no base reg. */
10992 Assert(i64EffAddr == 0);
10993 i64EffAddr = (int32_t)u32Disp;
10994 idxGstRegBase = UINT8_MAX;
10995 }
10996 }
10997
10998 /*
10999 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
11000 * the start of the function.
11001 */
11002 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
11003 {
11004 if (f64Bit)
11005 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
11006 else
11007 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
11008 return off;
11009 }
11010
11011 /*
11012 * Now emit code that calculates:
11013 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11014 * or if !f64Bit:
11015 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11016 */
11017 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11018 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
11019 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11020 kIemNativeGstRegUse_ReadOnly);
11021 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
11022 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11023 kIemNativeGstRegUse_ReadOnly);
11024
11025 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
11026 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
11027 {
11028 idxRegBase = idxRegIndex;
11029 idxRegIndex = UINT8_MAX;
11030 }
11031
11032#ifdef RT_ARCH_AMD64
11033 uint8_t bFinalAdj;
11034 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
11035 bFinalAdj = 0; /* likely */
11036 else
11037 {
11038 /* pop [rsp] with a problematic disp32 value. Split out the
11039 RSP offset and add it separately afterwards (bFinalAdj). */
11040 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
11041 Assert(idxGstRegBase == X86_GREG_xSP);
11042 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
11043 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
11044 Assert(bFinalAdj != 0);
11045 i64EffAddr -= bFinalAdj;
11046 Assert((int32_t)i64EffAddr == i64EffAddr);
11047 }
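    /* Illustrative example: a disp32 of 0x7fffffff together with the 8 byte RSP offset of a
       64-bit pop gives 0x80000007, which no longer fits the signed 32-bit displacement of a
       lea, so the 8 is re-added separately via bFinalAdj after the lea below. */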
11048 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
11049//pReNative->pInstrBuf[off++] = 0xcc;
11050
11051 if (idxRegIndex == UINT8_MAX)
11052 {
11053 if (u32EffAddr == 0)
11054 {
11055 /* mov ret, base */
11056 if (f64Bit)
11057 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
11058 else
11059 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11060 }
11061 else
11062 {
11063 /* lea ret, [base + disp32] */
11064 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11065 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11066 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
11067 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11068 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11069 | (f64Bit ? X86_OP_REX_W : 0);
11070 pbCodeBuf[off++] = 0x8d;
11071 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11072 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11073 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
11074 else
11075 {
11076 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11077 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11078 }
11079 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11080 if (bMod == X86_MOD_MEM4)
11081 {
11082 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11083 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11084 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11085 }
11086 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11087 }
11088 }
11089 else
11090 {
11091 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11092 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11093 if (idxRegBase == UINT8_MAX)
11094 {
11095 /* lea ret, [(index64 << cShiftIndex) + disp32] */
11096 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
11097 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11098 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11099 | (f64Bit ? X86_OP_REX_W : 0);
11100 pbCodeBuf[off++] = 0x8d;
11101 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
11102 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
11103 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11104 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11105 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11106 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11107 }
11108 else
11109 {
11110 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
11111 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11112 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11113 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11114 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11115 | (f64Bit ? X86_OP_REX_W : 0);
11116 pbCodeBuf[off++] = 0x8d;
11117 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
11118 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11119 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11120 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
11121 if (bMod != X86_MOD_MEM0)
11122 {
11123 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11124 if (bMod == X86_MOD_MEM4)
11125 {
11126 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11127 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11128 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11129 }
11130 }
11131 }
11132 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11133 }
11134
11135 if (!bFinalAdj)
11136 { /* likely */ }
11137 else
11138 {
11139 Assert(f64Bit);
11140 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
11141 }
11142
11143#elif defined(RT_ARCH_ARM64)
11144 if (i64EffAddr == 0)
11145 {
11146 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11147 if (idxRegIndex == UINT8_MAX)
11148 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
11149 else if (idxRegBase != UINT8_MAX)
11150 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
11151 f64Bit, false /*fSetFlags*/, cShiftIndex);
11152 else
11153 {
11154 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
11155 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
11156 }
11157 }
11158 else
11159 {
11160 if (f64Bit)
11161 { /* likely */ }
11162 else
11163 i64EffAddr = (int32_t)i64EffAddr;
11164
11165 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
11166 {
11167 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11168 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
11169 }
11170 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
11171 {
11172 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11173 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
11174 }
11175 else
11176 {
11177 if (f64Bit)
11178 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
11179 else
11180 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
11181 if (idxRegBase != UINT8_MAX)
11182 {
11183 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11184 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
11185 }
11186 }
11187 if (idxRegIndex != UINT8_MAX)
11188 {
11189 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11190 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
11191 f64Bit, false /*fSetFlags*/, cShiftIndex);
11192 }
11193 }
11194
11195#else
11196# error "port me"
11197#endif
11198
11199 if (idxRegIndex != UINT8_MAX)
11200 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11201 if (idxRegBase != UINT8_MAX)
11202 iemNativeRegFreeTmp(pReNative, idxRegBase);
11203 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11204 return off;
11205}
11206
11207
11208/*********************************************************************************************************************************
11209* TLB Lookup. *
11210*********************************************************************************************************************************/
11211
11212/**
11213 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
11214 */
11215DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
11216{
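    /* uSegAndSizeAndAccess packs three fields: byte 0 = segment register index (0xff for a
       flat/already linear address), byte 1 = access size in bytes, bits 16 and up = the
       IEM_ACCESS_XXX flags. */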
11217 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
11218 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
11219 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
11220 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
11221
11222 /* Do the lookup manually. */
11223 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
11224 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
11225 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
11226 if (RT_LIKELY(pTlbe->uTag == uTag))
11227 {
11228 /*
11229 * Check TLB page table level access flags.
11230 */
11231 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
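    /* CPL 3 gives 3 + 1 = 4, i.e. exactly the NO_USER bit; CPL 0 thru 2 gives 1..3, which the
       AND below clears, so supervisor accesses ignore the U/S restriction. */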
11232 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
11233 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
11234 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
11235 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
11236 | IEMTLBE_F_PG_UNASSIGNED
11237 | IEMTLBE_F_PT_NO_ACCESSED
11238 | fNoWriteNoDirty | fNoUser);
11239 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
11240 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
11241 {
11242 /*
11243 * Return the address.
11244 */
11245 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
11246 if ((uintptr_t)pbAddr == uResult)
11247 return;
11248 RT_NOREF(cbMem);
11249 AssertFailed();
11250 }
11251 else
11252 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
11253 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
11254 }
11255 else
11256 AssertFailed();
11257 RT_BREAKPOINT();
11258}
11259
11260/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
11261
11262
11263/*********************************************************************************************************************************
11264* Memory fetches and stores common *
11265*********************************************************************************************************************************/
11266
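/** Memory operation kind for iemNativeEmitMemFetchStoreDataCommon: a plain store, a plain
 *  fetch, or a fetch with zero-/sign-extension of the loaded value into a wider destination
 *  (e.g. Fetch_Sx_U64 with cbMem=4 behaves like a movsxd style load). */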
11267typedef enum IEMNATIVEMITMEMOP
11268{
11269 kIemNativeEmitMemOp_Store = 0,
11270 kIemNativeEmitMemOp_Fetch,
11271 kIemNativeEmitMemOp_Fetch_Zx_U16,
11272 kIemNativeEmitMemOp_Fetch_Zx_U32,
11273 kIemNativeEmitMemOp_Fetch_Zx_U64,
11274 kIemNativeEmitMemOp_Fetch_Sx_U16,
11275 kIemNativeEmitMemOp_Fetch_Sx_U32,
11276 kIemNativeEmitMemOp_Fetch_Sx_U64
11277} IEMNATIVEMITMEMOP;
11278
11279/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
11280 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
11281 * (with iSegReg = UINT8_MAX). */
11282DECL_INLINE_THROW(uint32_t)
11283iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
11284 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
11285 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
11286{
11287 /*
11288 * Assert sanity.
11289 */
11290 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11291 Assert( enmOp != kIemNativeEmitMemOp_Store
11292 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
11293 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
11294 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
11295 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
11296 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
11297 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11298 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
11299 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
11300 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
11301#ifdef VBOX_STRICT
11302 if (iSegReg == UINT8_MAX)
11303 {
11304 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11305 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11306 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11307 switch (cbMem)
11308 {
11309 case 1:
11310 Assert( pfnFunction
11311 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
11312 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11313 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11314 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11315 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11316 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
11317 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
11318 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
11319 : UINT64_C(0xc000b000a0009000) ));
11320 break;
11321 case 2:
11322 Assert( pfnFunction
11323 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
11324 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11325 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11326 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11327 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
11328 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
11329 : UINT64_C(0xc000b000a0009000) ));
11330 break;
11331 case 4:
11332 Assert( pfnFunction
11333 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
11334 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11335 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11336 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
11337 : UINT64_C(0xc000b000a0009000) ));
11338 break;
11339 case 8:
11340 Assert( pfnFunction
11341 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
11342 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
11343 : UINT64_C(0xc000b000a0009000) ));
11344 break;
11345 }
11346 }
11347 else
11348 {
11349 Assert(iSegReg < 6);
11350 switch (cbMem)
11351 {
11352 case 1:
11353 Assert( pfnFunction
11354 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
11355 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
11356 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11357 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11358 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11359 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
11360 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
11361 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
11362 : UINT64_C(0xc000b000a0009000) ));
11363 break;
11364 case 2:
11365 Assert( pfnFunction
11366 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
11367 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
11368 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11369 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11370 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
11371 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
11372 : UINT64_C(0xc000b000a0009000) ));
11373 break;
11374 case 4:
11375 Assert( pfnFunction
11376 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
11377 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
11378 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
11379 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
11380 : UINT64_C(0xc000b000a0009000) ));
11381 break;
11382 case 8:
11383 Assert( pfnFunction
11384 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
11385 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
11386 : UINT64_C(0xc000b000a0009000) ));
11387 break;
11388 }
11389 }
11390#endif
11391
11392#ifdef VBOX_STRICT
11393 /*
11394 * Check that the fExec flags we've got make sense.
11395 */
11396 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11397#endif
11398
11399 /*
11400 * To keep things simple we have to commit any pending writes first as we
11401 * may end up making calls.
11402 */
11403 /** @todo we could postpone this till we make the call and reload the
11404 * registers after returning from the call. Not sure if that's sensible or
11405 * not, though. */
11406 off = iemNativeRegFlushPendingWrites(pReNative, off);
11407
11408#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11409 /*
11410 * Move/spill/flush stuff out of call-volatile registers.
11411 * This is the easy way out. We could contain this to the tlb-miss branch
11412 * by saving and restoring active stuff here.
11413 */
11414 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11415#endif
11416
11417 /*
11418 * Define labels and allocate the result register (trying for the return
11419 * register if we can).
11420 */
11421 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11422 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
11423 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
11424 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
11425 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
11426 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
11427 uint8_t const idxRegValueStore = !TlbState.fSkip
11428 && enmOp == kIemNativeEmitMemOp_Store
11429 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
11430 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
11431 : UINT8_MAX;
11432 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11433 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11434 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11435 : UINT32_MAX;
11436
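    /* Rough shape of the code emitted below when the TLB lookup isn't skipped:
           jmp   TlbLookup
       TlbMiss:     set up arguments, call pfnFunction, jmp TlbDone
       TlbLookup:   inline TLB probe + the actual load/store
       TlbDone:
       When the lookup is skipped only the TlbMiss body is emitted, without the jumps. */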
11437 /*
11438 * Jump to the TLB lookup code.
11439 */
11440 if (!TlbState.fSkip)
11441 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11442
11443 /*
11444 * TlbMiss:
11445 *
11446 * Call helper to do the fetching.
11447 * We flush all guest register shadow copies here.
11448 */
11449 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
11450
11451#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11452 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11453#else
11454 RT_NOREF(idxInstr);
11455#endif
11456
11457#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11458 /* Save variables in volatile registers. */
11459 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11460 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
11461 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
11462 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11463#endif
11464
11465 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
11466 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
11467 if (enmOp == kIemNativeEmitMemOp_Store)
11468 {
11469 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
11470 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
11471#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11472 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11473#else
11474 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
11475 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
11476#endif
11477 }
11478
11479 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
11480 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
11481#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11482 fVolGregMask);
11483#else
11484 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
11485#endif
11486
11487 if (iSegReg != UINT8_MAX)
11488 {
11489 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
11490 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
11491 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
11492 }
11493
11494 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11495 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11496
11497 /* Done setting up parameters, make the call. */
11498 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11499
11500 /*
11501 * Put the result in the right register if this is a fetch.
11502 */
11503 if (enmOp != kIemNativeEmitMemOp_Store)
11504 {
11505 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
11506 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
11507 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
11508 }
11509
11510#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11511 /* Restore variables and guest shadow registers to volatile registers. */
11512 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11513 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11514#endif
11515
11516#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11517 if (!TlbState.fSkip)
11518 {
11519 /* end of TlbMiss - Jump to the done label. */
11520 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11521 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11522
11523 /*
11524 * TlbLookup:
11525 */
11526 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
11527 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
11528 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
11529
11530 /*
11531 * Emit code to do the actual storing / fetching.
11532 */
11533 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
11534# ifdef VBOX_WITH_STATISTICS
11535 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11536 enmOp == kIemNativeEmitMemOp_Store
11537                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
11538                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
11539# endif
11540 switch (enmOp)
11541 {
11542 case kIemNativeEmitMemOp_Store:
11543 if (pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate)
11544 {
11545 switch (cbMem)
11546 {
11547 case 1:
11548 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11549 break;
11550 case 2:
11551 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11552 break;
11553 case 4:
11554 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11555 break;
11556 case 8:
11557 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11558 break;
11559 default:
11560 AssertFailed();
11561 }
11562 }
11563 else
11564 {
11565 switch (cbMem)
11566 {
11567 case 1:
11568 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off,
11569 (uint8_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11570 idxRegMemResult, TlbState.idxReg1);
11571 break;
11572 case 2:
11573 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
11574 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11575 idxRegMemResult, TlbState.idxReg1);
11576 break;
11577 case 4:
11578 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
11579 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11580 idxRegMemResult, TlbState.idxReg1);
11581 break;
11582 case 8:
11583 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
11584 idxRegMemResult, TlbState.idxReg1);
11585 break;
11586 default:
11587 AssertFailed();
11588 }
11589 }
11590 break;
11591
11592 case kIemNativeEmitMemOp_Fetch:
11593 case kIemNativeEmitMemOp_Fetch_Zx_U16:
11594 case kIemNativeEmitMemOp_Fetch_Zx_U32:
11595 case kIemNativeEmitMemOp_Fetch_Zx_U64:
11596 switch (cbMem)
11597 {
11598 case 1:
11599 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11600 break;
11601 case 2:
11602 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11603 break;
11604 case 4:
11605 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11606 break;
11607 case 8:
11608 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11609 break;
11610 default:
11611 AssertFailed();
11612 }
11613 break;
11614
11615 case kIemNativeEmitMemOp_Fetch_Sx_U16:
11616 Assert(cbMem == 1);
11617 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11618 break;
11619
11620 case kIemNativeEmitMemOp_Fetch_Sx_U32:
11621 Assert(cbMem == 1 || cbMem == 2);
11622 if (cbMem == 1)
11623 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11624 else
11625 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11626 break;
11627
11628 case kIemNativeEmitMemOp_Fetch_Sx_U64:
11629 switch (cbMem)
11630 {
11631 case 1:
11632 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11633 break;
11634 case 2:
11635 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11636 break;
11637 case 4:
11638 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11639 break;
11640 default:
11641 AssertFailed();
11642 }
11643 break;
11644
11645 default:
11646 AssertFailed();
11647 }
11648
11649 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11650
11651 /*
11652 * TlbDone:
11653 */
11654 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11655
11656 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
11657
11658# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11659 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
11660 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11661# endif
11662 }
11663#else
11664 RT_NOREF(fAlignMask, idxLabelTlbMiss);
11665#endif
11666
11667 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
11668 iemNativeVarRegisterRelease(pReNative, idxVarValue);
11669 return off;
11670}
11671
11672
11673
11674/*********************************************************************************************************************************
11675* Memory fetches (IEM_MEM_FETCH_XXX). *
11676*********************************************************************************************************************************/
11677
11678/* 8-bit segmented: */
11679#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
11680 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
11681 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11682 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11683
11684#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11685 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11686 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11687 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11688
11689#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11690 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11691 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11692 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11693
11694#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11695 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11696 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11697 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11698
11699#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11700 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11701 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11702 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11703
11704#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11705 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11706 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11707 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11708
11709#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11710 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11711 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11712 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11713
11714/* 16-bit segmented: */
11715#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11716 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11717 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11718 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11719
11720#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11721 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11722 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11723 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11724
11725#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11726 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11727 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11728 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11729
11730#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11731 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11732 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11733 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11734
11735#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11736 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11737 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11738 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11739
11740#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11741 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11742 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11743 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11744
11745
11746/* 32-bit segmented: */
11747#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11748 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11749 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11750 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11751
11752#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11753 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11754 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11755 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11756
11757#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11758 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11759 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11760 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11761
11762#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11763 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11764 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11765 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11766
11767
11768/* 64-bit segmented: */
11769#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11770 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11771 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11772 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
11773
11774
11775
11776/* 8-bit flat: */
11777#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
11778 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
11779 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11780 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11781
11782#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
11783 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11784 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11785 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11786
11787#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
11788 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11789 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11790 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11791
11792#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
11793 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11794 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11795 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11796
11797#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
11798 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11799 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11800 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11801
11802#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
11803 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11804 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11805 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11806
11807#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
11808 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11809 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11810 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11811
11812
11813/* 16-bit flat: */
11814#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
11815 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11816 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11817 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11818
11819#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
11820 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11821 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11822 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11823
11824#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
11825 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11826 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11827 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11828
11829#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
11830 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11831 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11832 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11833
11834#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
11835 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11836 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11837 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11838
11839#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
11840 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11841 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11842 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11843
11844/* 32-bit flat: */
11845#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
11846 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11847 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11848 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11849
11850#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
11851 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11852 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11853 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11854
11855#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
11856 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11857 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11858 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11859
11860#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
11861 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11862 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11863 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11864
11865/* 64-bit flat: */
11866#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
11867 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11868 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11869 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
11870
11871
11872
11873/*********************************************************************************************************************************
11874* Memory stores (IEM_MEM_STORE_XXX). *
11875*********************************************************************************************************************************/
11876
11877#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
11878 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
11879 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11880 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11881
11882#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
11883 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
11884 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11885 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11886
11887#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
11888 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
11889 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11890 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11891
11892#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
11893 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
11894 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11895 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11896
11897
11898#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
11899 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
11900 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11901 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11902
11903#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
11904 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
11905 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11906 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11907
11908#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
11909 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
11910 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11911 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11912
11913#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
11914 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
11915 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11916 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11917
11918
11919#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
11920 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11921 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11922
11923#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
11924 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11925 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11926
11927#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
11928 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11929 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11930
11931#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
11932 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11933 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11934
11935
11936#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
11937 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11938 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11939
11940#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
11941 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11942 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11943
11944#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
11945 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11946 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11947
11948#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
11949 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11950 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11951
11952/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
11953 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
11954DECL_INLINE_THROW(uint32_t)
11955iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
11956 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
11957{
11958 /*
11959 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
11960 * to do the grunt work.
11961 */
11962 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
11963 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
11964 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
11965 pfnFunction, idxInstr);
11966 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
11967 return off;
11968}
11969
11970
11971
11972/*********************************************************************************************************************************
11973* Stack Accesses. *
11974*********************************************************************************************************************************/
11975/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
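/* E.g. IEM_MC_FLAT64_PUSH_U64 below passes RT_MAKE_U32_FROM_U8(64, 64, 0, 0): a 64-bit
   value pushed on a 64-bit flat stack; fSReg=1 marks the segment register push variants. */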
11976#define IEM_MC_PUSH_U16(a_u16Value) \
11977 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11978 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
11979#define IEM_MC_PUSH_U32(a_u32Value) \
11980 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11981 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
11982#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
11983 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
11984 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
11985#define IEM_MC_PUSH_U64(a_u64Value) \
11986 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
11987 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
11988
11989#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
11990 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
11991 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11992#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
11993 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
11994 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
11995#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
11996 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
11997 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
11998
11999#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
12000 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12001 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
12002#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
12003 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12004 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
12005
12006
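/** Emits the 16-bit SP variant of the push stack pointer update: SP wraps modulo 64K and
 * only bits 15:0 of RSP are modified, e.g. SP=0x0001 pushing a word yields an effective
 * address of 0xFFFF. */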
12007DECL_FORCE_INLINE_THROW(uint32_t)
12008iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12009{
12010 /* Use16BitSp: */
12011#ifdef RT_ARCH_AMD64
12012 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12013 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12014#else
12015 /* sub regeff, regrsp, #cbMem */
12016 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
12017 /* and regeff, regeff, #0xffff */
12018 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12019 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
12020    /* bfi regrsp, regeff, #0, #16 - copies bits 15:0 of idxRegEffSp into bits 15:0 of idxRegRsp. */
12021 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
12022#endif
12023 return off;
12024}
12025
12026
12027DECL_FORCE_INLINE(uint32_t)
12028iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12029{
12030 /* Use32BitSp: */
12031 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12032 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12033 return off;
12034}
12035
12036
12037/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
12038DECL_INLINE_THROW(uint32_t)
12039iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
12040 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12041{
12042 /*
12043 * Assert sanity.
12044 */
12045 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12046#ifdef VBOX_STRICT
12047 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12048 {
12049 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12050 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12051 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12052 Assert( pfnFunction
12053 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12054 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
12055 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
12056 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12057 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
12058 : UINT64_C(0xc000b000a0009000) ));
12059 }
12060 else
12061 Assert( pfnFunction
12062 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
12063 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
12064 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
12065 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
12066 : UINT64_C(0xc000b000a0009000) ));
12067#endif
12068
12069#ifdef VBOX_STRICT
12070 /*
12071 * Check that the fExec flags we've got make sense.
12072 */
12073 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12074#endif
12075
12076 /*
12077 * To keep things simple we have to commit any pending writes first as we
12078 * may end up making calls.
12079 */
12080 /** @todo we could postpone this till we make the call and reload the
12081 * registers after returning from the call. Not sure if that's sensible or
12082 * not, though. */
12083 off = iemNativeRegFlushPendingWrites(pReNative, off);
12084
12085 /*
12086 * First we calculate the new RSP and the effective stack pointer value.
12087 * For 64-bit mode and flat 32-bit these two are the same.
12088 * (Code structure is very similar to that of PUSH)
12089 */
12090 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12091 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
12092 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
12093 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
12094 ? cbMem : sizeof(uint16_t);
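    /* I.e. a segment register push on Intel CPUs outside 16-bit mode only writes
       16 bits (the stack pointer is still adjusted by cbMem); the remaining Intel
       quirk, a 32-bit segment push in 16-bit mode, is handled at the TLB-hit store
       below by merging in the high half of EFLAGS. */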
12095 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12096 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12097 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12098 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12099 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12100 if (cBitsFlat != 0)
12101 {
12102 Assert(idxRegEffSp == idxRegRsp);
12103 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12104 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12105 if (cBitsFlat == 64)
12106 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
12107 else
12108 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
12109 }
12110 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12111 {
12112 Assert(idxRegEffSp != idxRegRsp);
12113 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12114 kIemNativeGstRegUse_ReadOnly);
12115#ifdef RT_ARCH_AMD64
12116 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12117#else
12118 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12119#endif
12120 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12121 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12122 offFixupJumpToUseOtherBitSp = off;
12123 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12124 {
12125 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12126 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12127 }
12128 else
12129 {
12130 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12131 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12132 }
12133 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12134 }
12135 /* SpUpdateEnd: */
12136 uint32_t const offLabelSpUpdateEnd = off;
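    /* The SP update matching the current mode was emitted inline above; if the
       SS.ATTR.D test disagreed, the conditional jump goes to the alternative update
       sequence emitted after the TLB-lookup jump below, which then jumps back here
       to SpUpdateEnd.  (Flat modes need no such branch.) */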
12137
12138 /*
12139 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
12140 * we're skipping lookup).
12141 */
12142 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12143 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
12144 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12145 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12146 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12147 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12148 : UINT32_MAX;
12149 uint8_t const idxRegValue = !TlbState.fSkip
12150 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
12151 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
12152 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
12153 : UINT8_MAX;
12154 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
12155
12156
12157 if (!TlbState.fSkip)
12158 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12159 else
12160 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12161
12162 /*
12163 * Use16BitSp:
12164 */
12165 if (cBitsFlat == 0)
12166 {
12167#ifdef RT_ARCH_AMD64
12168 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12169#else
12170 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12171#endif
12172 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12173 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12174 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12175 else
12176 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12177 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12178 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12179 }
12180
12181 /*
12182 * TlbMiss:
12183 *
12184 * Call helper to do the pushing.
12185 */
12186 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12187
12188#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12189 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12190#else
12191 RT_NOREF(idxInstr);
12192#endif
12193
12194 /* Save variables in volatile registers. */
12195 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12196 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12197 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
12198 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
12199 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12200
12201 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
12202 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
12203 {
12204 /* Swap them using ARG0 as temp register: */
12205 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
12206 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
12207 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
12208 }
12209 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
12210 {
12211 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
12212 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
12213 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12214
12215 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
12216 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12217 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12218 }
12219 else
12220 {
12221 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
12222 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12223
12224 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
12225 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
12226 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
12227 }
12228
12229 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12230 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12231
12232 /* Done setting up parameters, make the call. */
12233 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12234
12235 /* Restore variables and guest shadow registers to volatile registers. */
12236 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12237 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12238
12239#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12240 if (!TlbState.fSkip)
12241 {
12242 /* end of TlbMiss - Jump to the done label. */
12243 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12244 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12245
12246 /*
12247 * TlbLookup:
12248 */
12249 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
12250 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12251
12252 /*
12253 * Emit code to do the actual storing / fetching.
12254 */
12255 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
12256# ifdef VBOX_WITH_STATISTICS
12257 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12258 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12259# endif
12260 if (idxRegValue != UINT8_MAX)
12261 {
12262 switch (cbMemAccess)
12263 {
12264 case 2:
12265 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12266 break;
12267 case 4:
12268 if (!fIsIntelSeg)
12269 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12270 else
12271 {
12272                        /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
12273                           PUSH FS in real mode, so we have to try to emulate that here.
12274 We borrow the now unused idxReg1 from the TLB lookup code here. */
12275 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
12276 kIemNativeGstReg_EFlags);
12277 if (idxRegEfl != UINT8_MAX)
12278 {
12279#ifdef RT_ARCH_AMD64
12280 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
12281 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12282 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12283#else
12284 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
12285 off, TlbState.idxReg1, idxRegEfl,
12286 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12287#endif
12288 iemNativeRegFreeTmp(pReNative, idxRegEfl);
12289 }
12290 else
12291 {
12292 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
12293 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
12294 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12295 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12296 }
12297 /* ASSUMES the upper half of idxRegValue is ZERO. */
12298 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
12299 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
12300 }
12301 break;
12302 case 8:
12303 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12304 break;
12305 default:
12306 AssertFailed();
12307 }
12308 }
12309 else
12310 {
12311 switch (cbMemAccess)
12312 {
12313 case 2:
12314 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
12315 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
12316 idxRegMemResult, TlbState.idxReg1);
12317 break;
12318 case 4:
12319 Assert(!fIsSegReg);
12320 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
12321 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
12322 idxRegMemResult, TlbState.idxReg1);
12323 break;
12324 case 8:
12325 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
12326 idxRegMemResult, TlbState.idxReg1);
12327 break;
12328 default:
12329 AssertFailed();
12330 }
12331 }
12332
12333 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12334 TlbState.freeRegsAndReleaseVars(pReNative);
12335
12336 /*
12337 * TlbDone:
12338 *
12339 * Commit the new RSP value.
12340 */
12341 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12342 }
12343#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12344
12345 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
12346 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12347 if (idxRegEffSp != idxRegRsp)
12348 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12349
12350    /* The value variable is implicitly flushed. */
12351 if (idxRegValue != UINT8_MAX)
12352 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12353 iemNativeVarFreeLocal(pReNative, idxVarValue);
12354
12355 return off;
12356}
12357
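/* Rough shape of the code generated by iemNativeEmitStackPush above (TLB lookup
   enabled, segmented case):
        update RSP / effective SP for the current SS.D width
     SpUpdateEnd:
        jmp TlbLookup                   ; or jmp TlbMiss if the lookup is skipped
        update for the other SP width, jmp SpUpdateEnd
     TlbMiss:
        save volatiles, call pfnFunction(pVCpu, effSp, value), restore, jmp TlbDone
     TlbLookup:
        translate effSp and store the value inline
     TlbDone:
        commit the new RSP to CPUMCTX and free the registers */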
12358
12359
12360/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
12361#define IEM_MC_POP_GREG_U16(a_iGReg) \
12362 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12363 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
12364#define IEM_MC_POP_GREG_U32(a_iGReg) \
12365 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12366 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
12367#define IEM_MC_POP_GREG_U64(a_iGReg) \
12368 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12369 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
12370
12371#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
12372 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12373 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12374#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
12375 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12376 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
12377
12378#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
12379 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12380 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12381#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
12382 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12383 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
12384
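/* There are no U64 variants for FLAT32 and no U32 variant for FLAT64: 64-bit pops do
   not exist outside long mode and POP cannot take a 32-bit operand size in 64-bit
   mode, so those combinations never occur. */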
12385
12386DECL_FORCE_INLINE_THROW(uint32_t)
12387iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
12388 uint8_t idxRegTmp)
12389{
12390 /* Use16BitSp: */
12391#ifdef RT_ARCH_AMD64
12392 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12393 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12394 RT_NOREF(idxRegTmp);
12395#else
12396 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
12397 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
12398 /* add tmp, regrsp, #cbMem */
12399 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
12400 /* and tmp, tmp, #0xffff */
12401 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12402 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
12403 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
12404 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
12405#endif
12406 return off;
12407}
12408
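/* Remark on iemNativeEmitStackPopUse16Sp above: the load address is the old SP
   (zero-extended into the effective-SP register) and SP is then incremented with
   16-bit wraparound, e.g. SP=0xFFFE and cbMem=2 reads from 0xFFFE and leaves
   SP=0x0000.  The ARM64 variant needs the extra temporary register so the
   incremented value can be masked before being inserted back into RSP. */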
12409
12410DECL_FORCE_INLINE(uint32_t)
12411iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12412{
12413 /* Use32BitSp: */
12414 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12415 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12416 return off;
12417}
12418
12419
12420/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
12421DECL_INLINE_THROW(uint32_t)
12422iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
12423 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12424{
12425 /*
12426 * Assert sanity.
12427 */
12428 Assert(idxGReg < 16);
12429#ifdef VBOX_STRICT
12430 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12431 {
12432 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12433 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12434 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12435 Assert( pfnFunction
12436 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12437 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
12438 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12439 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
12440 : UINT64_C(0xc000b000a0009000) ));
12441 }
12442 else
12443 Assert( pfnFunction
12444 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
12445 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
12446 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
12447 : UINT64_C(0xc000b000a0009000) ));
12448#endif
12449
12450#ifdef VBOX_STRICT
12451 /*
12452 * Check that the fExec flags we've got make sense.
12453 */
12454 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12455#endif
12456
12457 /*
12458 * To keep things simple we have to commit any pending writes first as we
12459 * may end up making calls.
12460 */
12461 off = iemNativeRegFlushPendingWrites(pReNative, off);
12462
12463 /*
12464 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
12465 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
12466 * directly as the effective stack pointer.
12467 * (Code structure is very similar to that of PUSH)
12468 */
12469 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12470 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12471 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12472 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12473 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12474 /** @todo can do a better job picking the register here. For cbMem >= 4 this
12475 * will be the resulting register value. */
12476 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
12477
12478 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12479 if (cBitsFlat != 0)
12480 {
12481 Assert(idxRegEffSp == idxRegRsp);
12482 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12483 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12484 }
12485 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12486 {
12487 Assert(idxRegEffSp != idxRegRsp);
12488 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12489 kIemNativeGstRegUse_ReadOnly);
12490#ifdef RT_ARCH_AMD64
12491 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12492#else
12493 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12494#endif
12495 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12496 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12497 offFixupJumpToUseOtherBitSp = off;
12498 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12499 {
12500/** @todo can skip idxRegRsp updating when popping ESP. */
12501 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12502 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12503 }
12504 else
12505 {
12506 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12507 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12508 }
12509 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12510 }
12511 /* SpUpdateEnd: */
12512 uint32_t const offLabelSpUpdateEnd = off;
12513
12514 /*
12515 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
12516 * we're skipping lookup).
12517 */
12518 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12519 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
12520 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12521 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12522 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12523 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12524 : UINT32_MAX;
12525
12526 if (!TlbState.fSkip)
12527 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12528 else
12529 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12530
12531 /*
12532 * Use16BitSp:
12533 */
12534 if (cBitsFlat == 0)
12535 {
12536#ifdef RT_ARCH_AMD64
12537 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12538#else
12539 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12540#endif
12541 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12542 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12543 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12544 else
12545 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12546 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12547 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12548 }
12549
12550 /*
12551 * TlbMiss:
12552 *
12553     * Call helper to do the popping.
12554 */
12555 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12556
12557#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12558 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12559#else
12560 RT_NOREF(idxInstr);
12561#endif
12562
12563 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12564 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12565 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
12566 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12567
12568
12569 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
12570 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12571 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12572
12573 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12574 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12575
12576 /* Done setting up parameters, make the call. */
12577 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12578
12579 /* Move the return register content to idxRegMemResult. */
12580 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12581 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12582
12583 /* Restore variables and guest shadow registers to volatile registers. */
12584 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12585 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12586
12587#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12588 if (!TlbState.fSkip)
12589 {
12590 /* end of TlbMiss - Jump to the done label. */
12591 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12592 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12593
12594 /*
12595 * TlbLookup:
12596 */
12597 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
12598 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12599
12600 /*
12601 * Emit code to load the value (from idxRegMemResult into idxRegMemResult).
12602 */
12603 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12604# ifdef VBOX_WITH_STATISTICS
12605 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12606 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12607# endif
12608 switch (cbMem)
12609 {
12610 case 2:
12611 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12612 break;
12613 case 4:
12614 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12615 break;
12616 case 8:
12617 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12618 break;
12619 default:
12620 AssertFailed();
12621 }
12622
12623 TlbState.freeRegsAndReleaseVars(pReNative);
12624
12625 /*
12626 * TlbDone:
12627 *
12628         * Set the new RSP value (FLAT accesses need to calculate it first) and
12629 * commit the popped register value.
12630 */
12631 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12632 }
12633#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12634
12635 if (idxGReg != X86_GREG_xSP)
12636 {
12637 /* Set the register. */
12638 if (cbMem >= sizeof(uint32_t))
12639 {
12640#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
12641 AssertMsg( pReNative->idxCurCall == 0
12642 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
12643 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
12644#endif
12645 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
12646 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
12647 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12648 }
12649 else
12650 {
12651 Assert(cbMem == sizeof(uint16_t));
12652 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
12653 kIemNativeGstRegUse_ForUpdate);
12654 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
12655 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12656 iemNativeRegFreeTmp(pReNative, idxRegDst);
12657 }
12658
12659 /* Complete RSP calculation for FLAT mode. */
12660 if (idxRegEffSp == idxRegRsp)
12661 {
12662 if (cBitsFlat == 64)
12663 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12664 else
12665 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12666 }
12667 }
12668 else
12669 {
12670        /* We're popping RSP, ESP or SP. Only that one needs a bit of extra work, of course. */
12671 if (cbMem == sizeof(uint64_t))
12672 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
12673 else if (cbMem == sizeof(uint32_t))
12674 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
12675 else
12676 {
12677 if (idxRegEffSp == idxRegRsp)
12678 {
12679 if (cBitsFlat == 64)
12680 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12681 else
12682 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12683 }
12684 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
12685 }
12686 }
12687 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
12688
12689 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12690 if (idxRegEffSp != idxRegRsp)
12691 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12692 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12693
12694 return off;
12695}
12696
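/* Note on the RSP handling in iemNativeEmitStackPopGReg above: in flat mode the
   incoming RSP is used directly as the load address and is only incremented after
   the value has been fetched, while the segmented paths update RSP/SP up front.
   Popping into the stack pointer itself overwrites RSP/ESP with the fetched value
   (for a 16-bit pop only the low 16 bits are merged), matching the architectural
   behaviour of POP (R|E)SP where the read value becomes the new stack pointer. */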
12697
12698
12699/*********************************************************************************************************************************
12700* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
12701*********************************************************************************************************************************/
12702
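/* All the IEM_MC_MEM_[FLAT_]MAP_XXX variants below funnel into
   iemNativeEmitMemMapCommon.  The first parameter receives the host pointer to the
   mapped guest memory, a_bUnmapInfo the cookie that is later handed to
   IEM_MC_MEM_COMMIT_AND_UNMAP_XXX, and fAlignMask is the required alignment minus
   one (the 80-bit types are checked against 8-byte alignment). */
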
12703#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12704 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12705 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
12706 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
12707
12708#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12709 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12710 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
12711 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
12712
12713#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12714 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12715 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
12716 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
12717
12718#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12719 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12720 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
12721 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
12722
12723
12724#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12725 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12726 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12727 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
12728
12729#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12730 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12731 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12732 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
12733
12734#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12735 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12736 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12737 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12738
12739#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12740 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12741 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12742 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
12743
12744#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12745 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
12746 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12747 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12748
12749
12750#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12751 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12752 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12753 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
12754
12755#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12756 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12757 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12758 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
12759
12760#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12761 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12762 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12763 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12764
12765#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12766 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12767 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12768 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
12769
12770#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12771 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
12772 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12773 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12774
12775
12776#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12777 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12778 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12779 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
12780
12781#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12782 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12783 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12784 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)

12785#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12786 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12787 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12788 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12789
12790#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12791 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12792 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12793 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
12794
12795#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12796 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
12797 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12798 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12799
12800
12801#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12802 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12803 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12804 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
12805
12806#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12807 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12808 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12809 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
12810
12811
12812#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12813 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12814 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12815 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
12816
12817#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12818 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12819 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12820 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
12821
12822#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12823 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12824 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12825 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
12826
12827#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12828 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12829 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12830 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
12831
12832
12833
12834#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12835 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12836 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
12837 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
12838
12839#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12840 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12841 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
12842 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
12843
12844#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12845 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12846 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
12847 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
12848
12849#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12850 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12851 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
12852 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
12853
12854
12855#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12856 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12857 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12858 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
12859
12860#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12861 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12862 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12863 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
12864
12865#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12866 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12867 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12868 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12869
12870#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12871 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12872 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12873 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
12874
12875#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
12876 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
12877 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12878 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12879
12880
12881#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12882 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12883 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12884 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
12885
12886#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12887 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12888 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12889 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
12890
12891#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12892 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12893 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12894 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12895
12896#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12897 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12898 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12899 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
12900
12901#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
12902 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
12903 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12904 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12905
12906
12907#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12908 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12909 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12910 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
12911
12912#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12913 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12914 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12915 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
12916
12917#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12918 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12919 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12920 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12921
12922#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12923 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12924 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12925 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
12926
12927#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
12928 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
12929 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12930 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12931
12932
12933#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
12934 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12935 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12936 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
12937
12938#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
12939 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12940 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12941 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
12942
12943
12944#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12945 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12946 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12947 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
12948
12949#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12950 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12951 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12952 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
12953
12954#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12955 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12956 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12957 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
12958
12959#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12960 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12961 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12962 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
12963
12964
12965DECL_INLINE_THROW(uint32_t)
12966iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
12967 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
12968 uintptr_t pfnFunction, uint8_t idxInstr)
12969{
12970 /*
12971 * Assert sanity.
12972 */
12973 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
12974 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
12975 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
12976 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12977
12978 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12979 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
12980 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
12981 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12982
12983 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12984 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
12985 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
12986 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12987
12988 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12989
12990 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12991
12992#ifdef VBOX_STRICT
12993# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
12994 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
12995 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
12996 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
12997 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
12998# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
12999 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
13000 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
13001 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
13002
13003 if (iSegReg == UINT8_MAX)
13004 {
13005 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13006 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13007 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13008 switch (cbMem)
13009 {
13010 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
13011 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
13012 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
13013 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
13014 case 10:
13015 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
13016 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
13017 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13018 break;
13019 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
13020# if 0
13021 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
13022 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
13023# endif
13024 default: AssertFailed(); break;
13025 }
13026 }
13027 else
13028 {
13029 Assert(iSegReg < 6);
13030 switch (cbMem)
13031 {
13032 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
13033 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
13034 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
13035 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
13036 case 10:
13037 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
13038 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
13039 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13040 break;
13041 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
13042# if 0
13043 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
13044 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
13045# endif
13046 default: AssertFailed(); break;
13047 }
13048 }
13049# undef IEM_MAP_HLP_FN
13050# undef IEM_MAP_HLP_FN_NO_AT
13051#endif
13052
13053#ifdef VBOX_STRICT
13054 /*
13055 * Check that the fExec flags we've got make sense.
13056 */
13057 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13058#endif
13059
13060 /*
13061 * To keep things simple we have to commit any pending writes first as we
13062 * may end up making calls.
13063 */
13064 off = iemNativeRegFlushPendingWrites(pReNative, off);
13065
13066#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13067 /*
13068 * Move/spill/flush stuff out of call-volatile registers.
13069 * This is the easy way out. We could contain this to the tlb-miss branch
13070 * by saving and restoring active stuff here.
13071 */
13072 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
13073 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13074#endif
13075
13076 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
13077 while the tlb-miss codepath will temporarily put it on the stack.
13078       Set the type to stack here so we don't need to do it twice below. */
13079 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
13080 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
13081 /** @todo use a tmp register from TlbState, since they'll be free after tlb
13082 * lookup is done. */
13083
13084 /*
13085 * Define labels and allocate the result register (trying for the return
13086 * register if we can).
13087 */
13088 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13089 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13090 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
13091 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
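    /* Getting IEMNATIVE_CALL_RET_GREG for the result lets the TlbMiss path below
       skip the extra register move after the helper call returns. */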
13092 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
13093 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13094 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13095 : UINT32_MAX;
13096//off=iemNativeEmitBrk(pReNative, off, 0);
13097 /*
13098 * Jump to the TLB lookup code.
13099 */
13100 if (!TlbState.fSkip)
13101 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13102
13103 /*
13104 * TlbMiss:
13105 *
13106     * Call helper to do the mapping.
13107 * We flush all guest register shadow copies here.
13108 */
13109 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13110
13111#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13112 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13113#else
13114 RT_NOREF(idxInstr);
13115#endif
13116
13117#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13118 /* Save variables in volatile registers. */
13119 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
13120 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13121#endif
13122
13123 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
13124 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
13125#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13126 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13127#else
13128 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13129#endif
13130
13131 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
13132 if (iSegReg != UINT8_MAX)
13133 {
13134 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13135 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
13136 }
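    /* For flat accesses (iSegReg == UINT8_MAX) the helper does not take a segment
       argument, so IEMNATIVE_CALL_ARG3_GREG is simply not loaded. */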
13137
13138    /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo - stack slot address; the result is loaded into a register after the call. */
13139 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
13140 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
13141
13142 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13143 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13144
13145 /* Done setting up parameters, make the call. */
13146 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13147
13148 /*
13149 * Put the output in the right registers.
13150 */
13151 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
13152 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
13153 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
13154
13155#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13156 /* Restore variables and guest shadow registers to volatile registers. */
13157 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13158 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13159#endif
13160
13161 Assert(pReNative->Core.aVars[idxVarUnmapInfo].idxReg == idxRegUnmapInfo);
13162 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
13163
13164#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13165 if (!TlbState.fSkip)
13166 {
13167        /* end of TlbMiss - Jump to the done label. */
13168 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13169 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13170
13171 /*
13172 * TlbLookup:
13173 */
13174 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
13175 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13176# ifdef VBOX_WITH_STATISTICS
13177 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
13178 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
13179# endif
13180
13181 /* [idxVarUnmapInfo] = 0; */
13182 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
13183
13184 /*
13185 * TlbDone:
13186 */
13187 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13188
13189 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13190
13191# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13192 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13193 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13194# endif
13195 }
13196#else
13197 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
13198#endif
13199
13200 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13201 iemNativeVarRegisterRelease(pReNative, idxVarMem);
13202
13203 return off;
13204}
13205
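/*
 * For orientation: the TLB miss path set up above boils down to roughly the
 * following call (a sketch, not the actual emitted code; the concrete helper
 * behind pfnFunction depends on the access size and type, and iSegReg is only
 * loaded into ARG3 when it isn't UINT8_MAX):
 *
 *      pvMem = pfnFunction(pVCpu, &bUnmapInfo, GCPtrMem, iSegReg);
 *
 * The returned pointer ends up in idxRegMemResult and bUnmapInfo is reloaded
 * from its stack slot right after the call.
 */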
13206
13207#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
13208 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
13209 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
13210
13211#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
13212 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
13213 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
13214
13215#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
13216 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
13217 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
13218
13219#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
13220 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
13221 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
13222
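/*
 * Illustrative pairing (hand-written sketch, not generated code; the mapping
 * statement name below is a placeholder - the real IEM_MC_MEM_MAP_* variant
 * depends on the operand size and access type): a write access in an MC block
 * maps the memory first and commits it afterwards, which is what routes the
 * bUnmapInfo local into iemNativeEmitMemCommitAndUnmap below:
 *
 *      uint8_t   bUnmapInfo;
 *      uint16_t *pu16Dst;
 *      IEM_MC_MEM_MAP_XXX_WO(pu16Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
 *      ... store through pu16Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_WO(bUnmapInfo);
 */
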
13223DECL_INLINE_THROW(uint32_t)
13224iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
13225 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
13226{
13227 /*
13228 * Assert sanity.
13229 */
13230 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13231 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
13232 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
13233 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
13234#ifdef VBOX_STRICT
13235 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
13236 {
13237 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
13238 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
13239 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
13240 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
13241 case IEM_ACCESS_TYPE_WRITE:
13242 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
13243 case IEM_ACCESS_TYPE_READ:
13244 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
13245 default: AssertFailed();
13246 }
13247#else
13248 RT_NOREF(fAccess);
13249#endif
13250
13251 /*
13252 * To keep things simple we have to commit any pending writes first as we
13253 * may end up making calls (there shouldn't be any at this point, so this
13254 * is just for consistency).
13255 */
13256 /** @todo we could postpone this till we make the call and reload the
13257 * registers after returning from the call. Not sure if that's sensible or
13258 * not, though. */
13259 off = iemNativeRegFlushPendingWrites(pReNative, off);
13260
13261 /*
13262 * Move/spill/flush stuff out of call-volatile registers.
13263 *
13264 * We exclude any register holding the bUnmapInfo variable, as we'll be
13265 * checking it after returning from the call and will free it afterwards.
13266 */
13267 /** @todo save+restore active registers and maybe guest shadows in miss
13268 * scenario. */
13269 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
13270
13271 /*
13272 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
13273 * to call the unmap helper function.
13274 *
13275 * The likelihood of it being zero is higher than for the TLB hit when doing
13276 * the mapping, as a TLB miss for a well-aligned and unproblematic memory
13277 * access should also end up with a mapping that won't need special unmapping.
13278 */
13279 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
13280 * should speed up things for the pure interpreter as well when TLBs
13281 * are enabled. */
13282#ifdef RT_ARCH_AMD64
13283 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
13284 {
13285 /* test byte [rbp - xxx], 0ffh */
13286 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
13287 pbCodeBuf[off++] = 0xf6;
13288 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
13289 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
13290 pbCodeBuf[off++] = 0xff;
13291 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13292 }
13293 else
13294#endif
13295 {
13296 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
13297 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
13298 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
13299 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13300 }
13301 uint32_t const offJmpFixup = off;
13302 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
13303
13304 /*
13305 * Call the unmap helper function.
13306 */
13307#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
13308 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13309#else
13310 RT_NOREF(idxInstr);
13311#endif
13312
13313 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
13314 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
13315 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13316
13317 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13318 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13319
13320 /* Done setting up parameters, make the call. */
13321 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13322
13323 /* The bUnmapInfo variable is implicitly freed by these MCs. */
13324 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
13325
13326 /*
13327 * Done, just fixup the jump for the non-call case.
13328 */
13329 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
13330
13331 return off;
13332}
13333
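/*
 * Rough C equivalent of what the emitter above produces (a sketch only; the
 * real output is native code shaped by the register allocator, and the helper
 * is the one matching fAccess):
 *
 *      if (bUnmapInfo != 0)
 *          iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);
 *
 * i.e. only mappings that still need explicit committing/unmapping take the
 * helper call; the common direct-mapping case (bUnmapInfo == 0) skips it.
 */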
13334
13335
13336/*********************************************************************************************************************************
13337* State and Exceptions *
13338*********************************************************************************************************************************/
13339
13340#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13341#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13342
13343#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13344#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13345#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13346
13347#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13348#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13349#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13350
13351
13352DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
13353{
13354 /** @todo this needs a lot more work later. */
13355 RT_NOREF(pReNative, fForChange);
13356 return off;
13357}
13358
13359
13360
13361/*********************************************************************************************************************************
13362* Emitters for FPU related operations. *
13363*********************************************************************************************************************************/
13364
13365#define IEM_MC_FETCH_FCW(a_u16Fcw) \
13366 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
13367
13368/** Emits code for IEM_MC_FETCH_FCW. */
13369DECL_INLINE_THROW(uint32_t)
13370iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
13371{
13372 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
13373 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
13374
13375 /* Allocate a temporary FCW register. */
13376 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
13377 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw, kIemNativeGstRegUse_ReadOnly);
13378
13379 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
13380
13381 /* Free but don't flush the FCW register. */
13382 iemNativeRegFreeTmp(pReNative, idxFcwReg);
13383
13384 return off;
13385}
13386
13387
13388#define IEM_MC_FETCH_FSW(a_u16Fsw) \
13389 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
13390
13391/** Emits code for IEM_MC_FETCH_FSW. */
13392DECL_INLINE_THROW(uint32_t)
13393iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
13394{
13395 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
13396 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
13397
13398 /* Allocate a temporary FSW register. */
13399 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
13400 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
13401
13402 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
13403
13404 /* Free but don't flush the FSW register. */
13405 iemNativeRegFreeTmp(pReNative, idxFswReg);
13406
13407 return off;
13408}
13409
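/*
 * Both fetch emitters above reduce to a 16-bit register-to-register move from
 * the shadowed guest FCW/FSW value, i.e. roughly (a sketch; the exact CPUMCTX
 * field path is an assumption for illustration):
 *
 *      u16Fcw = pVCpu->cpum.GstCtx.XState.x87.FCW;
 *      u16Fsw = pVCpu->cpum.GstCtx.XState.x87.FSW;
 *
 * with the result landing in whichever host register the variable allocator
 * picked for the destination variable.
 */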
13410
13411
13412/*********************************************************************************************************************************
13413* The native code generator functions for each MC block. *
13414*********************************************************************************************************************************/
13415
13416
13417/*
13418 * Include g_apfnIemNativeRecompileFunctions and associated functions.
13419 *
13420 * This should probably live in its own file later, but let's see what the
13421 * compile times turn out to be first.
13422 */
13423#include "IEMNativeFunctions.cpp.h"
13424
13425
13426
13427/*********************************************************************************************************************************
13428* Recompiler Core. *
13429*********************************************************************************************************************************/
13430
13431
13432/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
13433static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
13434{
13435 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
13436 pDis->cbCachedInstr += cbMaxRead;
13437 RT_NOREF(cbMinRead);
13438 return VERR_NO_DATA;
13439}
13440
13441
13442DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
13443{
13444 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
13445 {
13446#define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
13447 ENTRY(fLocalForcedActions),
13448 ENTRY(iem.s.rcPassUp),
13449 ENTRY(iem.s.fExec),
13450 ENTRY(iem.s.pbInstrBuf),
13451 ENTRY(iem.s.uInstrBufPc),
13452 ENTRY(iem.s.GCPhysInstrBuf),
13453 ENTRY(iem.s.cbInstrBufTotal),
13454 ENTRY(iem.s.idxTbCurInstr),
13455#ifdef VBOX_WITH_STATISTICS
13456 ENTRY(iem.s.StatNativeTlbHitsForFetch),
13457 ENTRY(iem.s.StatNativeTlbHitsForStore),
13458 ENTRY(iem.s.StatNativeTlbHitsForStack),
13459 ENTRY(iem.s.StatNativeTlbHitsForMapped),
13460 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
13461 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
13462 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
13463 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
13464#endif
13465 ENTRY(iem.s.DataTlb.aEntries),
13466 ENTRY(iem.s.DataTlb.uTlbRevision),
13467 ENTRY(iem.s.DataTlb.uTlbPhysRev),
13468 ENTRY(iem.s.DataTlb.cTlbHits),
13469 ENTRY(iem.s.CodeTlb.aEntries),
13470 ENTRY(iem.s.CodeTlb.uTlbRevision),
13471 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
13472 ENTRY(iem.s.CodeTlb.cTlbHits),
13473 ENTRY(pVMR3),
13474 ENTRY(cpum.GstCtx.rax),
13475 ENTRY(cpum.GstCtx.ah),
13476 ENTRY(cpum.GstCtx.rcx),
13477 ENTRY(cpum.GstCtx.ch),
13478 ENTRY(cpum.GstCtx.rdx),
13479 ENTRY(cpum.GstCtx.dh),
13480 ENTRY(cpum.GstCtx.rbx),
13481 ENTRY(cpum.GstCtx.bh),
13482 ENTRY(cpum.GstCtx.rsp),
13483 ENTRY(cpum.GstCtx.rbp),
13484 ENTRY(cpum.GstCtx.rsi),
13485 ENTRY(cpum.GstCtx.rdi),
13486 ENTRY(cpum.GstCtx.r8),
13487 ENTRY(cpum.GstCtx.r9),
13488 ENTRY(cpum.GstCtx.r10),
13489 ENTRY(cpum.GstCtx.r11),
13490 ENTRY(cpum.GstCtx.r12),
13491 ENTRY(cpum.GstCtx.r13),
13492 ENTRY(cpum.GstCtx.r14),
13493 ENTRY(cpum.GstCtx.r15),
13494 ENTRY(cpum.GstCtx.es.Sel),
13495 ENTRY(cpum.GstCtx.es.u64Base),
13496 ENTRY(cpum.GstCtx.es.u32Limit),
13497 ENTRY(cpum.GstCtx.es.Attr),
13498 ENTRY(cpum.GstCtx.cs.Sel),
13499 ENTRY(cpum.GstCtx.cs.u64Base),
13500 ENTRY(cpum.GstCtx.cs.u32Limit),
13501 ENTRY(cpum.GstCtx.cs.Attr),
13502 ENTRY(cpum.GstCtx.ss.Sel),
13503 ENTRY(cpum.GstCtx.ss.u64Base),
13504 ENTRY(cpum.GstCtx.ss.u32Limit),
13505 ENTRY(cpum.GstCtx.ss.Attr),
13506 ENTRY(cpum.GstCtx.ds.Sel),
13507 ENTRY(cpum.GstCtx.ds.u64Base),
13508 ENTRY(cpum.GstCtx.ds.u32Limit),
13509 ENTRY(cpum.GstCtx.ds.Attr),
13510 ENTRY(cpum.GstCtx.fs.Sel),
13511 ENTRY(cpum.GstCtx.fs.u64Base),
13512 ENTRY(cpum.GstCtx.fs.u32Limit),
13513 ENTRY(cpum.GstCtx.fs.Attr),
13514 ENTRY(cpum.GstCtx.gs.Sel),
13515 ENTRY(cpum.GstCtx.gs.u64Base),
13516 ENTRY(cpum.GstCtx.gs.u32Limit),
13517 ENTRY(cpum.GstCtx.gs.Attr),
13518 ENTRY(cpum.GstCtx.rip),
13519 ENTRY(cpum.GstCtx.eflags),
13520 ENTRY(cpum.GstCtx.uRipInhibitInt),
13521#undef ENTRY
13522 };
13523#ifdef VBOX_STRICT
13524 static bool s_fOrderChecked = false;
13525 if (!s_fOrderChecked)
13526 {
13527 s_fOrderChecked = true;
13528 uint32_t offPrev = s_aMembers[0].off;
13529 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
13530 {
13531 Assert(s_aMembers[i].off > offPrev);
13532 offPrev = s_aMembers[i].off;
13533 }
13534 }
13535#endif
13536
13537 /*
13538 * Binary lookup.
13539 */
13540 unsigned iStart = 0;
13541 unsigned iEnd = RT_ELEMENTS(s_aMembers);
13542 for (;;)
13543 {
13544 unsigned const iCur = iStart + (iEnd - iStart) / 2;
13545 uint32_t const offCur = s_aMembers[iCur].off;
13546 if (off < offCur)
13547 {
13548 if (iCur != iStart)
13549 iEnd = iCur;
13550 else
13551 break;
13552 }
13553 else if (off > offCur)
13554 {
13555 if (iCur + 1 < iEnd)
13556 iStart = iCur + 1;
13557 else
13558 break;
13559 }
13560 else
13561 return s_aMembers[iCur].pszName;
13562 }
13563#ifdef VBOX_WITH_STATISTICS
13564 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
13565 return "iem.s.acThreadedFuncStats[iFn]";
13566#endif
13567 return NULL;
13568}
13569
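/*
 * Example: given the table above, passing RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip)
 * returns "cpum.GstCtx.rip", while an offset that is neither listed nor inside
 * the acThreadedFuncStats array yields NULL and the disassembler simply prints
 * no annotation for that access.
 */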
13570
13571/**
13572 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
13573 * @returns pszBuf.
13574 * @param fFlags The flags.
13575 * @param pszBuf The output buffer.
13576 * @param cbBuf The output buffer size. At least 32 bytes.
13577 */
13578DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
13579{
13580 Assert(cbBuf >= 32);
13581 static RTSTRTUPLE const s_aModes[] =
13582 {
13583 /* [00] = */ { RT_STR_TUPLE("16BIT") },
13584 /* [01] = */ { RT_STR_TUPLE("32BIT") },
13585 /* [02] = */ { RT_STR_TUPLE("!2!") },
13586 /* [03] = */ { RT_STR_TUPLE("!3!") },
13587 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
13588 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
13589 /* [06] = */ { RT_STR_TUPLE("!6!") },
13590 /* [07] = */ { RT_STR_TUPLE("!7!") },
13591 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
13592 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
13593 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
13594 /* [0b] = */ { RT_STR_TUPLE("!b!") },
13595 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
13596 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
13597 /* [0e] = */ { RT_STR_TUPLE("!e!") },
13598 /* [0f] = */ { RT_STR_TUPLE("!f!") },
13599 /* [10] = */ { RT_STR_TUPLE("!10!") },
13600 /* [11] = */ { RT_STR_TUPLE("!11!") },
13601 /* [12] = */ { RT_STR_TUPLE("!12!") },
13602 /* [13] = */ { RT_STR_TUPLE("!13!") },
13603 /* [14] = */ { RT_STR_TUPLE("!14!") },
13604 /* [15] = */ { RT_STR_TUPLE("!15!") },
13605 /* [16] = */ { RT_STR_TUPLE("!16!") },
13606 /* [17] = */ { RT_STR_TUPLE("!17!") },
13607 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
13608 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
13609 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
13610 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
13611 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
13612 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
13613 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
13614 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
13615 };
13616 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
13617 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
13618 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
13619
13620 pszBuf[off++] = ' ';
13621 pszBuf[off++] = 'C';
13622 pszBuf[off++] = 'P';
13623 pszBuf[off++] = 'L';
13624 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
13625 Assert(off < 32);
13626
13627 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
13628
13629 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
13630 {
13631 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
13632 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
13633 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
13634 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
13635 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
13636 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
13637 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
13638 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
13639 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
13640 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
13641 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
13642 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
13643 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
13644 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
13645 };
13646 if (fFlags)
13647 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
13648 if (s_aFlags[i].fFlag & fFlags)
13649 {
13650 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
13651 pszBuf[off++] = ' ';
13652 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
13653 off += s_aFlags[i].cchName;
13654 fFlags &= ~s_aFlags[i].fFlag;
13655 if (!fFlags)
13656 break;
13657 }
13658 pszBuf[off] = '\0';
13659
13660 return pszBuf;
13661}
13662
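/*
 * Example output (illustrative): a native TB for 64-bit CPL 3 guest code with
 * CS limit checks enabled is rendered as something like
 * "64BIT CPL3 CS_LIM_CHECKS TYPE_NATIVE" - the mode string, the CPL, and then
 * one token per set IEM_F_XXX / IEMTB_F_XXX flag.
 */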
13663
13664DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
13665{
13666 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
13667#if defined(RT_ARCH_AMD64)
13668 static const char * const a_apszMarkers[] =
13669 {
13670 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
13671 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
13672 };
13673#endif
13674
13675 char szDisBuf[512];
13676 DISSTATE Dis;
13677 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
13678 uint32_t const cNative = pTb->Native.cInstructions;
13679 uint32_t offNative = 0;
13680#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13681 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
13682#endif
13683 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13684 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13685 : DISCPUMODE_64BIT;
13686#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13687 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
13688#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13689 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
13690#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13691# error "Port me"
13692#else
13693 csh hDisasm = ~(size_t)0;
13694# if defined(RT_ARCH_AMD64)
13695 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
13696# elif defined(RT_ARCH_ARM64)
13697 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
13698# else
13699# error "Port me"
13700# endif
13701 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
13702#endif
13703
13704 /*
13705 * Print TB info.
13706 */
13707 pHlp->pfnPrintf(pHlp,
13708 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
13709 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
13710 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
13711 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
13712#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13713 if (pDbgInfo && pDbgInfo->cEntries > 1)
13714 {
13715 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
13716
13717 /*
13718 * This disassembly is driven by the debug info which follows the native
13719 * code and indicates when it starts with the next guest instructions,
13720 * where labels are and such things.
13721 */
13722 uint32_t idxThreadedCall = 0;
13723 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
13724 uint8_t idxRange = UINT8_MAX;
13725 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
13726 uint32_t offRange = 0;
13727 uint32_t offOpcodes = 0;
13728 uint32_t const cbOpcodes = pTb->cbOpcodes;
13729 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
13730 uint32_t const cDbgEntries = pDbgInfo->cEntries;
13731 uint32_t iDbgEntry = 1;
13732 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
13733
13734 while (offNative < cNative)
13735 {
13736 /* If we're at or have passed the point where the next chunk of debug
13737 info starts, process it. */
13738 if (offDbgNativeNext <= offNative)
13739 {
13740 offDbgNativeNext = UINT32_MAX;
13741 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
13742 {
13743 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
13744 {
13745 case kIemTbDbgEntryType_GuestInstruction:
13746 {
13747 /* Did the exec flag change? */
13748 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
13749 {
13750 pHlp->pfnPrintf(pHlp,
13751 " fExec change %#08x -> %#08x %s\n",
13752 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13753 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13754 szDisBuf, sizeof(szDisBuf)));
13755 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
13756 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13757 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13758 : DISCPUMODE_64BIT;
13759 }
13760
13761 /* New opcode range? We need to fend off a spurious debug info entry here for cases
13762 where the compilation was aborted before the opcode was recorded and the actual
13763 instruction was translated to a threaded call. This may happen when we run out
13764 of ranges, or when some complicated interrupts/FFs are found to be pending or
13765 similar. So, we just deal with it here rather than in the compiler code as it
13766 is a lot simpler to do here. */
13767 if ( idxRange == UINT8_MAX
13768 || idxRange >= cRanges
13769 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
13770 {
13771 idxRange += 1;
13772 if (idxRange < cRanges)
13773 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
13774 else
13775 continue;
13776 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
13777 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
13778 + (pTb->aRanges[idxRange].idxPhysPage == 0
13779 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13780 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
13781 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13782 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
13783 pTb->aRanges[idxRange].idxPhysPage);
13784 GCPhysPc += offRange;
13785 }
13786
13787 /* Disassemble the instruction. */
13788 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
13789 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
13790 uint32_t cbInstr = 1;
13791 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13792 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
13793 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13794 if (RT_SUCCESS(rc))
13795 {
13796 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13797 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13798 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13799 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13800
13801 static unsigned const s_offMarker = 55;
13802 static char const s_szMarker[] = " ; <--- guest";
13803 if (cch < s_offMarker)
13804 {
13805 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
13806 cch = s_offMarker;
13807 }
13808 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
13809 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
13810
13811 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
13812 }
13813 else
13814 {
13815 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
13816 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
13817 cbInstr = 1;
13818 }
13819 GCPhysPc += cbInstr;
13820 offOpcodes += cbInstr;
13821 offRange += cbInstr;
13822 continue;
13823 }
13824
13825 case kIemTbDbgEntryType_ThreadedCall:
13826 pHlp->pfnPrintf(pHlp,
13827 " Call #%u to %s (%u args) - %s\n",
13828 idxThreadedCall,
13829 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13830 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13831 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
13832 idxThreadedCall++;
13833 continue;
13834
13835 case kIemTbDbgEntryType_GuestRegShadowing:
13836 {
13837 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
13838 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
13839 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
13840 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
13841 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13842 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
13843 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
13844 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
13845 else
13846 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
13847 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
13848 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13849 continue;
13850 }
13851
13852 case kIemTbDbgEntryType_Label:
13853 {
13854 const char *pszName = "what_the_fudge";
13855 const char *pszComment = "";
13856 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
13857 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
13858 {
13859 case kIemNativeLabelType_Return:
13860 pszName = "Return";
13861 break;
13862 case kIemNativeLabelType_ReturnBreak:
13863 pszName = "ReturnBreak";
13864 break;
13865 case kIemNativeLabelType_ReturnWithFlags:
13866 pszName = "ReturnWithFlags";
13867 break;
13868 case kIemNativeLabelType_NonZeroRetOrPassUp:
13869 pszName = "NonZeroRetOrPassUp";
13870 break;
13871 case kIemNativeLabelType_RaiseGp0:
13872 pszName = "RaiseGp0";
13873 break;
13874 case kIemNativeLabelType_RaiseNm:
13875 pszName = "RaiseNm";
13876 break;
13877 case kIemNativeLabelType_RaiseUd:
13878 pszName = "RaiseUd";
13879 break;
13880 case kIemNativeLabelType_ObsoleteTb:
13881 pszName = "ObsoleteTb";
13882 break;
13883 case kIemNativeLabelType_NeedCsLimChecking:
13884 pszName = "NeedCsLimChecking";
13885 break;
13886 case kIemNativeLabelType_CheckBranchMiss:
13887 pszName = "CheckBranchMiss";
13888 break;
13889 case kIemNativeLabelType_If:
13890 pszName = "If";
13891 fNumbered = true;
13892 break;
13893 case kIemNativeLabelType_Else:
13894 pszName = "Else";
13895 fNumbered = true;
13896 pszComment = " ; regs state restored pre-if-block";
13897 break;
13898 case kIemNativeLabelType_Endif:
13899 pszName = "Endif";
13900 fNumbered = true;
13901 break;
13902 case kIemNativeLabelType_CheckIrq:
13903 pszName = "CheckIrq_CheckVM";
13904 fNumbered = true;
13905 break;
13906 case kIemNativeLabelType_TlbLookup:
13907 pszName = "TlbLookup";
13908 fNumbered = true;
13909 break;
13910 case kIemNativeLabelType_TlbMiss:
13911 pszName = "TlbMiss";
13912 fNumbered = true;
13913 break;
13914 case kIemNativeLabelType_TlbDone:
13915 pszName = "TlbDone";
13916 fNumbered = true;
13917 break;
13918 case kIemNativeLabelType_Invalid:
13919 case kIemNativeLabelType_End:
13920 break;
13921 }
13922 if (fNumbered)
13923 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
13924 else
13925 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
13926 continue;
13927 }
13928
13929 case kIemTbDbgEntryType_NativeOffset:
13930 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
13931 Assert(offDbgNativeNext > offNative);
13932 break;
13933
13934 default:
13935 AssertFailed();
13936 }
13937 iDbgEntry++;
13938 break;
13939 }
13940 }
13941
13942 /*
13943 * Disassemble the next native instruction.
13944 */
13945 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13946# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13947 uint32_t cbInstr = sizeof(paNative[0]);
13948 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13949 if (RT_SUCCESS(rc))
13950 {
13951# if defined(RT_ARCH_AMD64)
13952 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13953 {
13954 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13955 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13956 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13957 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13958 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13959 uInfo & 0x8000 ? "recompiled" : "todo");
13960 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13961 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13962 else
13963 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13964 }
13965 else
13966# endif
13967 {
13968 const char *pszAnnotation = NULL;
13969# ifdef RT_ARCH_AMD64
13970 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13971 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13972 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13973 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13974 PCDISOPPARAM pMemOp;
13975 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
13976 pMemOp = &Dis.Param1;
13977 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
13978 pMemOp = &Dis.Param2;
13979 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
13980 pMemOp = &Dis.Param3;
13981 else
13982 pMemOp = NULL;
13983 if ( pMemOp
13984 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
13985 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
13986 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
13987 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
13988
13989# elif defined(RT_ARCH_ARM64)
13990 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13991 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13992 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13993# else
13994# error "Port me"
13995# endif
13996 if (pszAnnotation)
13997 {
13998 static unsigned const s_offAnnotation = 55;
13999 size_t const cchAnnotation = strlen(pszAnnotation);
14000 size_t cchDis = strlen(szDisBuf);
14001 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
14002 {
14003 if (cchDis < s_offAnnotation)
14004 {
14005 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
14006 cchDis = s_offAnnotation;
14007 }
14008 szDisBuf[cchDis++] = ' ';
14009 szDisBuf[cchDis++] = ';';
14010 szDisBuf[cchDis++] = ' ';
14011 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
14012 }
14013 }
14014 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14015 }
14016 }
14017 else
14018 {
14019# if defined(RT_ARCH_AMD64)
14020 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14021 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14022# elif defined(RT_ARCH_ARM64)
14023 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14024# else
14025# error "Port me"
14026# endif
14027 cbInstr = sizeof(paNative[0]);
14028 }
14029 offNative += cbInstr / sizeof(paNative[0]);
14030
14031# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14032 cs_insn *pInstr;
14033 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14034 (uintptr_t)pNativeCur, 1, &pInstr);
14035 if (cInstrs > 0)
14036 {
14037 Assert(cInstrs == 1);
14038# if defined(RT_ARCH_AMD64)
14039 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14040 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14041# else
14042 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14043 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14044# endif
14045 offNative += pInstr->size / sizeof(*pNativeCur);
14046 cs_free(pInstr, cInstrs);
14047 }
14048 else
14049 {
14050# if defined(RT_ARCH_AMD64)
14051 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14052 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14053# else
14054 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14055# endif
14056 offNative++;
14057 }
14058# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14059 }
14060 }
14061 else
14062#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
14063 {
14064 /*
14065 * No debug info, just disassemble the x86 code and then the native code.
14066 *
14067 * First the guest code:
14068 */
14069 for (unsigned i = 0; i < pTb->cRanges; i++)
14070 {
14071 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
14072 + (pTb->aRanges[i].idxPhysPage == 0
14073 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
14074 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
14075 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
14076 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
14077 unsigned off = pTb->aRanges[i].offOpcodes;
14078 /** @todo this ain't working when crossing pages! */
14079 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
14080 while (off < cbOpcodes)
14081 {
14082 uint32_t cbInstr = 1;
14083 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
14084 &pTb->pabOpcodes[off], cbOpcodes - off,
14085 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
14086 if (RT_SUCCESS(rc))
14087 {
14088 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14089 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14090 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14091 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14092 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
14093 GCPhysPc += cbInstr;
14094 off += cbInstr;
14095 }
14096 else
14097 {
14098 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
14099 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
14100 break;
14101 }
14102 }
14103 }
14104
14105 /*
14106 * Then the native code:
14107 */
14108 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
14109 while (offNative < cNative)
14110 {
14111 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
14112# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14113 uint32_t cbInstr = sizeof(paNative[0]);
14114 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
14115 if (RT_SUCCESS(rc))
14116 {
14117# if defined(RT_ARCH_AMD64)
14118 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
14119 {
14120 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
14121 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
14122 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
14123 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
14124 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
14125 uInfo & 0x8000 ? "recompiled" : "todo");
14126 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
14127 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
14128 else
14129 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
14130 }
14131 else
14132# endif
14133 {
14134# ifdef RT_ARCH_AMD64
14135 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14136 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14137 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14138 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14139# elif defined(RT_ARCH_ARM64)
14140 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
14141 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14142 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14143# else
14144# error "Port me"
14145# endif
14146 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14147 }
14148 }
14149 else
14150 {
14151# if defined(RT_ARCH_AMD64)
14152 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14153 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14154# else
14155 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14156# endif
14157 cbInstr = sizeof(paNative[0]);
14158 }
14159 offNative += cbInstr / sizeof(paNative[0]);
14160
14161# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14162 cs_insn *pInstr;
14163 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14164 (uintptr_t)pNativeCur, 1, &pInstr);
14165 if (cInstrs > 0)
14166 {
14167 Assert(cInstrs == 1);
14168# if defined(RT_ARCH_AMD64)
14169 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14170 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14171# else
14172 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14173 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14174# endif
14175 offNative += pInstr->size / sizeof(*pNativeCur);
14176 cs_free(pInstr, cInstrs);
14177 }
14178 else
14179 {
14180# if defined(RT_ARCH_AMD64)
14181 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14182 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14183# else
14184 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14185# endif
14186 offNative++;
14187 }
14188# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14189 }
14190 }
14191
14192#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14193 /* Cleanup. */
14194 cs_close(&hDisasm);
14195#endif
14196}
14197
14198
14199/**
14200 * Recompiles the given threaded TB into a native one.
14201 *
14202 * In case of failure the translation block will be returned as-is.
14203 *
14204 * @returns pTb.
14205 * @param pVCpu The cross context virtual CPU structure of the calling
14206 * thread.
14207 * @param pTb The threaded translation block to recompile to native.
14208 */
14209DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
14210{
14211 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
14212
14213 /*
14214 * The first time thru, we allocate the recompiler state; the other times
14215 * we just need to reset it before using it again.
14216 */
14217 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
14218 if (RT_LIKELY(pReNative))
14219 iemNativeReInit(pReNative, pTb);
14220 else
14221 {
14222 pReNative = iemNativeInit(pVCpu, pTb);
14223 AssertReturn(pReNative, pTb);
14224 }
14225
14226#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14227 /*
14228 * First do liveness analysis. This is done backwards.
14229 */
14230 {
14231 uint32_t idxCall = pTb->Thrd.cCalls;
14232 if (idxCall <= pReNative->cLivenessEntriesAlloc)
14233 { /* likely */ }
14234 else
14235 {
14236 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
14237 while (idxCall > cAlloc)
14238 cAlloc *= 2;
14239 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
14240 AssertReturn(pvNew, pTb);
14241 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
14242 pReNative->cLivenessEntriesAlloc = cAlloc;
14243 }
14244 AssertReturn(idxCall > 0, pTb);
14245 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
14246
14247 /* The initial (final) entry. */
14248 idxCall--;
14249 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
14250
14251 /* Loop backwards thru the calls and fill in the other entries. */
14252 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
14253 while (idxCall > 0)
14254 {
14255 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
14256 if (pfnLiveness)
14257 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
14258 else
14259 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
14260 pCallEntry--;
14261 idxCall--;
14262 }
14263
14264# ifdef VBOX_WITH_STATISTICS
14265 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
14266 to 'clobbered' rather than 'input'. */
14267 /** @todo */
14268# endif
14269 }
14270#endif
14271
14272 /*
14273 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
14274 * for aborting if an error happens.
14275 */
14276 uint32_t cCallsLeft = pTb->Thrd.cCalls;
14277#ifdef LOG_ENABLED
14278 uint32_t const cCallsOrg = cCallsLeft;
14279#endif
14280 uint32_t off = 0;
14281 int rc = VINF_SUCCESS;
14282 IEMNATIVE_TRY_SETJMP(pReNative, rc)
14283 {
14284 /*
14285 * Emit prolog code (fixed).
14286 */
14287 off = iemNativeEmitProlog(pReNative, off);
14288
14289 /*
14290 * Convert the calls to native code.
14291 */
14292#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14293 int32_t iGstInstr = -1;
14294#endif
14295#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
14296 uint32_t cThreadedCalls = 0;
14297 uint32_t cRecompiledCalls = 0;
14298#endif
14299#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
14300 uint32_t idxCurCall = 0;
14301#endif
14302 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
14303 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
14304 while (cCallsLeft-- > 0)
14305 {
14306 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
14307#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14308 pReNative->idxCurCall = idxCurCall;
14309#endif
14310
14311 /*
14312 * Debug info, assembly markup and statistics.
14313 */
14314#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
14315 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
14316 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
14317#endif
14318#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14319 iemNativeDbgInfoAddNativeOffset(pReNative, off);
14320 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
14321 {
14322 if (iGstInstr < (int32_t)pTb->cInstructions)
14323 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
14324 else
14325 Assert(iGstInstr == pTb->cInstructions);
14326 iGstInstr = pCallEntry->idxInstr;
14327 }
14328 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
14329#endif
14330#if defined(VBOX_STRICT)
14331 off = iemNativeEmitMarker(pReNative, off,
14332 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
14333#endif
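/* Note: the TB disassembler earlier in this file decodes this strict-build marker:
   low 15 bits = call index, bit 15 = recompiled or not, high word = the threaded
   function number. */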
14334#if defined(VBOX_STRICT)
14335 iemNativeRegAssertSanity(pReNative);
14336#endif
14337#ifdef VBOX_WITH_STATISTICS
14338 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
14339#endif
14340
14341 /*
14342 * Actual work.
14343 */
14344 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
14345 pfnRecom ? "(recompiled)" : "(todo)"));
14346 if (pfnRecom) /** @todo stats on this. */
14347 {
14348 off = pfnRecom(pReNative, off, pCallEntry);
14349 STAM_REL_STATS({cRecompiledCalls++;});
14350 }
14351 else
14352 {
14353 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
14354 STAM_REL_STATS({cThreadedCalls++;});
14355 }
14356 Assert(off <= pReNative->cInstrBufAlloc);
14357 Assert(pReNative->cCondDepth == 0);
14358
14359#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
14360 if (LogIs2Enabled())
14361 {
14362 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
14363# ifndef IEMLIVENESS_EXTENDED_LAYOUT
14364 static const char s_achState[] = "CUXI";
14365# else
14366 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
14367# endif
14368
14369 char szGpr[17];
14370 for (unsigned i = 0; i < 16; i++)
14371 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
14372 szGpr[16] = '\0';
14373
14374 char szSegBase[X86_SREG_COUNT + 1];
14375 char szSegLimit[X86_SREG_COUNT + 1];
14376 char szSegAttrib[X86_SREG_COUNT + 1];
14377 char szSegSel[X86_SREG_COUNT + 1];
14378 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
14379 {
14380 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
14381 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
14382 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
14383 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
14384 }
14385 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
14386 = szSegSel[X86_SREG_COUNT] = '\0';
14387
14388 char szEFlags[8];
14389 for (unsigned i = 0; i < 7; i++)
14390 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
14391 szEFlags[7] = '\0';
14392
14393 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
14394 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
14395 }
14396#endif
14397
14398 /*
14399 * Advance.
14400 */
14401 pCallEntry++;
14402#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
14403 idxCurCall++;
14404#endif
14405 }
14406
14407 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
14408 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
14409 if (!cThreadedCalls)
14410 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
14411
14412 /*
14413 * Emit the epilog code.
14414 */
14415 uint32_t idxReturnLabel;
14416 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
14417
14418 /*
14419 * Generate special jump labels.
14420 */
14421 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
14422 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
14423 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
14424 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
14425 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
14426 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
14427 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
14428 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
14429 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
14430 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
14431 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
14432 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
14433 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
14434 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
14435 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
14436 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
14437 }
14438 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
14439 {
14440 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
14441 return pTb;
14442 }
14443 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
14444 Assert(off <= pReNative->cInstrBufAlloc);
14445
14446 /*
14447 * Make sure all labels have been defined.
14448 */
14449 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
14450#ifdef VBOX_STRICT
14451 uint32_t const cLabels = pReNative->cLabels;
14452 for (uint32_t i = 0; i < cLabels; i++)
14453 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
14454#endif
14455
14456 /*
14457 * Allocate executable memory, copy over the code we've generated.
14458 */
14459 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
14460 if (pTbAllocator->pDelayedFreeHead)
14461 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
14462
14463 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
14464 AssertReturn(paFinalInstrBuf, pTb);
14465 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
14466
14467 /*
14468 * Apply fixups.
14469 */
14470 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
14471 uint32_t const cFixups = pReNative->cFixups;
14472 for (uint32_t i = 0; i < cFixups; i++)
14473 {
14474 Assert(paFixups[i].off < off);
14475 Assert(paFixups[i].idxLabel < cLabels);
14476 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
14477 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
14478 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
14479 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
14480 switch (paFixups[i].enmType)
14481 {
14482#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
14483 case kIemNativeFixupType_Rel32:
14484 Assert(paFixups[i].off + 4 <= off);
14485 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14486 continue;
14487
14488#elif defined(RT_ARCH_ARM64)
14489 case kIemNativeFixupType_RelImm26At0:
14490 {
14491 Assert(paFixups[i].off < off);
14492 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14493 Assert(offDisp >= -262144 && offDisp < 262144);
14494 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
14495 continue;
14496 }
14497
14498 case kIemNativeFixupType_RelImm19At5:
14499 {
14500 Assert(paFixups[i].off < off);
14501 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14502 Assert(offDisp >= -262144 && offDisp < 262144);
14503 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
14504 continue;
14505 }
14506
14507 case kIemNativeFixupType_RelImm14At5:
14508 {
14509 Assert(paFixups[i].off < off);
14510 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14511 Assert(offDisp >= -8192 && offDisp < 8192);
14512 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
14513 continue;
14514 }
14515
14516#endif
14517 case kIemNativeFixupType_Invalid:
14518 case kIemNativeFixupType_End:
14519 break;
14520 }
14521 AssertFailed();
14522 }
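/* Worked example for the Rel32 case above (a sketch): with the label at native
   offset 0x100, the fixup recorded at offset 0x60 and an offAddend of -4 (a
   typical value, compensating for the 4-byte displacement field itself), the
   stored value is 0x100 - 0x60 - 4 = 0x9c, i.e. the displacement is relative to
   the byte following the rel32 field, as AMD64 branch instructions expect. */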
14523
14524 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
14525 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
14526
14527 /*
14528 * Convert the translation block.
14529 */
14530 RTMemFree(pTb->Thrd.paCalls);
14531 pTb->Native.paInstructions = paFinalInstrBuf;
14532 pTb->Native.cInstructions = off;
14533 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
14534#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14535 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
14536 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
14537#endif
14538
14539 Assert(pTbAllocator->cThreadedTbs > 0);
14540 pTbAllocator->cThreadedTbs -= 1;
14541 pTbAllocator->cNativeTbs += 1;
14542 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
14543
14544#ifdef LOG_ENABLED
14545 /*
14546 * Disassemble to the log if enabled.
14547 */
14548 if (LogIs3Enabled())
14549 {
14550 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
14551 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
14552# ifdef DEBUG_bird
14553 RTLogFlush(NULL);
14554# endif
14555 }
14556#endif
14557 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
14558
14559 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
14560 return pTb;
14561}
14562