VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103728

Last change on this file since 103728 was 103728, checked in by vboxsync, 9 months ago

VMM/IEM: Initial implementation of a SIMD register allocator and associated code in order to be able to recompile SSE/AVX instructions (disabled by default and only working on ARM64 right now), bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 730.6 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103728 2024-03-07 12:11:33Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
136static uint32_t iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
137 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
138# endif
139static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
140#endif
141#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
142static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
143static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
144#endif
145DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
146DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
147 IEMNATIVEGSTREG enmGstReg, uint32_t off);
148DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
149
150
151/*********************************************************************************************************************************
152* Executable Memory Allocator *
153*********************************************************************************************************************************/
154/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155 * Use an alternative chunk sub-allocator that does not store internal data
156 * in the chunk.
157 *
158 * Using RTHeapSimple is not practical on newer darwin systems where
159 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
160 * memory. We would have to change the protection of the whole chunk for
161 * every call to RTHeapSimple, which would be rather expensive.
162 *
163 * This alternative implementation restricts page protection modifications
164 * to the pages backing the executable memory we just allocated.
165 */
166#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
167/** The chunk sub-allocation unit size in bytes. */
168#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
169/** The chunk sub-allocation unit size as a shift factor. */
170#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
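/* Illustrative sketch (hypothetical request size, not part of the build): the sub-allocator
 * hands out memory in whole 128-byte units, so a request is rounded up and converted to a
 * unit count roughly like the allocation code further down does it:
 * @code
 *      uint32_t const cbReq     = 300;  // hypothetical request size in bytes
 *      uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
 *                               >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
 *      // cReqUnits == 3, i.e. 3 * 128 = 384 bytes get marked in the allocation bitmap.
 * @endcode
 */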
171
172#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
173# ifdef IEMNATIVE_USE_GDB_JIT
174# define IEMNATIVE_USE_GDB_JIT_ET_DYN
175
176/** GDB JIT: Code entry. */
177typedef struct GDBJITCODEENTRY
178{
179 struct GDBJITCODEENTRY *pNext;
180 struct GDBJITCODEENTRY *pPrev;
181 uint8_t *pbSymFile;
182 uint64_t cbSymFile;
183} GDBJITCODEENTRY;
184
185/** GDB JIT: Actions. */
186typedef enum GDBJITACTIONS : uint32_t
187{
188 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
189} GDBJITACTIONS;
190
191/** GDB JIT: Descriptor. */
192typedef struct GDBJITDESCRIPTOR
193{
194 uint32_t uVersion;
195 GDBJITACTIONS enmAction;
196 GDBJITCODEENTRY *pRelevant;
197 GDBJITCODEENTRY *pHead;
198 /** Our addition: */
199 GDBJITCODEENTRY *pTail;
200} GDBJITDESCRIPTOR;
201
202/** GDB JIT: Our simple symbol file data. */
203typedef struct GDBJITSYMFILE
204{
205 Elf64_Ehdr EHdr;
206# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Shdr aShdrs[5];
208# else
209 Elf64_Shdr aShdrs[7];
210 Elf64_Phdr aPhdrs[2];
211# endif
212 /** The dwarf ehframe data for the chunk. */
213 uint8_t abEhFrame[512];
214 char szzStrTab[128];
215 Elf64_Sym aSymbols[3];
216# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
217 Elf64_Sym aDynSyms[2];
218 Elf64_Dyn aDyn[6];
219# endif
220} GDBJITSYMFILE;
221
222extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
223extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
224
225/** Init once for g_IemNativeGdbJitLock. */
226static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
227/** Critical section protecting the GDB JIT descriptor list. */
228static RTCRITSECT g_IemNativeGdbJitLock;
229
230/** GDB reads the info here. */
231GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
232
233/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
234DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
235{
236 ASMNopPause();
237}
238
239/** @callback_method_impl{FNRTONCE} */
240static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
241{
242 RT_NOREF(pvUser);
243 return RTCritSectInit(&g_IemNativeGdbJitLock);
244}
245
246
247# endif /* IEMNATIVE_USE_GDB_JIT */
248
249/**
250 * Per-chunk unwind info for non-windows hosts.
251 */
252typedef struct IEMEXECMEMCHUNKEHFRAME
253{
254# ifdef IEMNATIVE_USE_LIBUNWIND
255 /** The offset of the FDA into abEhFrame. */
256 uintptr_t offFda;
257# else
258 /** 'struct object' storage area. */
259 uint8_t abObject[1024];
260# endif
261# ifdef IEMNATIVE_USE_GDB_JIT
262# if 0
263 /** The GDB JIT 'symbol file' data. */
264 GDBJITSYMFILE GdbJitSymFile;
265# endif
266 /** The GDB JIT list entry. */
267 GDBJITCODEENTRY GdbJitEntry;
268# endif
269 /** The dwarf ehframe data for the chunk. */
270 uint8_t abEhFrame[512];
271} IEMEXECMEMCHUNKEHFRAME;
272/** Pointer to per-chunk info for non-windows hosts. */
273typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
274#endif
275
276
277/**
278 * A chunk of executable memory.
279 */
280typedef struct IEMEXECMEMCHUNK
281{
282#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
283 /** Number of free items in this chunk. */
284 uint32_t cFreeUnits;
285 /** Hint where to start searching for free space in the allocation bitmap. */
286 uint32_t idxFreeHint;
287#else
288 /** The heap handle. */
289 RTHEAPSIMPLE hHeap;
290#endif
291 /** Pointer to the chunk. */
292 void *pvChunk;
293#ifdef IN_RING3
294 /**
295 * Pointer to the unwind information.
296 *
297 * This is used during C++ throw and longjmp (windows and probably most other
298 * platforms). Some debuggers (windbg) make use of it as well.
299 *
300 * Windows: This is allocated from hHeap on windows because (at least for
301 * AMD64) the UNWIND_INFO structure address in the
302 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
303 *
304 * Others: Allocated from the regular heap to avoid unnecessary executable data
305 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
306 void *pvUnwindInfo;
307#elif defined(IN_RING0)
308 /** Allocation handle. */
309 RTR0MEMOBJ hMemObj;
310#endif
311} IEMEXECMEMCHUNK;
312/** Pointer to a memory chunk. */
313typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
314
315
316/**
317 * Executable memory allocator for the native recompiler.
318 */
319typedef struct IEMEXECMEMALLOCATOR
320{
321 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
322 uint32_t uMagic;
323
324 /** The chunk size. */
325 uint32_t cbChunk;
326 /** The maximum number of chunks. */
327 uint32_t cMaxChunks;
328 /** The current number of chunks. */
329 uint32_t cChunks;
330 /** Hint where to start looking for available memory. */
331 uint32_t idxChunkHint;
332 /** Statistics: Current number of allocations. */
333 uint32_t cAllocations;
334
335 /** The total amount of memory available. */
336 uint64_t cbTotal;
337 /** Total amount of free memory. */
338 uint64_t cbFree;
339 /** Total amount of memory allocated. */
340 uint64_t cbAllocated;
341
342#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
343 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
344 *
345 * Since the chunk size is a power of two and the minimum chunk size is a lot
346 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
347 * require a whole number of uint64_t elements in the allocation bitmap. So,
348 * for the sake of simplicity, they are allocated as one continuous block. */
350 uint64_t *pbmAlloc;
351 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
352 uint32_t cUnitsPerChunk;
353 /** Number of bitmap elements per chunk (for quickly locating the bitmap
354 * portion corresponding to a chunk). */
355 uint32_t cBitmapElementsPerChunk;
356#else
357 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
358 * @{ */
359 /** The size of the heap internal block header. This is used to adjust the
360 * requested memory size to make sure there is exactly enough room for a header at
361 * the end of the blocks we allocate before the next 64 byte alignment line. */
362 uint32_t cbHeapBlockHdr;
363 /** The size of the initial heap allocation required to make sure the first
364 * allocation is correctly aligned. */
365 uint32_t cbHeapAlignTweak;
366 /** The alignment tweak allocation address. */
367 void *pvAlignTweak;
368 /** @} */
369#endif
370
371#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
372 /** Pointer to the array of unwind info running parallel to aChunks (same
373 * allocation as this structure, located after the bitmaps).
374 * (For Windows, the structures must reside in 32-bit RVA distance to the
375 * actual chunk, so they are allocated off the chunk.) */
376 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
377#endif
378
379 /** The allocation chunks. */
380 RT_FLEXIBLE_ARRAY_EXTENSION
381 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
382} IEMEXECMEMALLOCATOR;
383/** Pointer to an executable memory allocator. */
384typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
385
386/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
387#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
388
389
390static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
391
392
393/**
394 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
395 * the heap statistics.
396 */
397static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
398 uint32_t cbReq, uint32_t idxChunk)
399{
400 pExecMemAllocator->cAllocations += 1;
401 pExecMemAllocator->cbAllocated += cbReq;
402#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
403 pExecMemAllocator->cbFree -= cbReq;
404#else
405 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
406#endif
407 pExecMemAllocator->idxChunkHint = idxChunk;
408
409#ifdef RT_OS_DARWIN
410 /*
411 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
412 * on darwin. So, we mark the pages returned as read+write after alloc and
413 * expect the caller to call iemExecMemAllocatorReadyForUse when done
414 * writing to the allocation.
415 *
416 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
417 * for details.
418 */
419 /** @todo detect if this is necessary... it wasn't required on 10.15 or
420 * whatever older version it was. */
421 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
422 AssertRC(rc);
423#endif
424
425 return pvRet;
426}
427
428
429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
430static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
431 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
432{
433 /*
434 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
435 */
436 Assert(!(cToScan & 63));
437 Assert(!(idxFirst & 63));
438 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
439 pbmAlloc += idxFirst / 64;
440
441 /*
442 * Scan the bitmap for cReqUnits consecutive clear bits
443 */
444 /** @todo This can probably be done more efficiently for non-x86 systems. */
445 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
446 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
447 {
448 uint32_t idxAddBit = 1;
449 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
450 idxAddBit++;
451 if (idxAddBit >= cReqUnits)
452 {
453 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
454
455 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
456 pChunk->cFreeUnits -= cReqUnits;
457 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
458
459 void * const pvRet = (uint8_t *)pChunk->pvChunk
460 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
461
462 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
463 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
464 }
465
466 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
467 }
468 return NULL;
469}
470#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
471
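/* Illustrative trace (hypothetical 16-unit bitmap, not part of the build) of the first-fit
 * scan above with cReqUnits = 3 and idxFirst = 0:
 * @code
 *      // bitmap, LSB first:  1 1 0 1 0 0 0 1 ...   (1 = allocated, 0 = free)
 *      // ASMBitFirstClear -> iBit = 2, but bit 3 is set after a single clear bit, so
 *      // ASMBitNextClear  -> iBit = 4; bits 4..6 are clear, so units 4..6 are taken:
 *      // ASMBitSetRange(pbmAlloc, 4, 7) marks them and the returned address is
 *      // pvChunk + (4 << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT), i.e. pvChunk + 512.
 * @endcode
 */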
472
473static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
474{
475#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
476 /*
477 * Figure out how much to allocate.
478 */
479 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
480 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
481 {
482 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
483 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
484 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
485 {
486 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
487 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
488 if (pvRet)
489 return pvRet;
490 }
491 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
492 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
493 cReqUnits, idxChunk);
494 }
495#else
496 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
497 if (pvRet)
498 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
499#endif
500 return NULL;
501
502}
503
504
505/**
506 * Allocates @a cbReq bytes of executable memory.
507 *
508 * @returns Pointer to the memory, NULL if out of memory or other problem
509 * encountered.
510 * @param pVCpu The cross context virtual CPU structure of the calling
511 * thread.
512 * @param cbReq How many bytes are required.
513 */
514static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
515{
516 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
517 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
518 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
519
520
521 for (unsigned iIteration = 0;; iIteration++)
522 {
523 /*
524 * Adjust the request size so it'll fit the allocator alignment/whatnot.
525 *
526 * For the RTHeapSimple allocator this means to follow the logic described
527 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
528 * existing chunks if we think we've got sufficient free memory around.
529 *
530 * While for the alternative one we just align it up to a whole unit size.
531 */
532#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
533 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
534#else
535 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
536#endif
537 if (cbReq <= pExecMemAllocator->cbFree)
538 {
539 uint32_t const cChunks = pExecMemAllocator->cChunks;
540 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
541 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
548 {
549 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
550 if (pvRet)
551 return pvRet;
552 }
553 }
554
555 /*
556 * Can we grow it with another chunk?
557 */
558 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
559 {
560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
561 AssertLogRelRCReturn(rc, NULL);
562
563 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
564 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
565 if (pvRet)
566 return pvRet;
567 AssertFailed();
568 }
569
570 /*
571 * Try prune native TBs once.
572 */
573 if (iIteration == 0)
574 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
575 else
576 {
577 /** @todo stats... */
578 return NULL;
579 }
580 }
581
582}
583
584
585/** This is a hook that we may need later for changing memory protection back
586 * to readonly+exec */
587static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
588{
589#ifdef RT_OS_DARWIN
590 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
591 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
592 AssertRC(rc); RT_NOREF(pVCpu);
593
594 /*
595 * Flush the instruction cache:
596 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
597 */
598 /* sys_dcache_flush(pv, cb); - not necessary */
599 sys_icache_invalidate(pv, cb);
600#else
601 RT_NOREF(pVCpu, pv, cb);
602#endif
603}
604
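/* Minimal usage sketch (hypothetical sizes and code source, not part of the build) tying
 * together allocation, the darwin RW->RX protection dance described above, and freeing
 * (see iemExecMemAllocatorFree below):
 * @code
 *      uint32_t const  cbCode   = 256;                                  // hypothetical size
 *      PIEMNATIVEINSTR paNative = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *      if (paNative)
 *      {
 *          memcpy(paNative, pvRecompiledCode, cbCode);                  // hypothetical source
 *          iemExecMemAllocatorReadyForUse(pVCpu, paNative, cbCode);     // RW -> RX + icache flush
 *          // ... execute the translation block ...
 *          iemExecMemAllocatorFree(pVCpu, paNative, cbCode);            // when the TB is retired
 *      }
 * @endcode
 */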
605
606/**
607 * Frees executable memory.
608 */
609void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
610{
611 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
612 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
613 Assert(pv);
614#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
615 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
616#else
617 Assert(!((uintptr_t)pv & 63));
618#endif
619
620 /* Align the size as we did when allocating the block. */
621#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
622 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
623#else
624 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
625#endif
626
627 /* Free it / assert sanity. */
628#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
629 uint32_t const cChunks = pExecMemAllocator->cChunks;
630 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
631 bool fFound = false;
632 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
633 {
634 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
635 fFound = offChunk < cbChunk;
636 if (fFound)
637 {
638#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
639 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
640 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
641
642 /* Check that it's valid and free it. */
643 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
644 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
645 for (uint32_t i = 1; i < cReqUnits; i++)
646 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
647 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
648
649 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
650 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
651
652 /* Update the stats. */
653 pExecMemAllocator->cbAllocated -= cb;
654 pExecMemAllocator->cbFree += cb;
655 pExecMemAllocator->cAllocations -= 1;
656 return;
657#else
658 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
659 break;
660#endif
661 }
662 }
663# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
664 AssertFailed();
665# else
666 Assert(fFound);
667# endif
668#endif
669
670#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
671 /* Update stats while cb is freshly calculated. */
672 pExecMemAllocator->cbAllocated -= cb;
673 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
674 pExecMemAllocator->cAllocations -= 1;
675
676 /* Free it. */
677 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
678#endif
679}
680
681
682
683#ifdef IN_RING3
684# ifdef RT_OS_WINDOWS
685
686/**
687 * Initializes the unwind info structures for windows hosts.
688 */
689static int
690iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
691 void *pvChunk, uint32_t idxChunk)
692{
693 RT_NOREF(pVCpu);
694
695 /*
696 * The AMD64 unwind opcodes.
697 *
698 * This is a program that starts with RSP after a RET instruction that
699 * ends up in recompiled code, and the operations we describe here will
700 * restore all non-volatile registers and bring RSP back to where our
701 * RET address is. This means it's reverse order from what happens in
702 * the prologue.
703 *
704 * Note! Using a frame register approach here, partly because we have one,
705 * but mainly because the UWOP_ALLOC_LARGE argument values
706 * would be a pain to write initializers for. On the positive
707 * side, we're impervious to changes in the stack variable
708 * area and can deal with dynamic stack allocations if necessary.
709 */
710 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
711 {
712 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
713 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
714 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
715 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
716 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
717 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
718 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
719 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
720 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
721 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
722 };
723 union
724 {
725 IMAGE_UNWIND_INFO Info;
726 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
727 } s_UnwindInfo =
728 {
729 {
730 /* .Version = */ 1,
731 /* .Flags = */ 0,
732 /* .SizeOfProlog = */ 16, /* whatever */
733 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
734 /* .FrameRegister = */ X86_GREG_xBP,
735 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
736 }
737 };
738 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
739 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
740
741 /*
742 * Calc how much space we need and allocate it off the exec heap.
743 */
744 unsigned const cFunctionEntries = 1;
745 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
746 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
747# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
749 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
750 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
751# else
752 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
753 - pExecMemAllocator->cbHeapBlockHdr;
754 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
755 32 /*cbAlignment*/);
756# endif
757 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
758 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
759
760 /*
761 * Initialize the structures.
762 */
763 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
764
765 paFunctions[0].BeginAddress = 0;
766 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
767 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
768
769 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
770 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
771
772 /*
773 * Register it.
774 */
775 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
776 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
777
778 return VINF_SUCCESS;
779}
780
781
782# else /* !RT_OS_WINDOWS */
783
784/**
785 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
786 */
787DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
788{
789 if (iValue >= 64)
790 {
791 Assert(iValue < 0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
794 }
795 else if (iValue >= 0)
796 *Ptr.pb++ = (uint8_t)iValue;
797 else if (iValue > -64)
798 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
799 else
800 {
801 Assert(iValue > -0x2000);
802 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
803 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
804 }
805 return Ptr;
806}
807
808
809/**
810 * Emits an ULEB128 encoded value (up to 64-bit wide).
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
813{
814 while (uValue >= 0x80)
815 {
816 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
817 uValue >>= 7;
818 }
819 *Ptr.pb++ = (uint8_t)uValue;
820 return Ptr;
821}
822
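/* Worked example (hypothetical buffer, not part of the build) of what the ULEB128 helper
 * above produces; each byte carries seven value bits, LSB first, and the high bit flags a
 * continuation:
 * @code
 *      uint8_t    abBuf[16];                      // hypothetical scratch buffer
 *      RTPTRUNION Ptr = { abBuf };
 *      Ptr = iemDwarfPutUleb128(Ptr, 624485);     // 624485 == 0x98765
 *      // Emits 0xe5 0x8e 0x26:  (0x65 | 0x80), (0x0e | 0x80), 0x26.
 * @endcode
 */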
823
824/**
825 * Emits a CFA rule as register @a uReg + offset @a off.
826 */
827DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
828{
829 *Ptr.pb++ = DW_CFA_def_cfa;
830 Ptr = iemDwarfPutUleb128(Ptr, uReg);
831 Ptr = iemDwarfPutUleb128(Ptr, off);
832 return Ptr;
833}
834
835
836/**
837 * Emits a register (@a uReg) save location:
838 * CFA + @a off * data_alignment_factor
839 */
840DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
841{
842 if (uReg < 0x40)
843 *Ptr.pb++ = DW_CFA_offset | uReg;
844 else
845 {
846 *Ptr.pb++ = DW_CFA_offset_extended;
847 Ptr = iemDwarfPutUleb128(Ptr, uReg);
848 }
849 Ptr = iemDwarfPutUleb128(Ptr, off);
850 return Ptr;
851}
852
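/* Illustrative reading (values taken from the AMD64 CIE emitted further down) of the two CFA
 * helpers above; with a data alignment factor of -8 the emitted rules mean:
 * @code
 *      Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   // CFA       = RBP + 16
 *      Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,  1);    // ret RIP   = *(CFA + 1 * -8) = *(RBP + 8)
 *      Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2);    // saved RBP = *(CFA + 2 * -8) = *(RBP + 0)
 * @endcode
 */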
853
854# if 0 /* unused */
855/**
856 * Emits a register (@a uReg) save location, using signed offset:
857 * CFA + @a offSigned * data_alignment_factor
858 */
859DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
860{
861 *Ptr.pb++ = DW_CFA_offset_extended_sf;
862 Ptr = iemDwarfPutUleb128(Ptr, uReg);
863 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
864 return Ptr;
865}
866# endif
867
868
869/**
870 * Initializes the unwind info section for non-windows hosts.
871 */
872static int
873iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
874 void *pvChunk, uint32_t idxChunk)
875{
876 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
877 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
878
879 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
880
881 /*
882 * Generate the CIE first.
883 */
884# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
885 uint8_t const iDwarfVer = 3;
886# else
887 uint8_t const iDwarfVer = 4;
888# endif
889 RTPTRUNION const PtrCie = Ptr;
890 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
891 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
892 *Ptr.pb++ = iDwarfVer; /* DWARF version */
893 *Ptr.pb++ = 0; /* Augmentation. */
894 if (iDwarfVer >= 4)
895 {
896 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
897 *Ptr.pb++ = 0; /* Segment selector size. */
898 }
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
901# else
902 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
903# endif
904 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
905# ifdef RT_ARCH_AMD64
906 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
907# elif defined(RT_ARCH_ARM64)
908 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
909# else
910# error "port me"
911# endif
912 /* Initial instructions: */
913# ifdef RT_ARCH_AMD64
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
922# elif defined(RT_ARCH_ARM64)
923# if 1
924 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
925# else
926 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
927# endif
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
936 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
937 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
938 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
939 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
940 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
941 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
942# else
943# error "port me"
944# endif
945 while ((Ptr.u - PtrCie.u) & 3)
946 *Ptr.pb++ = DW_CFA_nop;
947 /* Finalize the CIE size. */
948 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
949
950 /*
951 * Generate an FDE for the whole chunk area.
952 */
953# ifdef IEMNATIVE_USE_LIBUNWIND
954 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
955# endif
956 RTPTRUNION const PtrFde = Ptr;
957 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
958 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
959 Ptr.pu32++;
960 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
961 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
962# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
963 *Ptr.pb++ = DW_CFA_nop;
964# endif
965 while ((Ptr.u - PtrFde.u) & 3)
966 *Ptr.pb++ = DW_CFA_nop;
967 /* Finalize the FDE size. */
968 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
969
970 /* Terminator entry. */
971 *Ptr.pu32++ = 0;
972 *Ptr.pu32++ = 0; /* just to be sure... */
973 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
974
975 /*
976 * Register it.
977 */
978# ifdef IEMNATIVE_USE_LIBUNWIND
979 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
980# else
981 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
982 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
983# endif
984
985# ifdef IEMNATIVE_USE_GDB_JIT
986 /*
987 * Now for telling GDB about this (experimental).
988 *
989 * This seems to work best with ET_DYN.
990 */
991 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
992# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
993 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
995# else
996 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
997 - pExecMemAllocator->cbHeapBlockHdr;
998 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
999# endif
1000 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1001 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1002
1003 RT_ZERO(*pSymFile);
1004
1005 /*
1006 * The ELF header:
1007 */
1008 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1009 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1010 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1011 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1012 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1013 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1014 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1015 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1016# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1017 pSymFile->EHdr.e_type = ET_DYN;
1018# else
1019 pSymFile->EHdr.e_type = ET_REL;
1020# endif
1021# ifdef RT_ARCH_AMD64
1022 pSymFile->EHdr.e_machine = EM_AMD64;
1023# elif defined(RT_ARCH_ARM64)
1024 pSymFile->EHdr.e_machine = EM_AARCH64;
1025# else
1026# error "port me"
1027# endif
1028 pSymFile->EHdr.e_version = 1; /*?*/
1029 pSymFile->EHdr.e_entry = 0;
1030# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1031 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1032# else
1033 pSymFile->EHdr.e_phoff = 0;
1034# endif
1035 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1036 pSymFile->EHdr.e_flags = 0;
1037 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1038# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1039 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1040 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1041# else
1042 pSymFile->EHdr.e_phentsize = 0;
1043 pSymFile->EHdr.e_phnum = 0;
1044# endif
1045 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1046 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1047 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1048
1049 uint32_t offStrTab = 0;
1050#define APPEND_STR(a_szStr) do { \
1051 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1052 offStrTab += sizeof(a_szStr); \
1053 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1054 } while (0)
1055#define APPEND_STR_FMT(a_szStr, ...) do { \
1056 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1057 offStrTab++; \
1058 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1059 } while (0)
1060
1061 /*
1062 * Section headers.
1063 */
1064 /* Section header #0: NULL */
1065 unsigned i = 0;
1066 APPEND_STR("");
1067 RT_ZERO(pSymFile->aShdrs[i]);
1068 i++;
1069
1070 /* Section header: .eh_frame */
1071 pSymFile->aShdrs[i].sh_name = offStrTab;
1072 APPEND_STR(".eh_frame");
1073 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1074 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1075# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1076 pSymFile->aShdrs[i].sh_offset
1077 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1078# else
1079 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1080 pSymFile->aShdrs[i].sh_offset = 0;
1081# endif
1082
1083 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1084 pSymFile->aShdrs[i].sh_link = 0;
1085 pSymFile->aShdrs[i].sh_info = 0;
1086 pSymFile->aShdrs[i].sh_addralign = 1;
1087 pSymFile->aShdrs[i].sh_entsize = 0;
1088 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1089 i++;
1090
1091 /* Section header: .shstrtab */
1092 unsigned const iShStrTab = i;
1093 pSymFile->EHdr.e_shstrndx = iShStrTab;
1094 pSymFile->aShdrs[i].sh_name = offStrTab;
1095 APPEND_STR(".shstrtab");
1096 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1097 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1098# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1099 pSymFile->aShdrs[i].sh_offset
1100 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1101# else
1102 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1103 pSymFile->aShdrs[i].sh_offset = 0;
1104# endif
1105 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1106 pSymFile->aShdrs[i].sh_link = 0;
1107 pSymFile->aShdrs[i].sh_info = 0;
1108 pSymFile->aShdrs[i].sh_addralign = 1;
1109 pSymFile->aShdrs[i].sh_entsize = 0;
1110 i++;
1111
1112 /* Section header: .symtab */
1113 pSymFile->aShdrs[i].sh_name = offStrTab;
1114 APPEND_STR(".symtab");
1115 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1116 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1119 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1120 pSymFile->aShdrs[i].sh_link = iShStrTab;
1121 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1122 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1123 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1124 i++;
1125
1126# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1127 /* Section header: .dynsym */
1128 pSymFile->aShdrs[i].sh_name = offStrTab;
1129 APPEND_STR(".dynsym");
1130 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1131 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1132 pSymFile->aShdrs[i].sh_offset
1133 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1134 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1135 pSymFile->aShdrs[i].sh_link = iShStrTab;
1136 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1137 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1138 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1139 i++;
1140# endif
1141
1142# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1143 /* Section header: .dynamic */
1144 pSymFile->aShdrs[i].sh_name = offStrTab;
1145 APPEND_STR(".dynamic");
1146 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1147 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1148 pSymFile->aShdrs[i].sh_offset
1149 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1150 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1151 pSymFile->aShdrs[i].sh_link = iShStrTab;
1152 pSymFile->aShdrs[i].sh_info = 0;
1153 pSymFile->aShdrs[i].sh_addralign = 1;
1154 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1155 i++;
1156# endif
1157
1158 /* Section header: .text */
1159 unsigned const iShText = i;
1160 pSymFile->aShdrs[i].sh_name = offStrTab;
1161 APPEND_STR(".text");
1162 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1163 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1164# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1165 pSymFile->aShdrs[i].sh_offset
1166 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1167# else
1168 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1169 pSymFile->aShdrs[i].sh_offset = 0;
1170# endif
1171 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1172 pSymFile->aShdrs[i].sh_link = 0;
1173 pSymFile->aShdrs[i].sh_info = 0;
1174 pSymFile->aShdrs[i].sh_addralign = 1;
1175 pSymFile->aShdrs[i].sh_entsize = 0;
1176 i++;
1177
1178 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1179
1180# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1181 /*
1182 * The program headers:
1183 */
1184 /* Everything in a single LOAD segment: */
1185 i = 0;
1186 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1187 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = 0;
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1193 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1194 i++;
1195 /* The .dynamic segment. */
1196 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1197 pSymFile->aPhdrs[i].p_flags = PF_R;
1198 pSymFile->aPhdrs[i].p_offset
1199 = pSymFile->aPhdrs[i].p_vaddr
1200 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1201 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1202 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1203 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1204 i++;
1205
1206 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1207
1208 /*
1209 * The dynamic section:
1210 */
1211 i = 0;
1212 pSymFile->aDyn[i].d_tag = DT_SONAME;
1213 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1214 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1223 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1224 i++;
1225 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1226 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1227 i++;
1228 pSymFile->aDyn[i].d_tag = DT_NULL;
1229 i++;
1230 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1231# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1232
1233 /*
1234 * Symbol tables:
1235 */
1236 /** @todo gdb doesn't seem to really like this ... */
1237 i = 0;
1238 pSymFile->aSymbols[i].st_name = 0;
1239 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1240 pSymFile->aSymbols[i].st_value = 0;
1241 pSymFile->aSymbols[i].st_size = 0;
1242 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1243 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1244# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1245 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1246# endif
1247 i++;
1248
1249 pSymFile->aSymbols[i].st_name = 0;
1250 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1251 pSymFile->aSymbols[i].st_value = 0;
1252 pSymFile->aSymbols[i].st_size = 0;
1253 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1254 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1255 i++;
1256
1257 pSymFile->aSymbols[i].st_name = offStrTab;
1258 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1259# if 0
1260 pSymFile->aSymbols[i].st_shndx = iShText;
1261 pSymFile->aSymbols[i].st_value = 0;
1262# else
1263 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1264 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1265# endif
1266 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1267 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1268 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1269# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1270 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1271 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1272# endif
1273 i++;
1274
1275 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1276 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1277
1278 /*
1279 * The GDB JIT entry and informing GDB.
1280 */
1281 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1282# if 1
1283 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1284# else
1285 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1286# endif
1287
1288 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1289 RTCritSectEnter(&g_IemNativeGdbJitLock);
1290 pEhFrame->GdbJitEntry.pNext = NULL;
1291 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1292 if (__jit_debug_descriptor.pTail)
1293 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1294 else
1295 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1296 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1297 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1298
1299 /* Notify GDB: */
1300 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1301 __jit_debug_register_code();
1302 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1303 RTCritSectLeave(&g_IemNativeGdbJitLock);
1304
1305# else /* !IEMNATIVE_USE_GDB_JIT */
1306 RT_NOREF(pVCpu);
1307# endif /* !IEMNATIVE_USE_GDB_JIT */
1308
1309 return VINF_SUCCESS;
1310}
1311
1312# endif /* !RT_OS_WINDOWS */
1313#endif /* IN_RING3 */
1314
1315
1316/**
1317 * Adds another chunk to the executable memory allocator.
1318 *
1319 * This is used by the init code for the initial allocation and later by the
1320 * regular allocator function when it's out of memory.
1321 */
1322static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1323{
1324 /* Check that we've room for growth. */
1325 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1326 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1327
1328 /* Allocate a chunk. */
1329#ifdef RT_OS_DARWIN
1330 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1331#else
1332 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1333#endif
1334 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1335
1336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1337 int rc = VINF_SUCCESS;
1338#else
1339 /* Initialize the heap for the chunk. */
1340 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1341 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1342 AssertRC(rc);
1343 if (RT_SUCCESS(rc))
1344 {
1345 /*
1346 * We want the memory to be aligned on 64 byte, so the first time thru
1347 * here we do some exploratory allocations to see how we can achieve this.
1348 * On subsequent runs we only make an initial adjustment allocation, if
1349 * necessary.
1350 *
1351 * Since we own the heap implementation, we know that the internal block
1352 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1353 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1354 * to the size, align up by 64 bytes, and subtract 32 bytes.
1355 *
1356 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1357 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1358 * allocation to force subsequent allocations to return 64 byte aligned
1359 * user areas.
1360 */
1361 if (!pExecMemAllocator->cbHeapBlockHdr)
1362 {
1363 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1364 pExecMemAllocator->cbHeapAlignTweak = 64;
1365 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1366 32 /*cbAlignment*/);
1367 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1368
1369 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1376 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1377 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1378 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1379 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1380
1381 RTHeapSimpleFree(hHeap, pvTest2);
1382 RTHeapSimpleFree(hHeap, pvTest1);
1383 }
1384 else
1385 {
1386 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1387 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1388 }
1389 if (RT_SUCCESS(rc))
1390#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1391 {
1392 /*
1393 * Add the chunk.
1394 *
1395 * This must be done before the unwind init so windows can allocate
1396 * memory from the chunk when using the alternative sub-allocator.
1397 */
1398 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1399#ifdef IN_RING3
1400 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1401#endif
1402#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1403 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1404#else
1405 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1406 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1407 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1408 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1409#endif
1410
1411 pExecMemAllocator->cChunks = idxChunk + 1;
1412 pExecMemAllocator->idxChunkHint = idxChunk;
1413
1414#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1415 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1416 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1417#else
1418 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1419 pExecMemAllocator->cbTotal += cbFree;
1420 pExecMemAllocator->cbFree += cbFree;
1421#endif
1422
1423#ifdef IN_RING3
1424 /*
1425 * Initialize the unwind information (this cannot really fail atm).
1426 * (This sets pvUnwindInfo.)
1427 */
1428 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1429 if (RT_SUCCESS(rc))
1430#endif
1431 {
1432 return VINF_SUCCESS;
1433 }
1434
1435#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1436 /* Just in case the impossible happens, undo the above: */
1437 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1438 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1439 pExecMemAllocator->cChunks = idxChunk;
1440 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1441 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1442 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1443 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1444#endif
1445 }
1446#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1447 }
1448#endif
1449 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1450 RT_NOREF(pVCpu);
1451 return rc;
1452}
1453
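/* Worked example (hypothetical request size, not part of the build) of the 64-byte alignment
 * trick used above for the RTHeapSimple configuration: with a 32-byte block header the request
 * is padded so that header plus user area ends exactly on a 64-byte boundary, keeping every
 * subsequent user area 64-byte aligned:
 * @code
 *      uint32_t const cbHdr = 32;                                   // internal heap block header
 *      uint32_t const cbReq = 300;                                  // hypothetical request
 *      uint32_t const cbAdj = RT_ALIGN_32(cbReq + cbHdr, 64) - cbHdr;
 *      // RT_ALIGN_32(332, 64) == 384, so cbAdj == 352 and 32 + 352 == 384 == 6 * 64.
 * @endcode
 */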
1454
1455/**
1456 * Initializes the executable memory allocator for native recompilation on the
1457 * calling EMT.
1458 *
1459 * @returns VBox status code.
1460 * @param pVCpu The cross context virtual CPU structure of the calling
1461 * thread.
1462 * @param cbMax The max size of the allocator.
1463 * @param cbInitial The initial allocator size.
1464 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1465 * dependent).
1466 */
1467int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1468{
1469 /*
1470 * Validate input.
1471 */
1472 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1473 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1474 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1475 || cbChunk == 0
1476 || ( RT_IS_POWER_OF_TWO(cbChunk)
1477 && cbChunk >= _1M
1478 && cbChunk <= _256M
1479 && cbChunk <= cbMax),
1480 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1481 VERR_OUT_OF_RANGE);
1482
1483 /*
1484 * Adjust/figure out the chunk size.
1485 */
1486 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1487 {
1488 if (cbMax >= _256M)
1489 cbChunk = _64M;
1490 else
1491 {
1492 if (cbMax < _16M)
1493 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1494 else
1495 cbChunk = (uint32_t)cbMax / 4;
1496 if (!RT_IS_POWER_OF_TWO(cbChunk))
1497 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1498 }
1499 }
1500
1501 if (cbChunk > cbMax)
1502 cbMax = cbChunk;
1503 else
1504 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1505 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1506 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
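    /*
     * A worked example of the sizing above (illustrative figures, not taken from any caller):
     * with cbMax = 512M and cbChunk left at zero, cbMax >= _256M gives cbChunk = 64M, cbMax
     * stays 512M and cMaxChunks = 8.  With cbMax = 96M instead, 96M / 4 = 24M is not a power
     * of two, so RT_BIT_32(ASMBitLastSetU32()) rounds it up to cbChunk = 32M; cbMax is then
     * rounded to a 32M multiple (still 96M) and cMaxChunks = 3.
     */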
1507
1508 /*
1509 * Allocate and initialize the allocator instance.
1510 */
1511 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1512#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1513 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1514 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1515 cbNeeded += cbBitmap * cMaxChunks;
1516 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1517 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1518#endif
1519#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1520 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1521 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1522#endif
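    /*
     * Sketch of the single allocation sized above (ring-3, non-Windows, alternative
     * sub-allocator case):
     *      0           - IEMEXECMEMALLOCATOR header including aChunks[cMaxChunks]
     *      offBitmaps  - cMaxChunks allocation bitmaps of cbBitmap bytes each
     *                    (one bit per allocation unit, see pbmAlloc below)
     *      offEhFrames - cMaxChunks IEMEXECMEMCHUNKEHFRAME entries for unwind registration
     *      cbNeeded    - end of the block.
     */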
1523 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1524 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1525 VERR_NO_MEMORY);
1526 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1527 pExecMemAllocator->cbChunk = cbChunk;
1528 pExecMemAllocator->cMaxChunks = cMaxChunks;
1529 pExecMemAllocator->cChunks = 0;
1530 pExecMemAllocator->idxChunkHint = 0;
1531 pExecMemAllocator->cAllocations = 0;
1532 pExecMemAllocator->cbTotal = 0;
1533 pExecMemAllocator->cbFree = 0;
1534 pExecMemAllocator->cbAllocated = 0;
1535#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1536 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1537 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1538 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1539 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1540#endif
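    /*
     * Example of the sub-allocator bookkeeping set up above: one bit tracks one allocation
     * unit of RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) bytes, 64 units per uint64_t
     * bitmap element.  Assuming, say, a 64M chunk and a 256 byte unit, that makes 262144
     * units and 4096 bitmap elements (32K) per chunk.  Everything starts out as 0xff
     * (allocated) and the bits are cleared when the corresponding chunk is actually added.
     */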
1541#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1542 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1543#endif
1544 for (uint32_t i = 0; i < cMaxChunks; i++)
1545 {
1546#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1547 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1548 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1549#else
1550 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1551#endif
1552 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1553#ifdef IN_RING0
1554 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1555#else
1556 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1557#endif
1558 }
1559 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1560
1561 /*
1562 * Do the initial allocations.
1563 */
1564 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1565 {
1566 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1567 AssertLogRelRCReturn(rc, rc);
1568 }
1569
1570 pExecMemAllocator->idxChunkHint = 0;
1571
1572 return VINF_SUCCESS;
1573}
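/* Hypothetical usage sketch (the figures are made up, not taken from the actual caller):
       int rc = iemExecMemAllocatorInit(pVCpu, 64 * _1M, 16 * _1M, 0);
   i.e. a 64M maximum, 16M allocated up front and the default chunk size, which the code
   above turns into a 16M chunk size and room for four chunks. */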
1574
1575
1576/*********************************************************************************************************************************
1577* Native Recompilation *
1578*********************************************************************************************************************************/
1579
1580
1581/**
1582 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1585{
1586 pVCpu->iem.s.cInstructions += idxInstr;
1587 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1588}
1589
1590
1591/**
1592 * Used by TB code when it wants to raise a \#GP(0).
1593 */
1594IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1595{
1596 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1597#ifndef _MSC_VER
1598 return VINF_IEM_RAISED_XCPT; /* not reached */
1599#endif
1600}
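/* Note: iemRaiseGeneralProtectionFault0Jmp and the other raise workers used below never
   return normally; they raise the guest exception and unwind (longjmp/throw) back out of
   the translation block.  The returns guarded by !defined(_MSC_VER) are only there to keep
   compilers that cannot see this from warning about a missing return value. */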
1601
1602
1603/**
1604 * Used by TB code when it wants to raise a \#NM.
1605 */
1606IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1607{
1608 iemRaiseDeviceNotAvailableJmp(pVCpu);
1609#ifndef _MSC_VER
1610 return VINF_IEM_RAISED_XCPT; /* not reached */
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code when it wants to raise a \#UD.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1619{
1620 iemRaiseUndefinedOpcodeJmp(pVCpu);
1621#ifndef _MSC_VER
1622 return VINF_IEM_RAISED_XCPT; /* not reached */
1623#endif
1624}
1625
1626
1627/**
1628 * Used by TB code when it wants to raise a \#MF.
1629 */
1630IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1631{
1632 iemRaiseMathFaultJmp(pVCpu);
1633#ifndef _MSC_VER
1634 return VINF_IEM_RAISED_XCPT; /* not reached */
1635#endif
1636}
1637
1638
1639/**
1640 * Used by TB code when it wants to raise a \#XF.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1643{
1644 iemRaiseSimdFpExceptionJmp(pVCpu);
1645#ifndef _MSC_VER
1646 return VINF_IEM_RAISED_XCPT; /* not reached */
1647#endif
1648}
1649
1650
1651/**
1652 * Used by TB code when detecting opcode changes.
1653 * @see iemThreadedFuncWorkerObsoleteTb
1654 */
1655IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1656{
1657 /* We set fSafeToFree to false because we're being called in the context
1658    of a TB callback function, which for native TBs means we cannot release
1659    the executable memory until we've returned our way back to iemTbExec, as
1660    that return path goes via the native code generated for the TB. */
1661 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1662 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1663 return VINF_IEM_REEXEC_BREAK;
1664}
1665
1666
1667/**
1668 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1669 */
1670IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1671{
1672 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1673 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1674 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1675 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1676 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1677 return VINF_IEM_REEXEC_BREAK;
1678}
1679
1680
1681/**
1682 * Used by TB code when we missed a PC check after a branch.
1683 */
1684IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1685{
1686 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1687 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1688 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1689 pVCpu->iem.s.pbInstrBuf));
1690 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1691 return VINF_IEM_REEXEC_BREAK;
1692}
1693
1694
1695
1696/*********************************************************************************************************************************
1697* Helpers: Segmented memory fetches and stores. *
1698*********************************************************************************************************************************/
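/*
 * Note: each helper below comes in two flavours selected at compile time.  When the
 * corresponding IEMNATIVE_WITH_TLB_LOOKUP_XXX define is active, the recompiled code does
 * the TLB lookup inline and these helpers only serve as the fallback path, so they go
 * straight to the SafeJmp worker.  Otherwise they simply wrap the ordinary Jmp accessor,
 * which performs the whole lookup itself.
 */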
1699
1700/**
1701 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1706 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1707#else
1708 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712
1713/**
1714 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1715 * to 16 bits.
1716 */
1717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1718{
1719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1720 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1721#else
1722 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1723#endif
1724}
1725
1726
1727/**
1728 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1729 * to 32 bits.
1730 */
1731IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1732{
1733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1734 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1735#else
1736 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1737#endif
1738}
1739
1740/**
1741 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1742 * to 64 bits.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1745{
1746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1747 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1748#else
1749 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1750#endif
1751}
1752
1753
1754/**
1755 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1756 */
1757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1758{
1759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1760 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1761#else
1762 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1763#endif
1764}
1765
1766
1767/**
1768 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1769 * to 32 bits.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1772{
1773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1774 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1775#else
1776 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1777#endif
1778}
1779
1780
1781/**
1782 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1783 * to 64 bits.
1784 */
1785IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1786{
1787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1788 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1789#else
1790 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1791#endif
1792}
1793
1794
1795/**
1796 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1797 */
1798IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1799{
1800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1801 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1802#else
1803 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1804#endif
1805}
1806
1807
1808/**
1809 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1810 * to 64 bits.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1828 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1829#else
1830 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1831#endif
1832}
1833
1834
1835/**
1836 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1837 */
1838IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1839{
1840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1841 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1842#else
1843 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1844#endif
1845}
1846
1847
1848/**
1849 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1850 */
1851IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1852{
1853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1854 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1855#else
1856 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1857#endif
1858}
1859
1860
1861/**
1862 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1863 */
1864IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1865{
1866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1867 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1868#else
1869 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1870#endif
1871}
1872
1873
1874/**
1875 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1880 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1881#else
1882 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1883#endif
1884}
1885
1886
1887
1888/**
1889 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1890 */
1891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1892{
1893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1894 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1895#else
1896 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1897#endif
1898}
1899
1900
1901/**
1902 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1907 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1908#else
1909 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to store a 32-bit selector value onto a generic stack.
1916 *
1917 * Intel CPUs don't write a whole dword, hence the special function.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1922 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1923#else
1924 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1925#endif
1926}
1927
1928
1929/**
1930 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1931 */
1932IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1933{
1934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1935 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1936#else
1937 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1938#endif
1939}
1940
1941
1942/**
1943 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1944 */
1945IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1946{
1947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1948 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1949#else
1950 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1951#endif
1952}
1953
1954
1955/**
1956 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1957 */
1958IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1959{
1960#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1961 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1962#else
1963 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1964#endif
1965}
1966
1967
1968/**
1969 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1970 */
1971IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1972{
1973#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1974 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1975#else
1976 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1977#endif
1978}
1979
1980
1981
1982/*********************************************************************************************************************************
1983* Helpers: Flat memory fetches and stores. *
1984*********************************************************************************************************************************/
1985
1986/**
1987 * Used by TB code to load unsigned 8-bit data w/ flat address.
1988 * @note Zero extending the value to 64-bit to simplify assembly.
1989 */
1990IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1991{
1992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1993 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1994#else
1995 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1996#endif
1997}
1998
1999
2000/**
2001 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2002 * to 16 bits.
2003 * @note Zero extending the value to 64-bit to simplify assembly.
2004 */
2005IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2006{
2007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2009#else
2010 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2011#endif
2012}
2013
2014
2015/**
2016 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2017 * to 32 bits.
2018 * @note Zero extending the value to 64-bit to simplify assembly.
2019 */
2020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2021{
2022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2024#else
2025 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2026#endif
2027}
2028
2029
2030/**
2031 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2032 * to 64 bits.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2035{
2036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2037 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2038#else
2039 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2040#endif
2041}
2042
2043
2044/**
2045 * Used by TB code to load unsigned 16-bit data w/ flat address.
2046 * @note Zero extending the value to 64-bit to simplify assembly.
2047 */
2048IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2049{
2050#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2051 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2052#else
2053 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2054#endif
2055}
2056
2057
2058/**
2059 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2060 * to 32 bits.
2061 * @note Zero extending the value to 64-bit to simplify assembly.
2062 */
2063IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2064{
2065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2067#else
2068 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2069#endif
2070}
2071
2072
2073/**
2074 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2075 * to 64 bits.
2076 * @note Zero extending the value to 64-bit to simplify assembly.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2081 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2082#else
2083 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to load unsigned 32-bit data w/ flat address.
2090 * @note Zero extending the value to 64-bit to simplify assembly.
2091 */
2092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2093{
2094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2095 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2096#else
2097 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2098#endif
2099}
2100
2101
2102/**
2103 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2104 * to 64 bits.
2105 * @note Zero extending the value to 64-bit to simplify assembly.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2110 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2111#else
2112 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to load unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2123 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2124#else
2125 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2126#endif
2127}
2128
2129
2130/**
2131 * Used by TB code to store unsigned 8-bit data w/ flat address.
2132 */
2133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2134{
2135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2136 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2137#else
2138 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2139#endif
2140}
2141
2142
2143/**
2144 * Used by TB code to store unsigned 16-bit data w/ flat address.
2145 */
2146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2147{
2148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2149 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2150#else
2151 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2152#endif
2153}
2154
2155
2156/**
2157 * Used by TB code to store unsigned 32-bit data w/ flat address.
2158 */
2159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2160{
2161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2162 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2163#else
2164 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2165#endif
2166}
2167
2168
2169/**
2170 * Used by TB code to store unsigned 64-bit data w/ flat address.
2171 */
2172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2173{
2174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2175 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2176#else
2177 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2178#endif
2179}
2180
2181
2182
2183/**
2184 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2189 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2190#else
2191 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2198 */
2199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2200{
2201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2202 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2203#else
2204 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2205#endif
2206}
2207
2208
2209/**
2210 * Used by TB code to store a segment selector value onto a flat stack.
2211 *
2212 * Intel CPUs don't write a whole dword, hence the special function.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2217 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2218#else
2219 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2220#endif
2221}
2222
2223
2224/**
2225 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2226 */
2227IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2228{
2229#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2230 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2231#else
2232 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2233#endif
2234}
2235
2236
2237/**
2238 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2239 */
2240IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2241{
2242#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2243 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2244#else
2245 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2246#endif
2247}
2248
2249
2250/**
2251 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2252 */
2253IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2254{
2255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2256 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2257#else
2258 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2259#endif
2260}
2261
2262
2263/**
2264 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2269 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2270#else
2271 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2272#endif
2273}
2274
2275
2276
2277/*********************************************************************************************************************************
2278* Helpers: Segmented memory mapping. *
2279*********************************************************************************************************************************/
2280
2281/**
2282 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2283 * segmentation.
2284 */
2285IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2286 RTGCPTR GCPtrMem, uint8_t iSegReg))
2287{
2288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2289 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#else
2291 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2292#endif
2293}
2294
2295
2296/**
2297 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2298 */
2299IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2300 RTGCPTR GCPtrMem, uint8_t iSegReg))
2301{
2302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2303 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#else
2305 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2306#endif
2307}
2308
2309
2310/**
2311 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2312 */
2313IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2314 RTGCPTR GCPtrMem, uint8_t iSegReg))
2315{
2316#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2317 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2318#else
2319 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2320#endif
2321}
2322
2323
2324/**
2325 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2326 */
2327IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2328 RTGCPTR GCPtrMem, uint8_t iSegReg))
2329{
2330#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2331 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2332#else
2333 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2334#endif
2335}
2336
2337
2338/**
2339 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2340 * segmentation.
2341 */
2342IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2343 RTGCPTR GCPtrMem, uint8_t iSegReg))
2344{
2345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2346 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#else
2348 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#endif
2350}
2351
2352
2353/**
2354 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2355 */
2356IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2357 RTGCPTR GCPtrMem, uint8_t iSegReg))
2358{
2359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2360 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#else
2362 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#endif
2364}
2365
2366
2367/**
2368 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2369 */
2370IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2371 RTGCPTR GCPtrMem, uint8_t iSegReg))
2372{
2373#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2374 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2375#else
2376 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2377#endif
2378}
2379
2380
2381/**
2382 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2383 */
2384IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2385 RTGCPTR GCPtrMem, uint8_t iSegReg))
2386{
2387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2388 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2389#else
2390 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2391#endif
2392}
2393
2394
2395/**
2396 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2397 * segmentation.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2400 RTGCPTR GCPtrMem, uint8_t iSegReg))
2401{
2402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2403 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#else
2405 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2406#endif
2407}
2408
2409
2410/**
2411 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2412 */
2413IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2414 RTGCPTR GCPtrMem, uint8_t iSegReg))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2417 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#else
2419 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2426 */
2427IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2428 RTGCPTR GCPtrMem, uint8_t iSegReg))
2429{
2430#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2431 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2432#else
2433 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2434#endif
2435}
2436
2437
2438/**
2439 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2440 */
2441IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2442 RTGCPTR GCPtrMem, uint8_t iSegReg))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2445 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2446#else
2447 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2454 * segmentation.
2455 */
2456IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2457 RTGCPTR GCPtrMem, uint8_t iSegReg))
2458{
2459#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2460 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#else
2462 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#endif
2464}
2465
2466
2467/**
2468 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2469 */
2470IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2471 RTGCPTR GCPtrMem, uint8_t iSegReg))
2472{
2473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2474 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#else
2476 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2477#endif
2478}
2479
2480
2481/**
2482 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2483 */
2484IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2485 RTGCPTR GCPtrMem, uint8_t iSegReg))
2486{
2487#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2488 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#else
2490 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2491#endif
2492}
2493
2494
2495/**
2496 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2497 */
2498IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2499 RTGCPTR GCPtrMem, uint8_t iSegReg))
2500{
2501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2502 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#else
2504 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#endif
2506}
2507
2508
2509/**
2510 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2511 */
2512IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2513 RTGCPTR GCPtrMem, uint8_t iSegReg))
2514{
2515#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2516 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2517#else
2518 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2519#endif
2520}
2521
2522
2523/**
2524 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2525 */
2526IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2527 RTGCPTR GCPtrMem, uint8_t iSegReg))
2528{
2529#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2530 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2531#else
2532 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2533#endif
2534}
2535
2536
2537/**
2538 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2539 * segmentation.
2540 */
2541IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2542 RTGCPTR GCPtrMem, uint8_t iSegReg))
2543{
2544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2545 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#else
2547 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#endif
2549}
2550
2551
2552/**
2553 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2554 */
2555IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2556 RTGCPTR GCPtrMem, uint8_t iSegReg))
2557{
2558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2559 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#else
2561 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#endif
2563}
2564
2565
2566/**
2567 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2568 */
2569IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2570 RTGCPTR GCPtrMem, uint8_t iSegReg))
2571{
2572#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2573 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2574#else
2575 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2576#endif
2577}
2578
2579
2580/**
2581 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2582 */
2583IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2584 RTGCPTR GCPtrMem, uint8_t iSegReg))
2585{
2586#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2587 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2588#else
2589 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2590#endif
2591}
2592
2593
2594/*********************************************************************************************************************************
2595* Helpers: Flat memory mapping. *
2596*********************************************************************************************************************************/
2597
2598/**
2599 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2600 * address.
2601 */
2602IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2603{
2604#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2605 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2606#else
2607 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2608#endif
2609}
2610
2611
2612/**
2613 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2614 */
2615IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2616{
2617#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2618 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2619#else
2620 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2621#endif
2622}
2623
2624
2625/**
2626 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2627 */
2628IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2629{
2630#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2631 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2632#else
2633 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2634#endif
2635}
2636
2637
2638/**
2639 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2640 */
2641IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2642{
2643#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2644 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2645#else
2646 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2647#endif
2648}
2649
2650
2651/**
2652 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2653 * address.
2654 */
2655IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2656{
2657#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2658 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2659#else
2660 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2661#endif
2662}
2663
2664
2665/**
2666 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2667 */
2668IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2672#else
2673 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2682{
2683#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2684 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2685#else
2686 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2687#endif
2688}
2689
2690
2691/**
2692 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2693 */
2694IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2695{
2696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2697 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2698#else
2699 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2700#endif
2701}
2702
2703
2704/**
2705 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2706 * address.
2707 */
2708IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2709{
2710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2711 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2712#else
2713 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2714#endif
2715}
2716
2717
2718/**
2719 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2720 */
2721IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2722{
2723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2724 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2725#else
2726 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2727#endif
2728}
2729
2730
2731/**
2732 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2733 */
2734IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2735{
2736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2737 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2738#else
2739 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2740#endif
2741}
2742
2743
2744/**
2745 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2746 */
2747IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2748{
2749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2750 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2751#else
2752 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2753#endif
2754}
2755
2756
2757/**
2758 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2759 * address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2773 */
2774IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2775{
2776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2777 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2778#else
2779 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2780#endif
2781}
2782
2783
2784/**
2785 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2786 */
2787IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2788{
2789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2790 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2791#else
2792 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2793#endif
2794}
2795
2796
2797/**
2798 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2799 */
2800IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2801{
2802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2803 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2804#else
2805 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2806#endif
2807}
2808
2809
2810/**
2811 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2812 */
2813IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2814{
2815#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2816 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2817#else
2818 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2819#endif
2820}
2821
2822
2823/**
2824 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2825 */
2826IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2827{
2828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2829 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2830#else
2831 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2832#endif
2833}
2834
2835
2836/**
2837 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2838 * address.
2839 */
2840IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2841{
2842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2843 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2844#else
2845 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2846#endif
2847}
2848
2849
2850/**
2851 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2852 */
2853IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2857#else
2858 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2867{
2868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2869 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2870#else
2871 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2872#endif
2873}
2874
2875
2876/**
2877 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2878 */
2879IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2880{
2881#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2882 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2883#else
2884 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2885#endif
2886}
2887
2888
2889/*********************************************************************************************************************************
2890* Helpers: Commit, rollback & unmap *
2891*********************************************************************************************************************************/
2892
2893/**
2894 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2895 */
2896IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2897{
2898 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2899}
2900
2901
2902/**
2903 * Used by TB code to commit and unmap a read-write memory mapping.
2904 */
2905IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2906{
2907 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2908}
2909
2910
2911/**
2912 * Used by TB code to commit and unmap a write-only memory mapping.
2913 */
2914IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2915{
2916 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2917}
2918
2919
2920/**
2921 * Used by TB code to commit and unmap a read-only memory mapping.
2922 */
2923IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2924{
2925 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2926}
2927
2928
2929/**
2930 * Reinitializes the native recompiler state.
2931 *
2932 * Called before starting a new recompile job.
2933 */
2934static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2935{
2936 pReNative->cLabels = 0;
2937 pReNative->bmLabelTypes = 0;
2938 pReNative->cFixups = 0;
2939#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2940 pReNative->pDbgInfo->cEntries = 0;
2941#endif
2942 pReNative->pTbOrg = pTb;
2943 pReNative->cCondDepth = 0;
2944 pReNative->uCondSeqNo = 0;
2945 pReNative->uCheckIrqSeqNo = 0;
2946 pReNative->uTlbSeqNo = 0;
2947
2948#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2949 pReNative->Core.offPc = 0;
2950 pReNative->Core.cInstrPcUpdateSkipped = 0;
2951#endif
2952 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2953#if IEMNATIVE_HST_GREG_COUNT < 32
2954 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2955#endif
2956 ;
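    /* bmHstRegs tracks which host registers are in use; the fixed registers and, on hosts
       with fewer than 32 GPRs, all indexes at or above IEMNATIVE_HST_GREG_COUNT start out
       (and stay) set so that the register allocator never hands them out. */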
2957 pReNative->Core.bmHstRegsWithGstShadow = 0;
2958 pReNative->Core.bmGstRegShadows = 0;
2959 pReNative->Core.bmVars = 0;
2960 pReNative->Core.bmStack = 0;
2961 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2962 pReNative->Core.u64ArgVars = UINT64_MAX;
2963
2964 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
2965 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2968 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2969 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2970 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2971 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2972 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2973 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2974 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2975 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2976 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2977 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2978
2979 /* Full host register reinit: */
2980 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2981 {
2982 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2983 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2984 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2985 }
2986
2987 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2988 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2989#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2990 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2991#endif
2992#ifdef IEMNATIVE_REG_FIXED_TMP0
2993 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2994#endif
2995#ifdef IEMNATIVE_REG_FIXED_TMP1
2996 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2997#endif
2998#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2999 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3000#endif
3001 );
3002 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3003 {
3004 fRegs &= ~RT_BIT_32(idxReg);
3005 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3006 }
3007
3008 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3009#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3010 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3011#endif
3012#ifdef IEMNATIVE_REG_FIXED_TMP0
3013 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3014#endif
3015#ifdef IEMNATIVE_REG_FIXED_TMP1
3016 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3017#endif
3018#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3019 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3020#endif
3021
3022#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3023# ifdef RT_ARCH_ARM64
3024 /*
3025 * ARM64 only has 32 128-bit registers. In order to support emulating 256-bit registers we statically
3026 * pair two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3027 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register as fixed here during init
3028 * and the register allocator assumes that it will always be free when the lower one is picked.
3029 */
3030 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3031# else
3032 uint32_t const fFixedAdditional = 0;
3033# endif
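    /* On ARM64 the 0xaaaaaaaa value above sets every odd numbered bit, i.e. v1, v3, ..., v31
       (the upper halves of the 256-bit pairs described in the comment), so they are reserved
       alongside the fixed SIMD register mask. */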
3034
3035 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3036 | fFixedAdditional
3037# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3038 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3039# endif
3040 ;
3041 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3042 pReNative->Core.bmGstSimdRegShadows = 0;
3043 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3044 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3045
3046 /* Full host register reinit: */
3047 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3048 {
3049 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3050 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3051 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3052 }
3053
3054 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3055 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3056 {
3057 fRegs &= ~RT_BIT_32(idxReg);
3058 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3059 }
3060
3061#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3062 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3063#endif
3064
3065#endif
3066
3067 return pReNative;
3068}
3069
3070
3071/**
3072 * Allocates and initializes the native recompiler state.
3073 *
3074 * This is called the first time an EMT wants to recompile something.
3075 *
3076 * @returns Pointer to the new recompiler state.
3077 * @param pVCpu The cross context virtual CPU structure of the calling
3078 * thread.
3079 * @param pTb The TB that's about to be recompiled.
3080 * @thread EMT(pVCpu)
3081 */
3082static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3083{
3084 VMCPU_ASSERT_EMT(pVCpu);
3085
3086 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3087 AssertReturn(pReNative, NULL);
3088
3089 /*
3090 * Try allocate all the buffers and stuff we need.
3091 */
3092 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3093 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3094 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3095#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3096 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3097#endif
3098 if (RT_LIKELY( pReNative->pInstrBuf
3099 && pReNative->paLabels
3100 && pReNative->paFixups)
3101#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3102 && pReNative->pDbgInfo
3103#endif
3104 )
3105 {
3106 /*
3107 * Set the buffer & array sizes on success.
3108 */
3109 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3110 pReNative->cLabelsAlloc = _8K;
3111 pReNative->cFixupsAlloc = _16K;
3112#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3113 pReNative->cDbgInfoAlloc = _16K;
3114#endif
3115
3116 /* Other constant stuff: */
3117 pReNative->pVCpu = pVCpu;
3118
3119 /*
3120 * Done, just need to save it and reinit it.
3121 */
3122 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3123 return iemNativeReInit(pReNative, pTb);
3124 }
3125
3126 /*
3127 * Failed. Cleanup and return.
3128 */
3129 AssertFailed();
3130 RTMemFree(pReNative->pInstrBuf);
3131 RTMemFree(pReNative->paLabels);
3132 RTMemFree(pReNative->paFixups);
3133#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3134 RTMemFree(pReNative->pDbgInfo);
3135#endif
3136 RTMemFree(pReNative);
3137 return NULL;
3138}
3139
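/*
 * Illustrative sketch (an assumption, not the actual recompiler entry point): a caller is expected
 * to reuse the per-EMT state once it exists and only take the allocating path above on first use:
 *
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      if (RT_LIKELY(pReNative))
 *          pReNative = iemNativeReInit(pReNative, pTb);
 *      else
 *      {
 *          pReNative = iemNativeInit(pVCpu, pTb);
 *          AssertReturn(pReNative, NULL);                  // error handling depends on the caller
 *      }
 */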
3140
3141/**
3142 * Creates a label
3143 *
3144 * If the label does not yet have a defined position,
3145 * call iemNativeLabelDefine() later to set it.
3146 *
3147 * @returns Label ID. Throws VBox status code on failure, so no need to check
3148 * the return value.
3149 * @param pReNative The native recompile state.
3150 * @param enmType The label type.
3151 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3152 * label is not yet defined (default).
3153  * @param   uData       Data associated with the label. Only applicable to
3154  *                      certain types of labels. Default is zero.
3155 */
3156DECL_HIDDEN_THROW(uint32_t)
3157iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3158 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3159{
3160 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3161
3162 /*
3163 * Locate existing label definition.
3164 *
3165 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3166 * and uData is zero.
3167 */
3168 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3169 uint32_t const cLabels = pReNative->cLabels;
3170 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3171#ifndef VBOX_STRICT
3172 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3173 && offWhere == UINT32_MAX
3174 && uData == 0
3175#endif
3176 )
3177 {
3178#ifndef VBOX_STRICT
3179 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3180 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3181 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3182 if (idxLabel < pReNative->cLabels)
3183 return idxLabel;
3184#else
3185 for (uint32_t i = 0; i < cLabels; i++)
3186 if ( paLabels[i].enmType == enmType
3187 && paLabels[i].uData == uData)
3188 {
3189 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3190 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3191 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3192 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3193 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3194 return i;
3195 }
3196 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3197 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3198#endif
3199 }
3200
3201 /*
3202 * Make sure we've got room for another label.
3203 */
3204 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3205 { /* likely */ }
3206 else
3207 {
3208 uint32_t cNew = pReNative->cLabelsAlloc;
3209 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3210 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3211 cNew *= 2;
3212         AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3213 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3214 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3215 pReNative->paLabels = paLabels;
3216 pReNative->cLabelsAlloc = cNew;
3217 }
3218
3219 /*
3220 * Define a new label.
3221 */
3222 paLabels[cLabels].off = offWhere;
3223 paLabels[cLabels].enmType = enmType;
3224 paLabels[cLabels].uData = uData;
3225 pReNative->cLabels = cLabels + 1;
3226
3227 Assert((unsigned)enmType < 64);
3228 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3229
3230 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3231 {
3232 Assert(uData == 0);
3233 pReNative->aidxUniqueLabels[enmType] = cLabels;
3234 }
3235
3236 if (offWhere != UINT32_MAX)
3237 {
3238#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3239 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3240 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3241#endif
3242 }
3243 return cLabels;
3244}
3245
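/*
 * Illustrative sketch (not part of the build): typical forward-branch usage combining
 * iemNativeLabelCreate(), iemNativeAddFixup() and iemNativeLabelDefine().  The label and fixup
 * type names are placeholders; real emitters pick types matching the branch encoding they emit.
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);    // position still unknown
 *      // ... emit the branch instruction at 'off' that references the label ...
 *      iemNativeAddFixup(pReNative, off, idxLabel, enmSomeFixupType);
 *      // ... later, once the target position offTarget is reached:
 *      iemNativeLabelDefine(pReNative, idxLabel, offTarget);
 */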
3246
3247/**
3248 * Defines the location of an existing label.
3249 *
3250 * @param pReNative The native recompile state.
3251 * @param idxLabel The label to define.
3252 * @param offWhere The position.
3253 */
3254DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3255{
3256 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3257 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3258 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3259 pLabel->off = offWhere;
3260#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3261 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3262 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3263#endif
3264}
3265
3266
3267/**
3268  * Looks up a label.
3269 *
3270 * @returns Label ID if found, UINT32_MAX if not.
3271 */
3272static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3273 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3274{
3275 Assert((unsigned)enmType < 64);
3276 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3277 {
3278 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3279 return pReNative->aidxUniqueLabels[enmType];
3280
3281 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3282 uint32_t const cLabels = pReNative->cLabels;
3283 for (uint32_t i = 0; i < cLabels; i++)
3284 if ( paLabels[i].enmType == enmType
3285 && paLabels[i].uData == uData
3286 && ( paLabels[i].off == offWhere
3287 || offWhere == UINT32_MAX
3288 || paLabels[i].off == UINT32_MAX))
3289 return i;
3290 }
3291 return UINT32_MAX;
3292}
3293
3294
3295/**
3296 * Adds a fixup.
3297 *
3298 * @throws VBox status code (int) on failure.
3299 * @param pReNative The native recompile state.
3300 * @param offWhere The instruction offset of the fixup location.
3301 * @param idxLabel The target label ID for the fixup.
3302 * @param enmType The fixup type.
3303 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3304 */
3305DECL_HIDDEN_THROW(void)
3306iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3307 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3308{
3309 Assert(idxLabel <= UINT16_MAX);
3310 Assert((unsigned)enmType <= UINT8_MAX);
3311
3312 /*
3313 * Make sure we've room.
3314 */
3315 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3316 uint32_t const cFixups = pReNative->cFixups;
3317 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3318 { /* likely */ }
3319 else
3320 {
3321 uint32_t cNew = pReNative->cFixupsAlloc;
3322 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3323 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3324 cNew *= 2;
3325 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3326 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3327 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3328 pReNative->paFixups = paFixups;
3329 pReNative->cFixupsAlloc = cNew;
3330 }
3331
3332 /*
3333 * Add the fixup.
3334 */
3335 paFixups[cFixups].off = offWhere;
3336 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3337 paFixups[cFixups].enmType = enmType;
3338 paFixups[cFixups].offAddend = offAddend;
3339 pReNative->cFixups = cFixups + 1;
3340}
3341
3342
3343/**
3344 * Slow code path for iemNativeInstrBufEnsure.
3345 */
3346DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3347{
3348 /* Double the buffer size till we meet the request. */
3349 uint32_t cNew = pReNative->cInstrBufAlloc;
3350 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3351 do
3352 cNew *= 2;
3353 while (cNew < off + cInstrReq);
3354
3355 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3356#ifdef RT_ARCH_ARM64
3357 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3358#else
3359 uint32_t const cbMaxInstrBuf = _2M;
3360#endif
3361 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3362
3363 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3364 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3365
3366#ifdef VBOX_STRICT
3367 pReNative->offInstrBufChecked = off + cInstrReq;
3368#endif
3369 pReNative->cInstrBufAlloc = cNew;
3370 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3371}
3372
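/*
 * Illustrative sketch (not part of the build): emitters are expected to call the inline fast path
 * (iemNativeInstrBufEnsure() in the recompiler header) before writing instructions, so the slow
 * path above is only reached when the buffer actually needs growing.  Roughly:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);    // cInstrReq = 1
 *      pCodeBuf[off++] = uSomeEncodedInstruction;          // placeholder for a real encoding
 */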
3373#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3374
3375/**
3376 * Grows the static debug info array used during recompilation.
3377 *
3378 * @returns Pointer to the new debug info block; throws VBox status code on
3379 * failure, so no need to check the return value.
3380 */
3381DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3382{
3383 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3384 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3385 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3386 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3387 pReNative->pDbgInfo = pDbgInfo;
3388 pReNative->cDbgInfoAlloc = cNew;
3389 return pDbgInfo;
3390}
3391
3392
3393/**
3394  * Adds a new, uninitialized debug info entry, returning the pointer to it.
3395 */
3396DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3397{
3398 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3399 { /* likely */ }
3400 else
3401 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3402 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3403}
3404
3405
3406/**
3407 * Debug Info: Adds a native offset record, if necessary.
3408 */
3409static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3410{
3411 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3412
3413 /*
3414 * Search backwards to see if we've got a similar record already.
3415 */
3416 uint32_t idx = pDbgInfo->cEntries;
3417 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3418 while (idx-- > idxStop)
3419 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3420 {
3421 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3422 return;
3423 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3424 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3425 break;
3426 }
3427
3428 /*
3429 * Add it.
3430 */
3431 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3432 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3433 pEntry->NativeOffset.offNative = off;
3434}
3435
3436
3437/**
3438 * Debug Info: Record info about a label.
3439 */
3440static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3441{
3442 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3443 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3444 pEntry->Label.uUnused = 0;
3445 pEntry->Label.enmLabel = (uint8_t)enmType;
3446 pEntry->Label.uData = uData;
3447}
3448
3449
3450/**
3451 * Debug Info: Record info about a threaded call.
3452 */
3453static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3454{
3455 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3456 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3457 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3458 pEntry->ThreadedCall.uUnused = 0;
3459 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3460}
3461
3462
3463/**
3464 * Debug Info: Record info about a new guest instruction.
3465 */
3466static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3467{
3468 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3469 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3470 pEntry->GuestInstruction.uUnused = 0;
3471 pEntry->GuestInstruction.fExec = fExec;
3472}
3473
3474
3475/**
3476 * Debug Info: Record info about guest register shadowing.
3477 */
3478static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3479 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3480{
3481 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3482 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3483 pEntry->GuestRegShadowing.uUnused = 0;
3484 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3485 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3486 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3487}
3488
3489
3490# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3491/**
3492  * Debug Info: Record info about guest SIMD register shadowing.
3493 */
3494static void iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3495 uint8_t idxHstSimdReg = UINT8_MAX, uint8_t idxHstSimdRegPrev = UINT8_MAX)
3496{
3497 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3498 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3499 pEntry->GuestSimdRegShadowing.uUnused = 0;
3500 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3501 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3502 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3503}
3504# endif
3505
3506
3507# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3508/**
3509 * Debug Info: Record info about delayed RIP updates.
3510 */
3511static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3512{
3513 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3514 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3515 pEntry->DelayedPcUpdate.offPc = offPc;
3516 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3517}
3518# endif
3519
3520#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3521
3522
3523/*********************************************************************************************************************************
3524* Register Allocator *
3525*********************************************************************************************************************************/
3526
3527/**
3528 * Register parameter indexes (indexed by argument number).
3529 */
3530DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3531{
3532 IEMNATIVE_CALL_ARG0_GREG,
3533 IEMNATIVE_CALL_ARG1_GREG,
3534 IEMNATIVE_CALL_ARG2_GREG,
3535 IEMNATIVE_CALL_ARG3_GREG,
3536#if defined(IEMNATIVE_CALL_ARG4_GREG)
3537 IEMNATIVE_CALL_ARG4_GREG,
3538# if defined(IEMNATIVE_CALL_ARG5_GREG)
3539 IEMNATIVE_CALL_ARG5_GREG,
3540# if defined(IEMNATIVE_CALL_ARG6_GREG)
3541 IEMNATIVE_CALL_ARG6_GREG,
3542# if defined(IEMNATIVE_CALL_ARG7_GREG)
3543 IEMNATIVE_CALL_ARG7_GREG,
3544# endif
3545# endif
3546# endif
3547#endif
3548};
3549
3550/**
3551 * Call register masks indexed by argument count.
3552 */
3553DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3554{
3555 0,
3556 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3557 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3558 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3559 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3560 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3561#if defined(IEMNATIVE_CALL_ARG4_GREG)
3562 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3563 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3564# if defined(IEMNATIVE_CALL_ARG5_GREG)
3565 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3566 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3567# if defined(IEMNATIVE_CALL_ARG6_GREG)
3568 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3569 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3570 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3571# if defined(IEMNATIVE_CALL_ARG7_GREG)
3572 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3573 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3574 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3575# endif
3576# endif
3577# endif
3578#endif
3579};
3580
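/*
 * Illustrative sketch (not part of the build): how the two tables above are meant to be indexed
 * when setting up a host call.  'iArg' and 'cArgs' are placeholders for the argument number and
 * the number of register arguments respectively:
 *
 *      uint8_t  const idxArgReg = g_aidxIemNativeCallRegs[iArg];   // host register carrying argument #iArg
 *      uint32_t const fArgRegs  = g_afIemNativeCallRegs[cArgs];    // mask of all registers used for cArgs arguments,
 *                                                                  // e.g. for flushing/reserving them before the call
 */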
3581#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3582/**
3583 * BP offset of the stack argument slots.
3584 *
3585 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3586 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3587 */
3588DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3589{
3590 IEMNATIVE_FP_OFF_STACK_ARG0,
3591# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3592 IEMNATIVE_FP_OFF_STACK_ARG1,
3593# endif
3594# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3595 IEMNATIVE_FP_OFF_STACK_ARG2,
3596# endif
3597# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3598 IEMNATIVE_FP_OFF_STACK_ARG3,
3599# endif
3600};
3601AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3602#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3603
3604/**
3605 * Info about shadowed guest register values.
3606 * @see IEMNATIVEGSTREG
3607 */
3608static struct
3609{
3610 /** Offset in VMCPU. */
3611 uint32_t off;
3612 /** The field size. */
3613 uint8_t cb;
3614 /** Name (for logging). */
3615 const char *pszName;
3616} const g_aGstShadowInfo[] =
3617{
3618#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3619 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3620 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3621 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3622 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3623 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3624 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3625 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3626 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3627 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3628 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3629 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3630 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3631 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3632 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3633 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3634 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3635 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3636 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3637 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3638 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3639 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3640 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3641 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3642 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3643 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3644 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3645 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3646 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3647 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3648 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3649 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3650 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3651 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3652 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3653 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3654 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3655 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3656 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3657 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3658 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3659 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3660 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3661 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3662 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3663 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3664 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3665 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3666 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3667#undef CPUMCTX_OFF_AND_SIZE
3668};
3669AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3670
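/*
 * Illustrative sketch (not part of the build): the table above is what makes loading guest shadow
 * copies generic - a loader only needs the VMCPU byte offset and the field size.  The emitter
 * helper names below are hypothetical stand-ins:
 *
 *      uint32_t const offVCpu = g_aGstShadowInfo[enmGstReg].off;
 *      switch (g_aGstShadowInfo[enmGstReg].cb)
 *      {
 *          case 8: off = emitLoadGprFromVCpuU64(pReNative, off, idxHstReg, offVCpu); break;
 *          case 4: off = emitLoadGprFromVCpuU32(pReNative, off, idxHstReg, offVCpu); break;
 *          case 2: off = emitLoadGprFromVCpuU16(pReNative, off, idxHstReg, offVCpu); break;
 *          default: AssertFailed(); break;
 *      }
 */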
3671
3672/** Host CPU general purpose register names. */
3673DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3674{
3675#ifdef RT_ARCH_AMD64
3676 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3677#elif RT_ARCH_ARM64
3678 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3679 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3680#else
3681# error "port me"
3682#endif
3683};
3684
3685
3686DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3687 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3688{
3689 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3690
3691 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3692 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3693 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3694 return (uint8_t)idxReg;
3695}
3696
3697
3698#if 0 /* unused */
3699/**
3700 * Tries to locate a suitable register in the given register mask.
3701 *
3702 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3703 * failed.
3704 *
3705 * @returns Host register number on success, returns UINT8_MAX on failure.
3706 */
3707static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3708{
3709 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3710 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3711 if (fRegs)
3712 {
3713 /** @todo pick better here: */
3714 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3715
3716 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3717 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3718 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3719 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3720
3721 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3722 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3723 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3724 return idxReg;
3725 }
3726 return UINT8_MAX;
3727}
3728#endif /* unused */
3729
3730
3731/**
3732 * Locate a register, possibly freeing one up.
3733 *
3734 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3735 * failed.
3736 *
3737 * @returns Host register number on success. Returns UINT8_MAX if no registers
3738  *          found, the caller is supposed to deal with this and raise an
3739 * allocation type specific status code (if desired).
3740 *
3741  * @throws  VBox status code if we run into trouble spilling a variable or
3742 * recording debug info. Does NOT throw anything if we're out of
3743 * registers, though.
3744 */
3745static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3746 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3747{
3748 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3749 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3750 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3751
3752 /*
3753 * Try a freed register that's shadowing a guest register.
3754 */
3755 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3756 if (fRegs)
3757 {
3758 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3759
3760#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3761 /*
3762          * When we have liveness information, we use it to kick out all shadowed
3763          * guest registers that will not be needed any more in this TB.  If we're
3764 * lucky, this may prevent us from ending up here again.
3765 *
3766 * Note! We must consider the previous entry here so we don't free
3767 * anything that the current threaded function requires (current
3768 * entry is produced by the next threaded function).
3769 */
3770 uint32_t const idxCurCall = pReNative->idxCurCall;
3771 if (idxCurCall > 0)
3772 {
3773 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3774
3775# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3776 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3777 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3778             uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3779#else
3780 /* Construct a mask of the registers not in the read or write state.
3781                Note! We could skip writes, if they aren't from us, as this is just
3782 a hack to prevent trashing registers that have just been written
3783 or will be written when we retire the current instruction. */
3784 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3785 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3786 & IEMLIVENESSBIT_MASK;
3787#endif
3788 /* Merge EFLAGS. */
3789 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3790 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3791 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3792 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3793 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
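            /* Note (illustrative): the shift-and-AND cascade above reduces seven adjacent bits to one;
               after the three steps, bit N of fTmp is set only if bits N..N+6 of the original mask were
               all set.  The EFlags bit thus survives in fToFreeMask only when every one of its seven
               liveness sub-flags (OTHER, CF, PF, AF, ZF, SF, OF) may be freed. */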
3794
3795 /* If it matches any shadowed registers. */
3796 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3797 {
3798 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3799 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3800 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3801
3802 /* See if we've got any unshadowed registers we can return now. */
3803 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3804 if (fUnshadowedRegs)
3805 {
3806 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3807 return (fPreferVolatile
3808 ? ASMBitFirstSetU32(fUnshadowedRegs)
3809 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3810 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3811 - 1;
3812 }
3813 }
3814 }
3815#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3816
3817 unsigned const idxReg = (fPreferVolatile
3818 ? ASMBitFirstSetU32(fRegs)
3819 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3820 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3821 - 1;
3822
3823 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3824 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3825 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3826 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3827
3828 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3829 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3830 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3831 return idxReg;
3832 }
3833
3834 /*
3835      * Try to free up a variable that's in a register.
3836      *
3837      * We do two rounds here: first evacuating variables that don't need to be
3838      * saved on the stack, then in the second round moving things to the stack.
3839 */
3840 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3841 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3842 {
3843 uint32_t fVars = pReNative->Core.bmVars;
3844 while (fVars)
3845 {
3846 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3847 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3848 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3849 && (RT_BIT_32(idxReg) & fRegMask)
3850 && ( iLoop == 0
3851 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3852 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3853 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3854 {
3855 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3856 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3857 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3858 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3859 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3860 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3861
3862 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3863 {
3864 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3865 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3866 }
3867
3868 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3869 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3870
3871 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3872 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3873 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3874 return idxReg;
3875 }
3876 fVars &= ~RT_BIT_32(idxVar);
3877 }
3878 }
3879
3880 return UINT8_MAX;
3881}
3882
3883
3884/**
3885 * Reassigns a variable to a different register specified by the caller.
3886 *
3887 * @returns The new code buffer position.
3888 * @param pReNative The native recompile state.
3889 * @param off The current code buffer position.
3890 * @param idxVar The variable index.
3891 * @param idxRegOld The old host register number.
3892 * @param idxRegNew The new host register number.
3893 * @param pszCaller The caller for logging.
3894 */
3895static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3896 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3897{
3898 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3899 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3900 RT_NOREF(pszCaller);
3901
3902 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3903
3904 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3905 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3906 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3908
3909 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3910 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3911 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3912 if (fGstRegShadows)
3913 {
3914 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3915 | RT_BIT_32(idxRegNew);
3916 while (fGstRegShadows)
3917 {
3918 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3919 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3920
3921 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3922 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3923 }
3924 }
3925
3926 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3927 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3928 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3929 return off;
3930}
3931
3932
3933/**
3934 * Moves a variable to a different register or spills it onto the stack.
3935 *
3936 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3937 * kinds can easily be recreated if needed later.
3938 *
3939 * @returns The new code buffer position.
3940 * @param pReNative The native recompile state.
3941 * @param off The current code buffer position.
3942 * @param idxVar The variable index.
3943 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3944 * call-volatile registers.
3945 */
3946static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3947 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3948{
3949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3950 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3951 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3952 Assert(!pVar->fRegAcquired);
3953
3954 uint8_t const idxRegOld = pVar->idxReg;
3955 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3956 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3957 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3958 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3959 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3960 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3961 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3962 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3963
3964
3965 /** @todo Add statistics on this.*/
3966 /** @todo Implement basic variable liveness analysis (python) so variables
3967      * can be freed immediately once no longer used.  Otherwise we risk trashing
3968      * registers and stack slots on behalf of dead variables.
3969 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3970
3971 /*
3972 * First try move it to a different register, as that's cheaper.
3973 */
3974 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3975 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3976 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3977 if (fRegs)
3978 {
3979 /* Avoid using shadow registers, if possible. */
3980 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3981 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3982 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3983 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3984 }
3985
3986 /*
3987 * Otherwise we must spill the register onto the stack.
3988 */
3989 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3990 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3991 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3992 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3993
3994 pVar->idxReg = UINT8_MAX;
3995 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3996 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3997 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3998 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3999 return off;
4000}
4001
4002
4003/**
4004 * Allocates a temporary host general purpose register.
4005 *
4006 * This may emit code to save register content onto the stack in order to free
4007 * up a register.
4008 *
4009 * @returns The host register number; throws VBox status code on failure,
4010 * so no need to check the return value.
4011 * @param pReNative The native recompile state.
4012 * @param poff Pointer to the variable with the code buffer position.
4013  *                          This will be updated if we need to move a variable from
4014 * register to stack in order to satisfy the request.
4015 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4016 * registers (@c true, default) or the other way around
4017 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4018 */
4019DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4020{
4021 /*
4022      * Try to find a completely unused register, preferably a call-volatile one.
4023 */
4024 uint8_t idxReg;
4025 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4026 & ~pReNative->Core.bmHstRegsWithGstShadow
4027 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4028 if (fRegs)
4029 {
4030 if (fPreferVolatile)
4031 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4032 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4033 else
4034 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4035 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4036 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4037 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4038 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4039 }
4040 else
4041 {
4042 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4043 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4044 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4045 }
4046 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4047}
4048
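/*
 * Illustrative sketch (not part of the build): typical temporary register usage.  The freeing
 * counterpart iemNativeRegFreeTmp() is assumed to be the one declared next to this allocator in
 * the recompiler header, and uSomeImmediate is a placeholder:
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, uSomeImmediate);
 *      // ... emit code using idxTmpReg ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */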
4049
4050/**
4051 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4052 * registers.
4053 *
4054 * @returns The host register number; throws VBox status code on failure,
4055 * so no need to check the return value.
4056 * @param pReNative The native recompile state.
4057 * @param poff Pointer to the variable with the code buffer position.
4058  *                          This will be updated if we need to move a variable from
4059 * register to stack in order to satisfy the request.
4060 * @param fRegMask Mask of acceptable registers.
4061 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4062 * registers (@c true, default) or the other way around
4063 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4064 */
4065DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4066 bool fPreferVolatile /*= true*/)
4067{
4068 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4069 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4070
4071 /*
4072      * Try to find a completely unused register, preferably a call-volatile one.
4073 */
4074 uint8_t idxReg;
4075 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4076 & ~pReNative->Core.bmHstRegsWithGstShadow
4077 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4078 & fRegMask;
4079 if (fRegs)
4080 {
4081 if (fPreferVolatile)
4082 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4083 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4084 else
4085 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4086 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4087 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4088 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4089 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4090 }
4091 else
4092 {
4093 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4094 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4095 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4096 }
4097 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4098}
4099
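/*
 * Illustrative sketch (not part of the build): the masked variant is useful when the value must
 * end up in one specific register, e.g. a call argument register (which is not part of
 * IEMNATIVE_REG_FIXED_MASK and therefore a valid mask here):
 *
 *      uint8_t const idxArgReg = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG));
 */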
4100
4101/**
4102 * Allocates a temporary register for loading an immediate value into.
4103 *
4104 * This will emit code to load the immediate, unless there happens to be an
4105 * unused register with the value already loaded.
4106 *
4107 * The caller will not modify the returned register, it must be considered
4108 * read-only. Free using iemNativeRegFreeTmpImm.
4109 *
4110 * @returns The host register number; throws VBox status code on failure, so no
4111 * need to check the return value.
4112 * @param pReNative The native recompile state.
4113 * @param poff Pointer to the variable with the code buffer position.
4114 * @param uImm The immediate value that the register must hold upon
4115 * return.
4116 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4117 * registers (@c true, default) or the other way around
4118 * (@c false).
4119 *
4120 * @note Reusing immediate values has not been implemented yet.
4121 */
4122DECL_HIDDEN_THROW(uint8_t)
4123iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4124{
4125 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4126 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4127 return idxReg;
4128}
4129
4130#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4131
4132# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4133/**
4134 * Helper for iemNativeLivenessGetStateByGstReg.
4135 *
4136 * @returns IEMLIVENESS_STATE_XXX
4137 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4138 * ORed together.
4139 */
4140DECL_FORCE_INLINE(uint32_t)
4141iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4142{
4143 /* INPUT trumps anything else. */
4144 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4145 return IEMLIVENESS_STATE_INPUT;
4146
4147 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4148 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4149 {
4150 /* If not all sub-fields are clobbered they must be considered INPUT. */
4151 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4152 return IEMLIVENESS_STATE_INPUT;
4153 return IEMLIVENESS_STATE_CLOBBERED;
4154 }
4155
4156 /* XCPT_OR_CALL trumps UNUSED. */
4157 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4158 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4159
4160 return IEMLIVENESS_STATE_UNUSED;
4161}
4162# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4163
4164
4165DECL_FORCE_INLINE(uint32_t)
4166iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4167{
4168# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4169 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4170 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4171# else
4172 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4173 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4174 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4175 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
4176# endif
4177}
4178
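/*
 * Illustrative note (not part of the build): the liveness state is stored as bit-planes, i.e.
 * Bit0.bm64 holds the lowest state bit of every register, Bit1.bm64 the next one, and so on; the
 * helper above merely reassembles the per-register value.  E.g. in the default layout a register
 * with its Bit0 bit set and its Bit1 bit clear decodes to 1, which the AssertCompile() further up
 * ties to IEMLIVENESS_STATE_UNUSED.
 */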
4179
4180DECL_FORCE_INLINE(uint32_t)
4181iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4182{
4183 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4184 if (enmGstReg == kIemNativeGstReg_EFlags)
4185 {
4186 /* Merge the eflags states to one. */
4187# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4188 uRet = RT_BIT_32(uRet);
4189 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4190 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4191 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4192 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4193 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4194 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4195 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4196# else
4197 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4198 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4199 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4200 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4201 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4202 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4203 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4204# endif
4205 }
4206 return uRet;
4207}
4208
4209
4210# ifdef VBOX_STRICT
4211/** For assertions only, the caller checks that idxCurCall isn't zero. */
4212DECL_FORCE_INLINE(uint32_t)
4213iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4214{
4215 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4216}
4217# endif /* VBOX_STRICT */
4218
4219#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4220
4221/**
4222 * Marks host register @a idxHstReg as containing a shadow copy of guest
4223 * register @a enmGstReg.
4224 *
4225 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4226 * host register before calling.
4227 */
4228DECL_FORCE_INLINE(void)
4229iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4230{
4231 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4232 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4233 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4234
4235 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4236 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4237 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4238 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4239#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4240 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4241 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4242#else
4243 RT_NOREF(off);
4244#endif
4245}
4246
4247
4248/**
4249 * Clear any guest register shadow claims from @a idxHstReg.
4250 *
4251 * The register does not need to be shadowing any guest registers.
4252 */
4253DECL_FORCE_INLINE(void)
4254iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4255{
4256 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4257 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4258 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4259 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4260 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4261
4262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4263 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4264 if (fGstRegs)
4265 {
4266 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4267 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4268 while (fGstRegs)
4269 {
4270 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4271 fGstRegs &= ~RT_BIT_64(iGstReg);
4272 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4273 }
4274 }
4275#else
4276 RT_NOREF(off);
4277#endif
4278
4279 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4280 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4281 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4282}
4283
4284
4285/**
4286 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4287 * and global overview flags.
4288 */
4289DECL_FORCE_INLINE(void)
4290iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4291{
4292 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4293 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4294 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4295 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4296 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4297 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4298 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4299
4300#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4301 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4302 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4303#else
4304 RT_NOREF(off);
4305#endif
4306
4307 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4308 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4309 if (!fGstRegShadowsNew)
4310 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4311 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4312}
4313
4314
4315#if 0 /* unused */
4316/**
4317 * Clear any guest register shadow claim for @a enmGstReg.
4318 */
4319DECL_FORCE_INLINE(void)
4320iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4321{
4322 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4323 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4324 {
4325 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4326 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4327 }
4328}
4329#endif
4330
4331
4332/**
4333 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4334 * as the new shadow of it.
4335 *
4336 * Unlike the other guest reg shadow helpers, this does the logging for you.
4337  * However, the liveness state is not asserted here; the caller must do
4338 * that.
4339 */
4340DECL_FORCE_INLINE(void)
4341iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4342 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4343{
4344 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4345 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4346 {
4347 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4348 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4349 if (idxHstRegOld == idxHstRegNew)
4350 return;
4351 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4352 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4353 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4354 }
4355 else
4356 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4357 g_aGstShadowInfo[enmGstReg].pszName));
4358 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4359}
4360
4361
4362/**
4363 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4364 * to @a idxRegTo.
4365 */
4366DECL_FORCE_INLINE(void)
4367iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4368 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4369{
4370 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4371 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4372 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4373 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4374 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4375 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4376 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4377 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4378 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4379
4380 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4381 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4382 if (!fGstRegShadowsFrom)
4383 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4384 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4385 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4386 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4387#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4388 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4389 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4390#else
4391 RT_NOREF(off);
4392#endif
4393}
4394
4395
4396/**
4397 * Allocates a temporary host general purpose register for keeping a guest
4398 * register value.
4399 *
4400 * Since we may already have a register holding the guest register value,
4401 * code will be emitted to do the loading if that's not the case. Code may also
4402 * be emitted if we have to free up a register to satisfy the request.
4403 *
4404 * @returns The host register number; throws VBox status code on failure, so no
4405 * need to check the return value.
4406 * @param pReNative The native recompile state.
4407 * @param poff Pointer to the variable with the code buffer
4408 * position. This will be updated if we need to move a
4409 * variable from register to stack in order to satisfy
4410 * the request.
4411 * @param enmGstReg The guest register that is to be updated.
4412 * @param enmIntendedUse How the caller will be using the host register.
4413 * @param fNoVolatileRegs Set if no volatile register is allowed, clear if any
4414 * register is okay (default). The ASSUMPTION here is
4415 * that the caller has already flushed all volatile
4416 * registers, so this is only applied if we allocate a
4417 * new register.
4418 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4419 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4420 */
4421DECL_HIDDEN_THROW(uint8_t)
4422iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4423 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4424 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4425{
4426 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4427#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4428 AssertMsg( fSkipLivenessAssert
4429 || pReNative->idxCurCall == 0
4430 || enmGstReg == kIemNativeGstReg_Pc
4431 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4432 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4433 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4434 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4435 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4436 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4437#endif
4438 RT_NOREF(fSkipLivenessAssert);
4439#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4440 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4441#endif
4442 uint32_t const fRegMask = !fNoVolatileRegs
4443 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4444 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4445
4446 /*
4447 * First check if the guest register value is already in a host register.
4448 */
4449 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4450 {
4451 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4452 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4453 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4454 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4455
4456 /* It's not supposed to be allocated... */
4457 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4458 {
4459 /*
4460 * If the register will trash the guest shadow copy, try to find a
4461 * completely unused register we can use instead. If that fails,
4462 * we need to disassociate the host reg from the guest reg.
4463 */
4464 /** @todo would be nice to know if preserving the register is in any way helpful. */
4465 /* If the purpose is calculations, try to duplicate the register value as
4466 we'll be clobbering the shadow. */
4467 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4468 && ( ~pReNative->Core.bmHstRegs
4469 & ~pReNative->Core.bmHstRegsWithGstShadow
4470 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4471 {
4472 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4473
4474 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4475
4476 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4477 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4478 g_apszIemNativeHstRegNames[idxRegNew]));
4479 idxReg = idxRegNew;
4480 }
4481 /* If the current register matches the restrictions, go ahead and allocate
4482 it for the caller. */
4483 else if (fRegMask & RT_BIT_32(idxReg))
4484 {
4485 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4486 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4487 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4488 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4489 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4490 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4491 else
4492 {
4493 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4494 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4495 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4496 }
4497 }
4498 /* Otherwise, allocate a register that satisfies the caller and transfer
4499 the shadowing if compatible with the intended use. (This basically
4500 means the call wants a non-volatile register (RSP push/pop scenario).) */
4501 else
4502 {
4503 Assert(fNoVolatileRegs);
4504 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4505 !fNoVolatileRegs
4506 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4507 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4508 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4509 {
4510 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4511 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4512 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4513 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4514 }
4515 else
4516 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4517 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4518 g_apszIemNativeHstRegNames[idxRegNew]));
4519 idxReg = idxRegNew;
4520 }
4521 }
4522 else
4523 {
4524 /*
4525 * Oops. Shadowed guest register already allocated!
4526 *
4527 * Allocate a new register, copy the value and, if updating, the
4528 * guest shadow copy assignment to the new register.
4529 */
4530 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4531 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4532 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4533 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4534
4535 /** @todo share register for readonly access. */
4536 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4537 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4538
4539 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4540 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4541
4542 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4543 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4544 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4545 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4546 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4547 else
4548 {
4549 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4550 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4551 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4552 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4553 }
4554 idxReg = idxRegNew;
4555 }
4556 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4557
4558#ifdef VBOX_STRICT
4559 /* Strict builds: Check that the value is correct. */
4560 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4561#endif
4562
4563 return idxReg;
4564 }
4565
4566 /*
4567 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4568 */
4569 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4570
4571 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4572 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4573
4574 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4575 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4576 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4577 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4578
4579 return idxRegNew;
4580}
4581
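/*
 * Usage sketch (illustrative only, hence the #if 0): the typical pattern an
 * emitter helper uses to pull a guest register into a host register, modify
 * it and write it back.  The wrapper function and its cbInstr parameter are
 * hypothetical; the allocator, emitter and free calls are the ones from this
 * file.
 */
#if 0
static uint32_t iemNativeEmitExampleAdvancePc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr) /* hypothetical wrapper */
{
    /* Get a host register shadowing the guest PC; a load is only emitted if no shadow copy exists yet. */
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    /* Modify the value and store it back into the guest context. */
    off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, cbInstr);
    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    /* Free the temporary without flushing; the shadow association stays for later reuse. */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif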
4582
4583/**
4584 * Allocates a temporary host general purpose register that already holds the
4585 * given guest register value.
4586 *
4587 * The use case for this function is places where the shadowing state cannot be
4588 * modified due to branching and such. This will fail if we don't have a
4589 * current shadow copy handy or if it's incompatible. The only code that will
4590 * be emitted here is value checking code in strict builds.
4591 *
4592 * The intended use can only be readonly!
4593 *
4594 * @returns The host register number, UINT8_MAX if not present.
4595 * @param pReNative The native recompile state.
4596 * @param poff Pointer to the instruction buffer offset.
4597 * Will be updated in strict builds if a register is
4598 * found.
4599 * @param enmGstReg The guest register that is to be accessed (read-only).
4600 * @note In strict builds, this may throw instruction buffer growth failures.
4601 * Non-strict builds will not throw anything.
4602 * @sa iemNativeRegAllocTmpForGuestReg
4603 */
4604DECL_HIDDEN_THROW(uint8_t)
4605iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4606{
4607 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4608#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4609 AssertMsg( pReNative->idxCurCall == 0
4610 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4611 || enmGstReg == kIemNativeGstReg_Pc,
4612 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4613#endif
4614
4615 /*
4616 * First check if the guest register value is already in a host register.
4617 */
4618 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4619 {
4620 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4621 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4622 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4623 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4624
4625 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4626 {
4627 /*
4628 * We only do readonly use here, so easy compared to the other
4629 * variant of this code.
4630 */
4631 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4632 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4633 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4634 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4635 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4636
4637#ifdef VBOX_STRICT
4638 /* Strict builds: Check that the value is correct. */
4639 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4640#else
4641 RT_NOREF(poff);
4642#endif
4643 return idxReg;
4644 }
4645 }
4646
4647 return UINT8_MAX;
4648}
4649
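/*
 * Usage sketch (illustrative only, hence the #if 0): read-only access without
 * touching the shadowing state, e.g. on a code path reached via branching.
 * The surrounding function providing pReNative and off is hypothetical.
 */
#if 0
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... emit code that only reads idxPcReg ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    /* else: no shadow copy handy and we must not alter the shadowing state here. */
#endif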
4650
4651/**
4652 * Allocates argument registers for a function call.
4653 *
4654 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4655 * need to check the return value.
4656 * @param pReNative The native recompile state.
4657 * @param off The current code buffer offset.
4658 * @param cArgs The number of arguments the function call takes.
4659 */
4660DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4661{
4662 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4663 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4664 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4665 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4666
4667 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4668 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4669 else if (cArgs == 0)
4670 return off;
4671
4672 /*
4673 * Do we get lucky and all registers are free and not shadowing anything?
4674 */
4675 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4676 for (uint32_t i = 0; i < cArgs; i++)
4677 {
4678 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4679 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4680 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4681 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4682 }
4683 /*
4684 * Okay, not lucky so we have to free up the registers.
4685 */
4686 else
4687 for (uint32_t i = 0; i < cArgs; i++)
4688 {
4689 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4690 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4691 {
4692 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4693 {
4694 case kIemNativeWhat_Var:
4695 {
4696 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4698 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4699 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4700 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4701
4702 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4703 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4704 else
4705 {
4706 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4707 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4708 }
4709 break;
4710 }
4711
4712 case kIemNativeWhat_Tmp:
4713 case kIemNativeWhat_Arg:
4714 case kIemNativeWhat_rc:
4715 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4716 default:
4717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4718 }
4719
4720 }
4721 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4722 {
4723 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4724 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4725 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4726 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4727 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4728 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4729 }
4730 else
4731 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4732 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4733 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4734 }
4735 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4736 return off;
4737}
4738
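/*
 * Usage sketch (illustrative only, hence the #if 0): reserving the calling
 * convention argument registers ahead of a helper call.  Emitting the code
 * that loads the actual argument values and the call itself is elided.
 */
#if 0
    /* Claim the first three argument GPRs for an upcoming three argument call. */
    off = iemNativeRegAllocArgs(pReNative, off, 3 /*cArgs*/);
    /* ... emit code loading the argument registers and then the call itself ... */
#endif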
4739
4740DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4741
4742
4743#if 0
4744/**
4745 * Frees a register assignment of any type.
4746 *
4747 * @param pReNative The native recompile state.
4748 * @param idxHstReg The register to free.
4749 *
4750 * @note Does not update variables.
4751 */
4752DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4753{
4754 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4755 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4756 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4757 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4758 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4759 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4760 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4761 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4762 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4763 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4764 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4765 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4766 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4767 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4768
4769 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4770 /* no flushing, right:
4771 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4772 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4773 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4774 */
4775}
4776#endif
4777
4778
4779/**
4780 * Frees a temporary register.
4781 *
4782 * Any shadow copies of guest registers assigned to the host register will not
4783 * be flushed by this operation.
4784 */
4785DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4786{
4787 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4788 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4789 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4790 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4791 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4792}
4793
4794
4795/**
4796 * Frees a temporary immediate register.
4797 *
4798 * It is assumed that the caller has not modified the register, so it still holds
4799 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4800 */
4801DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4802{
4803 iemNativeRegFreeTmp(pReNative, idxHstReg);
4804}
4805
4806
4807/**
4808 * Frees a register assigned to a variable.
4809 *
4810 * The register will be disassociated from the variable.
4811 */
4812DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4813{
4814 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4815 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4816 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4817 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4818 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4819
4820 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4821 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4822 if (!fFlushShadows)
4823 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4824 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4825 else
4826 {
4827 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4828 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4829 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4830 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4831 uint64_t fGstRegShadows = fGstRegShadowsOld;
4832 while (fGstRegShadows)
4833 {
4834 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4835 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4836
4837 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4838 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4839 }
4840 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4841 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4842 }
4843}
4844
4845
4846/**
4847 * Called right before emitting a call instruction to move anything important
4848 * out of call-volatile registers, free and flush the call-volatile registers,
4849 * optionally freeing argument variables.
4850 *
4851 * @returns New code buffer offset; throws VBox status code on failure.
4852 * @param pReNative The native recompile state.
4853 * @param off The code buffer offset.
4854 * @param cArgs The number of arguments the function call takes.
4855 * It is presumed that the host register part of these has
4856 * already been allocated as such and won't need moving,
4857 * just freeing.
4858 * @param fKeepVars Mask of variables that should keep their register
4859 * assignments. Caller must take care to handle these.
4860 */
4861DECL_HIDDEN_THROW(uint32_t)
4862iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4863{
4864 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4865
4866 /* fKeepVars will reduce this mask. */
4867 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4868
4869 /*
4870 * Move anything important out of volatile registers.
4871 */
4872 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4873 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4874 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4875#ifdef IEMNATIVE_REG_FIXED_TMP0
4876 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4877#endif
4878#ifdef IEMNATIVE_REG_FIXED_TMP1
4879 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4880#endif
4881#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4882 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4883#endif
4884 & ~g_afIemNativeCallRegs[cArgs];
4885
4886 fRegsToMove &= pReNative->Core.bmHstRegs;
4887 if (!fRegsToMove)
4888 { /* likely */ }
4889 else
4890 {
4891 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4892 while (fRegsToMove != 0)
4893 {
4894 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4895 fRegsToMove &= ~RT_BIT_32(idxReg);
4896
4897 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4898 {
4899 case kIemNativeWhat_Var:
4900 {
4901 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4902 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4903 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4904 Assert(pVar->idxReg == idxReg);
4905 if (!(RT_BIT_32(idxVar) & fKeepVars))
4906 {
4907 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4908 idxVar, pVar->enmKind, pVar->idxReg));
4909 if (pVar->enmKind != kIemNativeVarKind_Stack)
4910 pVar->idxReg = UINT8_MAX;
4911 else
4912 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4913 }
4914 else
4915 fRegsToFree &= ~RT_BIT_32(idxReg);
4916 continue;
4917 }
4918
4919 case kIemNativeWhat_Arg:
4920 AssertMsgFailed(("What?!?: %u\n", idxReg));
4921 continue;
4922
4923 case kIemNativeWhat_rc:
4924 case kIemNativeWhat_Tmp:
4925 AssertMsgFailed(("Missing free: %u\n", idxReg));
4926 continue;
4927
4928 case kIemNativeWhat_FixedTmp:
4929 case kIemNativeWhat_pVCpuFixed:
4930 case kIemNativeWhat_pCtxFixed:
4931 case kIemNativeWhat_PcShadow:
4932 case kIemNativeWhat_FixedReserved:
4933 case kIemNativeWhat_Invalid:
4934 case kIemNativeWhat_End:
4935 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4936 }
4937 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4938 }
4939 }
4940
4941 /*
4942 * Do the actual freeing.
4943 */
4944 if (pReNative->Core.bmHstRegs & fRegsToFree)
4945 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4946 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4947 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4948
4949 /* If there are guest register shadows in any call-volatile register, we
4950 have to clear the corresponding guest register masks for each register. */
4951 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4952 if (fHstRegsWithGstShadow)
4953 {
4954 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4955 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4956 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4957 do
4958 {
4959 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4960 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4961
4962 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4963 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4964 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4965 } while (fHstRegsWithGstShadow != 0);
4966 }
4967
4968 return off;
4969}
4970
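/*
 * Usage sketch (illustrative only, hence the #if 0): the step performed right
 * before emitting a call to a threaded function or C-implementation helper.
 * Emitting the argument loads and the call itself is elided.
 */
#if 0
    /* Move anything important out of the call-volatile registers and free them. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4 /*cArgs*/);
    /* ... load the argument registers and emit the call ... */
#endif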
4971
4972/**
4973 * Flushes a set of guest register shadow copies.
4974 *
4975 * This is usually done after calling a threaded function or a C-implementation
4976 * of an instruction.
4977 *
4978 * @param pReNative The native recompile state.
4979 * @param fGstRegs Set of guest registers to flush.
4980 */
4981DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4982{
4983 /*
4984 * Reduce the mask by what's currently shadowed
4985 */
4986 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4987 fGstRegs &= bmGstRegShadowsOld;
4988 if (fGstRegs)
4989 {
4990 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4991 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4992 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4993 if (bmGstRegShadowsNew)
4994 {
4995 /*
4996 * Partial.
4997 */
4998 do
4999 {
5000 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5001 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5002 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5003 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5004 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5005
5006 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5007 fGstRegs &= ~fInThisHstReg;
5008 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5009 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5010 if (!fGstRegShadowsNew)
5011 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5012 } while (fGstRegs != 0);
5013 }
5014 else
5015 {
5016 /*
5017 * Clear all.
5018 */
5019 do
5020 {
5021 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5022 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5023 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5024 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5025 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5026
5027 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5028 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5029 } while (fGstRegs != 0);
5030 pReNative->Core.bmHstRegsWithGstShadow = 0;
5031 }
5032 }
5033}
5034
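/*
 * Usage sketch (illustrative only, hence the #if 0): after a helper that may
 * have changed guest registers behind our back, the stale shadow copies must
 * be dropped.  Passing UINT64_MAX is safe because the mask is reduced to what
 * is actually shadowed.
 */
#if 0
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);                       /* drop everything */
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));   /* drop only the PC shadow */
#endif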
5035
5036/**
5037 * Flushes guest register shadow copies held by a set of host registers.
5038 *
5039 * This is used with the TLB lookup code for ensuring that we don't carry on
5040 * with any guest shadows in volatile registers, as these will get corrupted by
5041 * a TLB miss.
5042 *
5043 * @param pReNative The native recompile state.
5044 * @param fHstRegs Set of host registers to flush guest shadows for.
5045 */
5046DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5047{
5048 /*
5049 * Reduce the mask by what's currently shadowed.
5050 */
5051 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5052 fHstRegs &= bmHstRegsWithGstShadowOld;
5053 if (fHstRegs)
5054 {
5055 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5056 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5057 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5058 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5059 if (bmHstRegsWithGstShadowNew)
5060 {
5061 /*
5062 * Partial (likely).
5063 */
5064 uint64_t fGstShadows = 0;
5065 do
5066 {
5067 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5068 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5069 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5070 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5071
5072 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5073 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5074 fHstRegs &= ~RT_BIT_32(idxHstReg);
5075 } while (fHstRegs != 0);
5076 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5077 }
5078 else
5079 {
5080 /*
5081 * Clear all.
5082 */
5083 do
5084 {
5085 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5086 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5087 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5088 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5089
5090 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5091 fHstRegs &= ~RT_BIT_32(idxHstReg);
5092 } while (fHstRegs != 0);
5093 pReNative->Core.bmGstRegShadows = 0;
5094 }
5095 }
5096}
5097
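/*
 * Usage sketch (illustrative only, hence the #if 0): before a TLB lookup the
 * guest shadows held by call-volatile registers are dropped, since a TLB miss
 * helper call would clobber those registers.
 */
#if 0
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif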
5098
5099/**
5100 * Restores guest shadow copies in volatile registers.
5101 *
5102 * This is used after calling a helper function (think TLB miss) to restore the
5103 * register state of volatile registers.
5104 *
5105 * @param pReNative The native recompile state.
5106 * @param off The code buffer offset.
5107 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5108 * be active (allocated) w/o asserting. Hack.
5109 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5110 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5111 */
5112DECL_HIDDEN_THROW(uint32_t)
5113iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5114{
5115 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5116 if (fHstRegs)
5117 {
5118 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5119 do
5120 {
5121 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5122
5123 /* It's not fatal if a register is active holding a variable that is
5124 shadowing a guest register, ASSUMING all pending guest register
5125 writes were flushed prior to the helper call. However, we'll be
5126 emitting duplicate restores, so it wastes code space. */
5127 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5128 RT_NOREF(fHstRegsActiveShadows);
5129
5130 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5131 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5132 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5133 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5134
5135 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5136 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5137
5138 fHstRegs &= ~RT_BIT_32(idxHstReg);
5139 } while (fHstRegs != 0);
5140 }
5141 return off;
5142}
5143
5144
5145#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5146# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
5147static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5148{
5149 /* Compare the shadow with the context value; they should match. */
5150 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
5151 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
5152 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
5153 return off;
5154}
5155# endif
5156
5157/**
5158 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5159 */
5160static uint32_t
5161iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5162{
5163 if (pReNative->Core.offPc)
5164 {
5165# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5166 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5167 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5168# endif
5169
5170# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5171 /* Allocate a temporary PC register. */
5172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5173
5174 /* Perform the addition and store the result. */
5175 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5176 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5177
5178 /* Free but don't flush the PC register. */
5179 iemNativeRegFreeTmp(pReNative, idxPcReg);
5180# else
5181 /* Compare the shadow with the context value; they should match. */
5182 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5183 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5184# endif
5185
5186 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5187 pReNative->Core.offPc = 0;
5188 pReNative->Core.cInstrPcUpdateSkipped = 0;
5189 }
5190# if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
5191 else
5192 {
5193 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5194 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
5195 }
5196# endif
5197
5198 return off;
5199}
5200#endif
5201
5202
5203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5204/*********************************************************************************************************************************
5205* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5206*********************************************************************************************************************************/
5207
5208/**
5209 * Info about shadowed guest SIMD register values.
5210 * @see IEMNATIVEGSTSIMDREG
5211 */
5212static struct
5213{
5214 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5215 uint32_t offXmm;
5216 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5217 uint32_t offYmm;
5218 /** Name (for logging). */
5219 const char *pszName;
5220} const g_aGstSimdShadowInfo[] =
5221{
5222#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5223 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5224 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5225 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5226 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5227 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5228 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5229 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5230 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5231 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5232 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5233 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5234 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5235 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5236 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5237 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5238 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5239 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5240#undef CPUMCTX_OFF_AND_SIZE
5241};
5242AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5243
5244
5245#ifdef LOG_ENABLED
5246/** Host CPU SIMD register names. */
5247DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5248{
5249#ifdef RT_ARCH_AMD64
5250 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5251 #elif defined(RT_ARCH_ARM64)
5252 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5253 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5254#else
5255# error "port me"
5256#endif
5257};
5258#endif
5259
5260
5261DECL_FORCE_INLINE(uint8_t) iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg,
5262 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
5263{
5264 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5265
5266 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = enmWhat;
5267 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
5268 RT_NOREF(idxVar);
5269 return idxSimdReg;
5270}
5271
5272
5273/**
5274 * Frees a temporary SIMD register.
5275 *
5276 * Any shadow copies of guest registers assigned to the host register will not
5277 * be flushed by this operation.
5278 */
5279DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5280{
5281 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5282 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5283 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5284 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5285 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5286}
5287
5288
5289/**
5290 * Locate a register, possibly freeing one up.
5291 *
5292 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5293 * failed.
5294 *
5295 * @returns Host register number on success. Returns UINT8_MAX if no registers
5296 * were found; the caller is supposed to deal with this and raise an
5297 * allocation type specific status code (if desired).
5298 *
5299 * @throws VBox status code if we run into trouble spilling a variable or
5300 * recording debug info. Does NOT throw anything if we're out of
5301 * registers, though.
5302 */
5303static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5304 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5305{
5306 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5307 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5308 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5309
5310 AssertFailed();
5311
5312 /*
5313 * Try a freed register that's shadowing a guest register.
5314 */
5315 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5316 if (fRegs)
5317 {
5318 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5319
5320#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5321 /*
5322 * When we have liveness information, we use it to kick out all shadowed
5323 * guest registers that will not be needed any more in this TB. If we're
5324 * lucky, this may prevent us from ending up here again.
5325 *
5326 * Note! We must consider the previous entry here so we don't free
5327 * anything that the current threaded function requires (current
5328 * entry is produced by the next threaded function).
5329 */
5330 uint32_t const idxCurCall = pReNative->idxCurCall;
5331 if (idxCurCall > 0)
5332 {
5333 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5334
5335# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5336 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5337 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5338 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5339#else
5340 /* Construct a mask of the registers not in the read or write state.
5341 Note! We could skip writes, if they aren't from us, as this is just
5342 a hack to prevent trashing registers that have just been written
5343 or will be written when we retire the current instruction. */
5344 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5345 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5346 & IEMLIVENESSBIT_MASK;
5347#endif
5348 /* Merge EFLAGS. */
5349 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
5350 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
5351 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
5352 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
5353 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
5354
5355 /* If it matches any shadowed registers. */
5356 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5357 {
5358 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5359 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5360 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5361
5362 /* See if we've got any unshadowed registers we can return now. */
5363 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5364 if (fUnshadowedRegs)
5365 {
5366 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5367 return (fPreferVolatile
5368 ? ASMBitFirstSetU32(fUnshadowedRegs)
5369 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5370 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5371 - 1;
5372 }
5373 }
5374 }
5375#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5376
5377 unsigned const idxReg = (fPreferVolatile
5378 ? ASMBitFirstSetU32(fRegs)
5379 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5380 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5381 - 1;
5382
5383 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5384 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5385 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5386 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5387 Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5388
5389 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5390 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5391 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5392 return idxReg;
5393 }
5394
5395 /*
5396 * Try to free up a variable that's in a register.
5397 *
5398 * We do two rounds here, first evacuating variables that don't need to be
5399 * saved on the stack, then in the second round moving things to the stack.
5400 */
5401 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5402 AssertReleaseFailed(); /** @todo */
5403#if 0
5404 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5405 {
5406 uint32_t fVars = pReNative->Core.bmSimdVars;
5407 while (fVars)
5408 {
5409 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5410 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5411 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5412 && (RT_BIT_32(idxReg) & fRegMask)
5413 && ( iLoop == 0
5414 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5415 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5416 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5417 {
5418 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5419 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5420 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5421 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5422 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5423 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5424
5425 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5426 {
5427 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5428 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5429 }
5430
5431 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5432 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5433
5434 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5435 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5436 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5437 return idxReg;
5438 }
5439 fVars &= ~RT_BIT_32(idxVar);
5440 }
5441 }
5442#else
5443 RT_NOREF(poff);
5444#endif
5445
5446 return UINT8_MAX;
5447}
5448
5449
5450/**
5451 * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest
5452 * SIMD register @a enmGstSimdReg.
5453 *
5454 * ASSUMES that the caller has made sure @a enmGstSimdReg is not associated with any
5455 * host register before calling.
5456 */
5457DECL_FORCE_INLINE(void)
5458iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5459{
5460 Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)));
5461 Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5462 Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End);
5463
5464 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxHstSimdReg;
5465 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5466 pReNative->Core.bmGstSimdRegShadows |= RT_BIT_64(enmGstSimdReg);
5467 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxHstSimdReg);
5468#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5469 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5470 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg);
5471#else
5472 RT_NOREF(off);
5473#endif
5474}
5475
5476
5477/**
5478 * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom
5479 * to @a idxSimdRegTo.
5480 */
5481DECL_FORCE_INLINE(void)
5482iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo,
5483 IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5484{
5485 Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5486 Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom);
5487 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)
5488 == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows
5489 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5490 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows)
5491 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows);
5492 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom))
5493 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows));
5494 Assert( pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded
5495 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded);
5496
5498 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg);
5499 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows = fGstRegShadowsFrom;
5500 if (!fGstRegShadowsFrom)
5501 {
5502 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdRegFrom);
5503 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5504 }
5505 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxSimdRegTo);
5506 pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5507 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxSimdRegTo;
5508#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5509 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5510 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom);
5511#else
5512 RT_NOREF(off);
5513#endif
5514}
5515
5516
5517/**
5518 * Clear any guest register shadow claims from @a idxHstSimdReg.
5519 *
5520 * The register does not need to be shadowing any guest registers.
5521 */
5522DECL_FORCE_INLINE(void)
5523iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off)
5524{
5525 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5526 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows
5527 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5528 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg))
5529 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5530 Assert( !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128)
5531 && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128));
5532
5533#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5534 uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5535 if (fGstRegs)
5536 {
5537 Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End));
5538 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5539 while (fGstRegs)
5540 {
5541 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5542 fGstRegs &= ~RT_BIT_64(iGstReg);
5543 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg);
5544 }
5545 }
5546#else
5547 RT_NOREF(off);
5548#endif
5549
5550 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstSimdReg);
5551 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5552 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5553 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5554}
5555
5556
5557/**
5558 * Flushes a set of guest register shadow copies.
5559 *
5560 * This is usually done after calling a threaded function or a C-implementation
5561 * of an instruction.
5562 *
5563 * @param pReNative The native recompile state.
5564 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5565 */
5566DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5567{
5568 /*
5569 * Reduce the mask by what's currently shadowed
5570 */
5571 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5572 fGstSimdRegs &= bmGstSimdRegShadows;
5573 if (fGstSimdRegs)
5574 {
5575 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5576 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5577 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5578 if (bmGstSimdRegShadowsNew)
5579 {
5580 /*
5581 * Partial.
5582 */
5583 do
5584 {
5585 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5586 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5587 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5588 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5589 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5590 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5591
5592 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5593 fGstSimdRegs &= ~fInThisHstReg;
5594 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5595 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5596 if (!fGstRegShadowsNew)
5597 {
5598 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5599 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5600 }
5601 } while (fGstSimdRegs != 0);
5602 }
5603 else
5604 {
5605 /*
5606 * Clear all.
5607 */
5608 do
5609 {
5610 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5611 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5612 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5613 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5614 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5615 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5616
5617 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5618 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5619 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5620 } while (fGstSimdRegs != 0);
5621 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5622 }
5623 }
5624}
5625
5626
5627/**
5628 * Allocates a temporary host SIMD register.
5629 *
5630 * This may emit code to save register content onto the stack in order to free
5631 * up a register.
5632 *
5633 * @returns The host register number; throws VBox status code on failure,
5634 * so no need to check the return value.
5635 * @param pReNative The native recompile state.
5636 * @param poff Pointer to the variable with the code buffer position.
5637 * This will be updated if we need to move a variable from
5638 * register to stack in order to satisfy the request.
5639 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5640 * registers (@c true, default) or the other way around
5641 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5642 */
5643DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5644{
5645 /*
5646 * Try to find a completely unused register, preferably a call-volatile one.
5647 */
5648 uint8_t idxSimdReg;
5649 uint32_t fRegs = ~pReNative->Core.bmHstRegs
5650 & ~pReNative->Core.bmHstRegsWithGstShadow
5651 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5652 if (fRegs)
5653 {
5654 if (fPreferVolatile)
5655 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5656 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5657 else
5658 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5659 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5660 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5661 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5662 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5663 }
5664 else
5665 {
5666 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5667 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5668 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5669 }
5670
5671 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5672 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5673}
5674
5675
5676/**
5677 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
5678 * registers.
5679 *
5680 * @returns The host register number; throws VBox status code on failure,
5681 * so no need to check the return value.
5682 * @param pReNative The native recompile state.
5683 * @param poff Pointer to the variable with the code buffer position.
5684 *                       This will be updated if we need to move a variable from
5685 * register to stack in order to satisfy the request.
5686 * @param fRegMask Mask of acceptable registers.
5687 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5688 * registers (@c true, default) or the other way around
5689 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5690 */
5691DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5692 bool fPreferVolatile /*= true*/)
5693{
5694 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5695 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5696
5697 /*
5698 * Try find a completely unused register, preferably a call-volatile one.
5699 */
5700 uint8_t idxSimdReg;
5701 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5702 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5703 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5704 & fRegMask;
5705 if (fRegs)
5706 {
5707 if (fPreferVolatile)
5708 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5709 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5710 else
5711 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5712 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5713 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5714 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5715 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5716 }
5717 else
5718 {
5719 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5720 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5721 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5722 }
5723
5724 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5725 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5726}
5727
5728
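/**
 * Helper that copies the loaded part(s) of one host SIMD register to another.
 *
 * Emits the vector move(s) required so that @a idxHstSimdRegDst contains the
 * parts given by @a enmLoadSzDst, copied from @a idxHstSimdRegSrc, and updates
 * the destination's enmLoaded state accordingly.  Only the easy case is
 * handled where the source already has (at least) the requested parts loaded.
 *
 * @returns New code buffer offset.
 * @param   pReNative           The native recompile state.
 * @param   off                 Current code buffer position.
 * @param   idxHstSimdRegDst    The destination host SIMD register.
 * @param   idxHstSimdRegSrc    The source host SIMD register.
 * @param   enmLoadSzDst        The load size the destination should end up with.
 */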
5729static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5730 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5731{
5732 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5733    if (   pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5734        || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5735 {
5736# ifdef RT_ARCH_ARM64
5737 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5738 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5739# endif
5740
5741 switch (enmLoadSzDst)
5742 {
5743            case kIemNativeGstSimdRegLdStSz_256:
5744                off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
                    break;
5745            case kIemNativeGstSimdRegLdStSz_Low128:
5746                off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
                    break;
5747            case kIemNativeGstSimdRegLdStSz_High128:
5748                off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
                    break;
5749 default:
5750 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5751 }
5752
5753 pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded = enmLoadSzDst;
5754 return off;
5755 }
5756 else
5757 {
5758 /* Complicated stuff where the source is currently missing something, later. */
5759 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5760 }
5761
5762 return off;
5763}
5764
5765
5766/**
5767 * Allocates a temporary host SIMD register for keeping a guest
5768 * SIMD register value.
5769 *
5770 * Since we may already have a register holding the guest register value,
5771 * code will be emitted to do the loading if that's not the case. Code may also
5772 * be emitted if we have to free up a register to satisfy the request.
5773 *
5774 * @returns The host register number; throws VBox status code on failure, so no
5775 * need to check the return value.
5776 * @param pReNative The native recompile state.
5777 * @param poff Pointer to the variable with the code buffer
5778 * position. This will be update if we need to move a
5779 * variable from register to stack in order to satisfy
5780 * the request.
5781 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
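 * @param   enmLoadSz       Which parts of the guest SIMD register the caller
 *                          needs loaded (low 128 bits, high 128 bits or the
 *                          full 256 bits).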
5782 * @param enmIntendedUse How the caller will be using the host register.
5783 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5784 * register is okay (default). The ASSUMPTION here is
5785 * that the caller has already flushed all volatile
5786 * registers, so this is only applied if we allocate a
5787 * new register.
5788 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
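 *
 * Illustrative usage sketch (assumptions: the caller wants to update the low
 * 128 bits of guest XMM1, and the iemNativeSimdRegFreeTmp() release call
 * mirrors the GPR allocator naming):
 * @code
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                         IEMNATIVEGSTSIMDREG_SIMD(1),
 *                                                                         kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                         kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying the low 128 bits of idxSimdReg ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdReg); // assumed release counterpart
 * @endcode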
5789 */
5790DECL_HIDDEN_THROW(uint8_t)
5791iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5792 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5793 bool fNoVolatileRegs /*= false*/)
5794{
5795 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5796#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5797 AssertMsg( pReNative->idxCurCall == 0
5798 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5799 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5800 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5801 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5802 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5803 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5804#endif
5805#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5806 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5807#endif
5808 uint32_t const fRegMask = !fNoVolatileRegs
5809 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5810 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5811
5812 /*
5813 * First check if the guest register value is already in a host register.
5814 */
5815 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5816 {
5817 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5818 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5819 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5820 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5821
5822 /* It's not supposed to be allocated... */
5823 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5824 {
5825 /*
5826 * If the register will trash the guest shadow copy, try find a
5827 * completely unused register we can use instead. If that fails,
5828 * we need to disassociate the host reg from the guest reg.
5829 */
5830 /** @todo would be nice to know if preserving the register is in any way helpful. */
5831 /* If the purpose is calculations, try duplicate the register value as
5832 we'll be clobbering the shadow. */
5833 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5834 && ( ~pReNative->Core.bmHstSimdRegs
5835 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5836 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5837 {
5838 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5839
5840 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5841
5842 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5843 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5844 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5845 idxSimdReg = idxRegNew;
5846 }
5847 /* If the current register matches the restrictions, go ahead and allocate
5848 it for the caller. */
5849 else if (fRegMask & RT_BIT_32(idxSimdReg))
5850 {
5851 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5852 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5853 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5854 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5855 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5856 else
5857 {
5858 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5859 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5860 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5861 }
5862 }
5863 /* Otherwise, allocate a register that satisfies the caller and transfer
5864 the shadowing if compatible with the intended use. (This basically
5865               means the caller wants a non-volatile register (RSP push/pop scenario).) */
5866 else
5867 {
5868 Assert(fNoVolatileRegs);
5869 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5870 !fNoVolatileRegs
5871 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5872 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5873 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5874 {
5875 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5876                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5877 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5878 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5879 }
5880 else
5881 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5882 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5883 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5884 idxSimdReg = idxRegNew;
5885 }
5886 }
5887 else
5888 {
5889 /*
5890 * Oops. Shadowed guest register already allocated!
5891 *
5892 * Allocate a new register, copy the value and, if updating, the
5893 * guest shadow copy assignment to the new register.
5894 */
5895 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5896 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5897 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5898 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5899
5900 /** @todo share register for readonly access. */
5901 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5902 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5903
5904 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5905 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5906 else
5907 {
5908 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
5909 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5910 }
5911
5912 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5913 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5914 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5915 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5916 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5917 else
5918 {
5919 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5920 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5921 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5922 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5923 }
5924 idxSimdReg = idxRegNew;
5925 }
5926 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5927
5928#ifdef VBOX_STRICT
5929 /* Strict builds: Check that the value is correct. */
5930 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5931#endif
5932
5933 return idxSimdReg;
5934 }
5935
5936 /*
5937     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5938 */
5939 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5940
5941 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5942 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5943 else
5944 {
5945 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
5946 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5947 }
5948
5949 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5950 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5951
5952    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5953 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5954
5955 return idxRegNew;
5956}
5957
5958
5959/**
5960 * Emits code to flush a pending write of the given guest SIMD register, if any,
 * storing the dirty parts back to their CPUMCTX locations and clearing the
 * dirty state.
5961 *
5962 * @returns New code buffer offset.
5963 * @param pReNative The native recompile state.
5964 * @param off Current code buffer position.
5965 * @param idxGstSimdReg The guest SIMD register to flush.
5966 */
5967static uint32_t iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGstSimdReg)
5968{
5969 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5970
5971 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5972 g_aGstSimdShadowInfo[idxGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5973 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg),
5974 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg)));
5975
5976#ifdef RT_ARCH_AMD64
5977# error "Port me"
5978#elif defined(RT_ARCH_ARM64)
5979 /* ASSUMING there are two consecutive host registers to store the potential 256-bit guest register. */
5980 Assert(!(idxHstSimdReg & 0x1));
5981 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg))
5982 {
5983 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5984 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5985 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm);
5986 }
5987
5988 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg))
5989 {
5990 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5991            || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5992 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg + 1, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
5993 }
5994#endif
5995
5996 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, idxGstSimdReg);
5997 return off;
5998}
5999
6000#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6001
6002
6003
6004/*********************************************************************************************************************************
6005* Code emitters for flushing pending guest register writes and sanity checks *
6006*********************************************************************************************************************************/
6007
6008/**
6009 * Flushes delayed write of a specific guest register.
6010 *
6011 * This must be called prior to calling CImpl functions and any helpers that use
6012 * the guest state (like raising exceptions) and such.
6013 *
6014 * This optimization has not yet been implemented. The first target would be
6015 * RIP updates, since these are the most common ones.
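 *
 * @returns New code buffer offset.
 * @param   pReNative   The native recompile state.
 * @param   off         Current code buffer position.
 * @param   enmClass    The class of the referenced guest register.
 * @param   idxReg      The register index within that class.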
6016 */
6017DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6018 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
6019{
6020#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6021 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
6022#endif
6023
6024#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6025 if ( enmClass == kIemNativeGstRegRef_XReg
6026 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
6027 {
6028 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxReg);
6029 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now, that the referenced register doesn't change). */
6030 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
6031
6032 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6033 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
6034 }
6035#endif
6036 RT_NOREF(pReNative, enmClass, idxReg);
6037 return off;
6038}
6039
6040
6041/**
6042 * Flushes any delayed guest register writes.
6043 *
6044 * This must be called prior to calling CImpl functions and any helpers that use
6045 * the guest state (like raising exceptions) and such.
6046 *
6047 * This optimization has not yet been implemented. The first target would be
6048 * RIP updates, since these are the most common ones.
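 *
 * @returns New code buffer offset.
 * @param   pReNative       The native recompile state.
 * @param   off             Current code buffer position.
 * @param   fGstShwExcept   Mask of guest registers (RT_BIT_64 of the
 *                          IEMNATIVEGSTREG values) to exclude from the flush.
 * @param   fFlushShadows   Whether to also flush the guest to host (SIMD)
 *                          register shadow associations.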
6049 */
6050DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/,
6051 bool fFlushShadows /*= true*/)
6052{
6053#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6054    if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
6055 off = iemNativeEmitPcWriteback(pReNative, off);
6056#else
6057 RT_NOREF(pReNative, fGstShwExcept);
6058#endif
6059
6060#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6061 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6062 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6063 {
6064 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6065 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6066
6067 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6068 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxGstSimdReg);
6069
6070 if ( fFlushShadows
6071 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6072 {
6073 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6074
6075 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6076 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6077 }
6078 }
6079#else
6080 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6081#endif
6082
6083 return off;
6084}
6085
6086
6087#ifdef VBOX_STRICT
6088/**
6089 * Does internal register allocator sanity checks.
6090 */
6091static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6092{
6093 /*
6094 * Iterate host registers building a guest shadowing set.
6095 */
6096 uint64_t bmGstRegShadows = 0;
6097 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6098 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6099 while (bmHstRegsWithGstShadow)
6100 {
6101 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6102 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6103 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6104
6105 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6106 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6107 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6108 bmGstRegShadows |= fThisGstRegShadows;
6109 while (fThisGstRegShadows)
6110 {
6111 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6112 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6113 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6114 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6115 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6116 }
6117 }
6118 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6119 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6120 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6121
6122 /*
6123 * Now the other way around, checking the guest to host index array.
6124 */
6125 bmHstRegsWithGstShadow = 0;
6126 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6127 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6128 while (bmGstRegShadows)
6129 {
6130 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6131 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6132 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6133
6134 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6135 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6136 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6137 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6138 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6139 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6140 }
6141 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6142 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6143 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6144}
6145#endif
6146
6147
6148/*********************************************************************************************************************************
6149* Code Emitters (larger snippets) *
6150*********************************************************************************************************************************/
6151
6152/**
6153 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6154 * extending to 64-bit width.
6155 *
6156 * @returns New code buffer offset on success, UINT32_MAX on failure.
6157 * @param   pReNative   The native recompile state.
6158 * @param off The current code buffer position.
6159 * @param idxHstReg The host register to load the guest register value into.
6160 * @param enmGstReg The guest register to load.
6161 *
6162 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6163 * that is something the caller needs to do if applicable.
6164 */
6165DECL_HIDDEN_THROW(uint32_t)
6166iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6167{
6168 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
6169 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6170
6171 switch (g_aGstShadowInfo[enmGstReg].cb)
6172 {
6173 case sizeof(uint64_t):
6174 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6175 case sizeof(uint32_t):
6176 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6177 case sizeof(uint16_t):
6178 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6179#if 0 /* not present in the table. */
6180 case sizeof(uint8_t):
6181 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6182#endif
6183 default:
6184 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6185 }
6186}
6187
6188
6189#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6190/**
6191 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6192 *
6193 * @returns New code buffer offset on success, UINT32_MAX on failure.
6194 * @param pReNative The recompiler state.
6195 * @param off The current code buffer position.
6196 * @param idxHstSimdReg The host register to load the guest register value into.
6197 * @param enmGstSimdReg The guest register to load.
6198 * @param enmLoadSz The load size of the register.
6199 *
6200 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6201 * that is something the caller needs to do if applicable.
6202 */
6203DECL_HIDDEN_THROW(uint32_t)
6204iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6205 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6206{
6207 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6208
6209 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
6210 switch (enmLoadSz)
6211 {
6212 case kIemNativeGstSimdRegLdStSz_256:
6213 return iemNativeEmitSimdLoadVecRegFromVCpuU256(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm,
6214 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6215 case kIemNativeGstSimdRegLdStSz_Low128:
6216 return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6217 case kIemNativeGstSimdRegLdStSz_High128:
6218 return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6219 default:
6220 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6221 }
6222}
6223#endif
6224
6225#ifdef VBOX_STRICT
6226/**
6227 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6228 *
6229 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6230 * Trashes EFLAGS on AMD64.
6231 */
6232static uint32_t
6233iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6234{
6235# ifdef RT_ARCH_AMD64
6236 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6237
6238 /* rol reg64, 32 */
6239 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6240 pbCodeBuf[off++] = 0xc1;
6241 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6242 pbCodeBuf[off++] = 32;
6243
6244 /* test reg32, ffffffffh */
6245 if (idxReg >= 8)
6246 pbCodeBuf[off++] = X86_OP_REX_B;
6247 pbCodeBuf[off++] = 0xf7;
6248 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6249 pbCodeBuf[off++] = 0xff;
6250 pbCodeBuf[off++] = 0xff;
6251 pbCodeBuf[off++] = 0xff;
6252 pbCodeBuf[off++] = 0xff;
6253
6254 /* je/jz +1 */
6255 pbCodeBuf[off++] = 0x74;
6256 pbCodeBuf[off++] = 0x01;
6257
6258 /* int3 */
6259 pbCodeBuf[off++] = 0xcc;
6260
6261 /* rol reg64, 32 */
6262 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6263 pbCodeBuf[off++] = 0xc1;
6264 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6265 pbCodeBuf[off++] = 32;
6266
6267# elif defined(RT_ARCH_ARM64)
6268 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6269 /* lsr tmp0, reg64, #32 */
6270 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6271 /* cbz tmp0, +1 */
6272 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6273 /* brk #0x1100 */
6274 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6275
6276# else
6277# error "Port me!"
6278# endif
6279 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6280 return off;
6281}
6282#endif /* VBOX_STRICT */
6283
6284
6285#ifdef VBOX_STRICT
6286/**
6287 * Emitting code that checks that the content of register @a idxReg is the same
6288 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6289 * instruction if that's not the case.
6290 *
6291 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6292 * Trashes EFLAGS on AMD64.
6293 */
6294static uint32_t
6295iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6296{
6297# ifdef RT_ARCH_AMD64
6298 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6299
6300 /* cmp reg, [mem] */
6301 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6302 {
6303 if (idxReg >= 8)
6304 pbCodeBuf[off++] = X86_OP_REX_R;
6305 pbCodeBuf[off++] = 0x38;
6306 }
6307 else
6308 {
6309 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6310 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6311 else
6312 {
6313 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6314 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6315 else
6316 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6317 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6318 if (idxReg >= 8)
6319 pbCodeBuf[off++] = X86_OP_REX_R;
6320 }
6321 pbCodeBuf[off++] = 0x39;
6322 }
6323 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6324
6325 /* je/jz +1 */
6326 pbCodeBuf[off++] = 0x74;
6327 pbCodeBuf[off++] = 0x01;
6328
6329 /* int3 */
6330 pbCodeBuf[off++] = 0xcc;
6331
6332 /* For values smaller than the register size, we must check that the rest
6333 of the register is all zeros. */
6334 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6335 {
6336 /* test reg64, imm32 */
6337 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6338 pbCodeBuf[off++] = 0xf7;
6339 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6340 pbCodeBuf[off++] = 0;
6341 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6342 pbCodeBuf[off++] = 0xff;
6343 pbCodeBuf[off++] = 0xff;
6344
6345 /* je/jz +1 */
6346 pbCodeBuf[off++] = 0x74;
6347 pbCodeBuf[off++] = 0x01;
6348
6349 /* int3 */
6350 pbCodeBuf[off++] = 0xcc;
6351 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6352 }
6353 else
6354 {
6355 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6356 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6357            off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6358 }
6359
6360# elif defined(RT_ARCH_ARM64)
6361 /* mov TMP0, [gstreg] */
6362 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6363
6364 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6365 /* sub tmp0, tmp0, idxReg */
6366 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6367 /* cbz tmp0, +1 */
6368 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6369 /* brk #0x1000+enmGstReg */
6370 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6371 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6372
6373# else
6374# error "Port me!"
6375# endif
6376 return off;
6377}
6378
6379
6380# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6381/**
6382 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6383 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6384 * instruction if that's not the case.
6385 *
6386 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6387 * Trashes EFLAGS on AMD64.
6388 */
6389static uint32_t
6390iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
6391 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6392{
6393# ifdef RT_ARCH_AMD64
6394# error "Port me!"
6395# elif defined(RT_ARCH_ARM64)
6396 /* mov vectmp0, [gstreg] */
6397 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6398
6399 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6400 {
6401 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6402 /* eor vectmp0, vectmp0, idxSimdReg */
6403 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6404 /* cnt vectmp0, vectmp0, #0*/
6405 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6406 /* umov tmp0, vectmp0.D[0] */
6407 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6408 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6409 /* cbz tmp0, +1 */
6410 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6411 /* brk #0x1000+enmGstReg */
6412 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6413 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6414 }
6415
6416 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6417 {
6418 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6419 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6420 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6421 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6422 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6423 /* umov tmp0, (vectmp0 + 1).D[0] */
6424 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6425 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6426 /* cbz tmp0, +1 */
6427 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6428 /* brk #0x1000+enmGstReg */
6429 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6430 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6431 }
6432
6433# else
6434# error "Port me!"
6435# endif
6436 return off;
6437}
6438# endif
6439#endif /* VBOX_STRICT */
6440
6441
6442#ifdef VBOX_STRICT
6443/**
6444 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6445 * important bits.
6446 *
6447 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6448 * Trashes EFLAGS on AMD64.
6449 */
6450static uint32_t
6451iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6452{
6453 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6454 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6455 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6456 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6457
6458# ifdef RT_ARCH_AMD64
6459 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6460
6461 /* je/jz +1 */
6462 pbCodeBuf[off++] = 0x74;
6463 pbCodeBuf[off++] = 0x01;
6464
6465 /* int3 */
6466 pbCodeBuf[off++] = 0xcc;
6467
6468# elif defined(RT_ARCH_ARM64)
6469 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6470
6471 /* b.eq +1 */
6472 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6473 /* brk #0x2000 */
6474 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6475
6476# else
6477# error "Port me!"
6478# endif
6479 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6480
6481 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6482 return off;
6483}
6484#endif /* VBOX_STRICT */
6485
6486
6487/**
6488 * Emits code for checking the return code of a call and rcPassUp, returning
6489 * from the code if either is non-zero.
6490 */
6491DECL_HIDDEN_THROW(uint32_t)
6492iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6493{
6494#ifdef RT_ARCH_AMD64
6495 /*
6496 * AMD64: eax = call status code.
6497 */
6498
6499 /* edx = rcPassUp */
6500 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6501# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6502 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6503# endif
6504
6505 /* edx = eax | rcPassUp */
6506 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6507 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6508 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6509 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6510
6511 /* Jump to non-zero status return path. */
6512 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6513
6514 /* done. */
6515
6516#elif RT_ARCH_ARM64
6517 /*
6518 * ARM64: w0 = call status code.
6519 */
6520# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6521 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6522# endif
6523 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6524
6525 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6526
6527 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6528
6529 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6530 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6531 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6532
6533#else
6534# error "port me"
6535#endif
6536 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6537 RT_NOREF_PV(idxInstr);
6538 return off;
6539}
6540
6541
6542/**
6543 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6544 * raising a \#GP(0) if it isn't.
6545 *
6546 * @returns New code buffer offset, UINT32_MAX on failure.
6547 * @param pReNative The native recompile state.
6548 * @param off The code buffer offset.
6549 * @param idxAddrReg The host register with the address to check.
6550 * @param idxInstr The current instruction.
6551 */
6552DECL_HIDDEN_THROW(uint32_t)
6553iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6554{
6555 /*
6556 * Make sure we don't have any outstanding guest register writes as we may
6557     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6558 */
6559 off = iemNativeRegFlushPendingWrites(pReNative, off);
6560
6561#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6562 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6563#else
6564 RT_NOREF(idxInstr);
6565#endif
6566
6567#ifdef RT_ARCH_AMD64
6568 /*
6569 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6570 * return raisexcpt();
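     * (A 64-bit address is canonical iff bits 63:47 are all copies of bit 47,
     * i.e. the high dword is in [0x00000000..0x00007fff] or
     * [0xffff8000..0xffffffff].  Adding 0x8000 folds both ranges into values
     * whose upper 16 bits are zero, so a non-zero result after the shift
     * means the address is non-canonical.)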
6571     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6572 */
6573 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6574
6575 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6576 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6577 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6578 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6579 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6580
6581 iemNativeRegFreeTmp(pReNative, iTmpReg);
6582
6583#elif defined(RT_ARCH_ARM64)
6584 /*
6585 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6586 * return raisexcpt();
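     * (Same folding trick as the AMD64 variant above, applied directly to the
     * 64-bit value: adding bit 47 leaves bits 63:48 zero exactly for
     * canonical addresses.)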
6587 * ----
6588 * mov x1, 0x800000000000
6589 * add x1, x0, x1
6590 * cmp xzr, x1, lsr 48
6591 * b.ne .Lraisexcpt
6592 */
6593 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6594
6595 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6596 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6597 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6598 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6599
6600 iemNativeRegFreeTmp(pReNative, iTmpReg);
6601
6602#else
6603# error "Port me"
6604#endif
6605 return off;
6606}
6607
6608
6609/**
6610 * Emits code to check that the content of @a idxAddrReg is within the limit
6611 * of CS, raising a \#GP(0) if it isn't.
6612 *
6613 * @returns New code buffer offset; throws VBox status code on error.
6614 * @param pReNative The native recompile state.
6615 * @param off The code buffer offset.
6616 * @param idxAddrReg The host register (32-bit) with the address to
6617 * check.
6618 * @param idxInstr The current instruction.
6619 */
6620DECL_HIDDEN_THROW(uint32_t)
6621iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6622 uint8_t idxAddrReg, uint8_t idxInstr)
6623{
6624 /*
6625 * Make sure we don't have any outstanding guest register writes as we may
6626     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6627 */
6628 off = iemNativeRegFlushPendingWrites(pReNative, off);
6629
6630#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6631 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6632#else
6633 RT_NOREF(idxInstr);
6634#endif
6635
6636 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6637 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6638 kIemNativeGstRegUse_ReadOnly);
6639
6640 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6641 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6642
6643 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6644 return off;
6645}
6646
6647
6648/**
6649 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
6650 *
6651 * @returns The flush mask.
6652 * @param fCImpl The IEM_CIMPL_F_XXX flags.
6653 * @param fGstShwFlush The starting flush mask.
6654 */
6655DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
6656{
6657 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
6658 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
6659 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
6660 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
6661 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
6662 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
6663 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
6664 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
6665 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
6666 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
6667 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
6668 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
6669 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
6670 return fGstShwFlush;
6671}
6672
6673
6674/**
6675 * Emits a call to a CImpl function or something similar.
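 *
 * @returns New code buffer offset.
 * @param   pReNative       The native recompile state.
 * @param   off             Current code buffer position.
 * @param   idxInstr        The current instruction number within the TB (for
 *                          diagnostics and instruction counting).
 * @param   fGstShwFlush    Additional guest register shadow copies to flush,
 *                          combined with the mask derived from the CImpl flags.
 * @param   pfnCImpl        Address of the CImpl style function to call.
 * @param   cbInstr         The guest instruction length.
 * @param   cAddParams      Number of additional parameters to pass (0 to 3).
 * @param   uParam0         The first additional parameter.
 * @param   uParam1         The second additional parameter.
 * @param   uParam2         The third additional parameter.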
6676 */
6677DECL_HIDDEN_THROW(uint32_t)
6678iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6679 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6680{
6681 /* Writeback everything. */
6682 off = iemNativeRegFlushPendingWrites(pReNative, off);
6683
6684 /*
6685     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6686 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6687 */
6688 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6689 fGstShwFlush
6690 | RT_BIT_64(kIemNativeGstReg_Pc)
6691 | RT_BIT_64(kIemNativeGstReg_EFlags));
6692 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6693
6694 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6695
6696 /*
6697 * Load the parameters.
6698 */
6699#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6700    /* Special case: the hidden VBOXSTRICTRC return pointer takes the first argument register. */
6701 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6702 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6703 if (cAddParams > 0)
6704 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6705 if (cAddParams > 1)
6706 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6707 if (cAddParams > 2)
6708 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6709 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6710
6711#else
6712 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6713 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6714 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6715 if (cAddParams > 0)
6716 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6717 if (cAddParams > 1)
6718 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6719 if (cAddParams > 2)
6720# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6721 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6722# else
6723 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6724# endif
6725#endif
6726
6727 /*
6728 * Make the call.
6729 */
6730 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6731
6732#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6733 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6734#endif
6735
6736 /*
6737 * Check the status code.
6738 */
6739 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6740}
6741
6742
6743/**
6744 * Emits a call to a threaded worker function.
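 *
 * @returns New code buffer offset.
 * @param   pReNative   The native recompile state.
 * @param   off         Current code buffer position.
 * @param   pCallEntry  The threaded call entry specifying the threaded
 *                      function and its parameters.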
6745 */
6746DECL_HIDDEN_THROW(uint32_t)
6747iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6748{
6749 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6750 off = iemNativeRegFlushPendingWrites(pReNative, off);
6751
6752 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6753 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6754
6755#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6756 /* The threaded function may throw / long jmp, so set current instruction
6757 number if we're counting. */
6758 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6759#endif
6760
6761 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6762
6763#ifdef RT_ARCH_AMD64
6764 /* Load the parameters and emit the call. */
6765# ifdef RT_OS_WINDOWS
6766# ifndef VBOXSTRICTRC_STRICT_ENABLED
6767 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6768 if (cParams > 0)
6769 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6770 if (cParams > 1)
6771 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6772 if (cParams > 2)
6773 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6774# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6775 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6776 if (cParams > 0)
6777 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6778 if (cParams > 1)
6779 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6780 if (cParams > 2)
6781 {
6782 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6783 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6784 }
6785 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6786# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6787# else
6788 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6789 if (cParams > 0)
6790 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6791 if (cParams > 1)
6792 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6793 if (cParams > 2)
6794 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6795# endif
6796
6797 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6798
6799# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6800 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6801# endif
6802
6803#elif RT_ARCH_ARM64
6804 /*
6805 * ARM64:
6806 */
6807 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6808 if (cParams > 0)
6809 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6810 if (cParams > 1)
6811 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6812 if (cParams > 2)
6813 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6814
6815 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6816
6817#else
6818# error "port me"
6819#endif
6820
6821 /*
6822 * Check the status code.
6823 */
6824 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6825
6826 return off;
6827}
6828
6829#ifdef VBOX_WITH_STATISTICS
6830/**
6831 * Emits code to update the thread call statistics.
6832 */
6833DECL_INLINE_THROW(uint32_t)
6834iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6835{
6836 /*
6837 * Update threaded function stats.
6838 */
6839 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6840 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6841# if defined(RT_ARCH_ARM64)
6842 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6843 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6844 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6845 iemNativeRegFreeTmp(pReNative, idxTmp1);
6846 iemNativeRegFreeTmp(pReNative, idxTmp2);
6847# else
6848 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6849# endif
6850 return off;
6851}
6852#endif /* VBOX_WITH_STATISTICS */
6853
6854
6855/**
6856 * Emits the code at the CheckBranchMiss label.
6857 */
6858static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6859{
6860 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6861 if (idxLabel != UINT32_MAX)
6862 {
6863 iemNativeLabelDefine(pReNative, idxLabel, off);
6864
6865 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6866 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6867 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6868
6869 /* jump back to the return sequence. */
6870 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6871 }
6872 return off;
6873}
6874
6875
6876/**
6877 * Emits the code at the NeedCsLimChecking label.
6878 */
6879static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6880{
6881 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6882 if (idxLabel != UINT32_MAX)
6883 {
6884 iemNativeLabelDefine(pReNative, idxLabel, off);
6885
6886 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6887 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6888 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6889
6890 /* jump back to the return sequence. */
6891 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6892 }
6893 return off;
6894}
6895
6896
6897/**
6898 * Emits the code at the ObsoleteTb label.
6899 */
6900static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6901{
6902 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
6903 if (idxLabel != UINT32_MAX)
6904 {
6905 iemNativeLabelDefine(pReNative, idxLabel, off);
6906
6907 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
6908 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6909 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
6910
6911 /* jump back to the return sequence. */
6912 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6913 }
6914 return off;
6915}
6916
6917
6918/**
6919 * Emits the code at the RaiseGP0 label.
6920 */
6921static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6922{
6923 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
6924 if (idxLabel != UINT32_MAX)
6925 {
6926 iemNativeLabelDefine(pReNative, idxLabel, off);
6927
6928 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
6929 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6930 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
6931
6932 /* jump back to the return sequence. */
6933 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6934 }
6935 return off;
6936}
6937
6938
6939/**
6940 * Emits the code at the RaiseNm label.
6941 */
6942static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6943{
6944 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
6945 if (idxLabel != UINT32_MAX)
6946 {
6947 iemNativeLabelDefine(pReNative, idxLabel, off);
6948
6949 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
6950 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6951 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
6952
6953 /* jump back to the return sequence. */
6954 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6955 }
6956 return off;
6957}
6958
6959
6960/**
6961 * Emits the code at the RaiseUd label.
6962 */
6963static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6964{
6965 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
6966 if (idxLabel != UINT32_MAX)
6967 {
6968 iemNativeLabelDefine(pReNative, idxLabel, off);
6969
6970 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
6971 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6972 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
6973
6974 /* jump back to the return sequence. */
6975 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6976 }
6977 return off;
6978}
6979
6980
6981/**
6982 * Emits the code at the RaiseMf label.
6983 */
6984static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6985{
6986 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
6987 if (idxLabel != UINT32_MAX)
6988 {
6989 iemNativeLabelDefine(pReNative, idxLabel, off);
6990
6991 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
6992 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6993 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
6994
6995 /* jump back to the return sequence. */
6996 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6997 }
6998 return off;
6999}
7000
7001
7002/**
7003 * Emits the code at the RaiseXf label.
7004 */
7005static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7006{
7007 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
7008 if (idxLabel != UINT32_MAX)
7009 {
7010 iemNativeLabelDefine(pReNative, idxLabel, off);
7011
7012 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
7013 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7014 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
7015
7016 /* jump back to the return sequence. */
7017 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7018 }
7019 return off;
7020}
7021
7022
7023/**
7024 * Emits the code at the ReturnWithFlags label (returns
7025 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7026 */
7027static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7028{
7029 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7030 if (idxLabel != UINT32_MAX)
7031 {
7032 iemNativeLabelDefine(pReNative, idxLabel, off);
7033
7034 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7035
7036 /* jump back to the return sequence. */
7037 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7038 }
7039 return off;
7040}
7041
7042
7043/**
7044 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7045 */
7046static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7047{
7048 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7049 if (idxLabel != UINT32_MAX)
7050 {
7051 iemNativeLabelDefine(pReNative, idxLabel, off);
7052
7053 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7054
7055 /* jump back to the return sequence. */
7056 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7057 }
7058 return off;
7059}
7060
7061
7062/**
7063 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
7064 */
7065static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7066{
7067 /*
7068 * Generate the rc + rcPassUp fiddling code if needed.
7069 */
7070 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7071 if (idxLabel != UINT32_MAX)
7072 {
7073 iemNativeLabelDefine(pReNative, idxLabel, off);
7074
7075 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
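 /* Roughly what this tail stub amounts to (illustrative sketch only):
  *     rcStrict = iemNativeHlpExecStatusCodeFiddling(pVCpu, rcStrict, idxInstr);
  *     goto Return;
  * The rc/rcPassUp test itself is emitted at the call sites that jump to this label;
  * here we only marshal the helper arguments per host calling convention. */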
7076#ifdef RT_ARCH_AMD64
7077# ifdef RT_OS_WINDOWS
7078# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7079 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7080# endif
7081 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7082 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7083# else
7084 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7085 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7086# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7087 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7088# endif
7089# endif
7090# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7091 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7092# endif
7093
7094#else
7095 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7096 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7097 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7098#endif
7099
7100 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7101 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7102 }
7103 return off;
7104}
7105
7106
7107/**
7108 * Emits a standard epilog.
7109 */
7110static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7111{
7112 *pidxReturnLabel = UINT32_MAX;
7113
7114 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7115 off = iemNativeRegFlushPendingWrites(pReNative, off);
7116
7117 /*
7118 * Successful return, so clear the return register (eax, w0).
7119 */
7120 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
7121
7122 /*
7123 * Define label for common return point.
7124 */
7125 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7126 *pidxReturnLabel = idxReturn;
7127
7128 /*
7129 * Restore registers and return.
7130 */
7131#ifdef RT_ARCH_AMD64
7132 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7133
7134 /* Reposition rsp at the r15 restore point. */
7135 pbCodeBuf[off++] = X86_OP_REX_W;
7136 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7137 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7138 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7139
7140 /* Pop non-volatile registers and return */
7141 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7142 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7143 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7144 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7145 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7146 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7147 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7148 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7149# ifdef RT_OS_WINDOWS
7150 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7151 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7152# endif
7153 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7154 pbCodeBuf[off++] = 0xc9; /* leave */
7155 pbCodeBuf[off++] = 0xc3; /* ret */
7156 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7157
7158#elif RT_ARCH_ARM64
7159 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7160
7161 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
7162 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7163 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7164 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7165 IEMNATIVE_FRAME_VAR_SIZE / 8);
7166 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7167 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7168 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7169 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7170 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7171 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7172 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7173 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7174 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7175 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7176 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7177 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7178
7179 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7180 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7181 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7182 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7183
7184 /* retab / ret */
7185# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7186 if (1)
7187 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7188 else
7189# endif
7190 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7191
7192#else
7193# error "port me"
7194#endif
7195 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7196
7197 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7198}
7199
7200
7201/**
7202 * Emits a standard prolog.
7203 */
7204static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7205{
7206#ifdef RT_ARCH_AMD64
7207 /*
7208 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7209 * reserving 64 bytes for stack variables plus 4 non-register argument
7210 * slots. Fixed register assignment: xBX = pVCpu;
7211 *
7212 * Since we always do the same register spilling, we can use the same
7213 * unwind description for all the code.
7214 */
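 /* Illustrative (approximate) frame layout after the pushes below, non-Windows case;
  * offsets are relative to the new RBP, see IEMNATIVE_FP_OFF_LAST_PUSH for the exact value:
  *     [rbp+08h]        return address
  *     [rbp+00h]        saved rbp
  *     [rbp-08h]        saved rbx
  *     [rbp-10h..-28h]  saved r12..r15
  *     [rbp-xxh]        variable area + stack/shadow argument slots (allocated by the sub rsp below)
  * On Windows rsi and rdi are pushed as well, shifting r12..r15 down by 16 bytes. */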
7215 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7216 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7217 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7218 pbCodeBuf[off++] = 0x8b;
7219 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7220 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7221 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7222# ifdef RT_OS_WINDOWS
7223 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7224 pbCodeBuf[off++] = 0x8b;
7225 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7226 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7227 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7228# else
7229 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7230 pbCodeBuf[off++] = 0x8b;
7231 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7232# endif
7233 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7234 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7235 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7236 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7237 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7238 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7239 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7240 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7241
7242# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7243 /* Save the frame pointer. */
7244 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7245# endif
7246
7247 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7248 X86_GREG_xSP,
7249 IEMNATIVE_FRAME_ALIGN_SIZE
7250 + IEMNATIVE_FRAME_VAR_SIZE
7251 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7252 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7253 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7254 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7255 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7256
7257#elif RT_ARCH_ARM64
7258 /*
7259 * We set up a stack frame exactly like on x86, only we have to push the
7260 * return address ourselves here. We save all non-volatile registers.
7261 */
7262 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7263
7264# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
7265 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7266 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
7267 * in any way conditional, so we just emit this instruction now and hope for the best... */
7268 /* pacibsp */
7269 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7270# endif
7271
7272 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
7273 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7274 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7275 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7276 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7277 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7278 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7279 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7280 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7281 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7282 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7283 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7284 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7285 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7286 /* Save the BP and LR (ret address) registers at the top of the frame. */
7287 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7288 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7289 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7290 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7291 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7292 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7293
7294 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7295 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7296
7297 /* mov r28, r0 */
7298 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7299 /* mov r27, r1 */
7300 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7301
7302# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7303 /* Save the frame pointer. */
7304 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7305 ARMV8_A64_REG_X2);
7306# endif
7307
7308#else
7309# error "port me"
7310#endif
7311 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7312 return off;
7313}
7314
7315
7316
7317
7318/*********************************************************************************************************************************
7319* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
7320*********************************************************************************************************************************/
7321
7322#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
7323 { \
7324 Assert(pReNative->Core.bmVars == 0); \
7325 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
7326 Assert(pReNative->Core.bmStack == 0); \
7327 pReNative->fMc = (a_fMcFlags); \
7328 pReNative->fCImpl = (a_fCImplFlags); \
7329 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
7330
7331/** We have to get to the end in recompilation mode, as otherwise we won't
7332 * generate code for all the IEM_MC_IF_XXX branches. */
7333#define IEM_MC_END() \
7334 iemNativeVarFreeAll(pReNative); \
7335 } return off
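/* Illustrative (hypothetical) shape of a recompiled MC block body built from these macros;
 * the flag values are only examples:
 *     IEM_MC_BEGIN(0, 1, IEM_MC_F_MIN_386, 0);
 *     ... the IEM_MC_XXX statements of the block, each expanding to iemNativeEmitXxx
 *         calls that advance 'off' ...
 *     IEM_MC_END();
 */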
7336
7337
7338
7339/*********************************************************************************************************************************
7340* Native Emitter Support. *
7341*********************************************************************************************************************************/
7342
7343
7344#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
7345
7346#define IEM_MC_NATIVE_ELSE() } else {
7347
7348#define IEM_MC_NATIVE_ENDIF() } ((void)0)
7349
7350
7351#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
7352 off = a_fnEmitter(pReNative, off)
7353
7354#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
7355 off = a_fnEmitter(pReNative, off, (a0))
7356
7357#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
7358 off = a_fnEmitter(pReNative, off, (a0), (a1))
7359
7360#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
7361 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
7362
7363#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
7364 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
7365
7366#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
7367 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
7368
7369#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
7370 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
7371
7372#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
7373 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
7374
7375#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
7376 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
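/* Illustrative (hypothetical) use of the guards above; the emitter name and the
 * RT_ARCH_VAL_XXX host flags are merely examples:
 *     IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *         IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_r_r, idxRegDst, idxRegSrc);
 *     IEM_MC_NATIVE_ELSE()
 *         ... fallback IEM_MC_XXX statements for other hosts ...
 *     IEM_MC_NATIVE_ENDIF();
 */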
7377
7378
7379
7380/*********************************************************************************************************************************
7381* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                               *
7382*********************************************************************************************************************************/
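/* The macros below switch the block into C-impl mode (fMc = 0, fCImpl = the given flags) and
   emit a single call to the C implementation via iemNativeEmitCImplCall, forwarding up to
   three arguments and zeroing the unused argument slots. */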
7383
7384#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
7385 pReNative->fMc = 0; \
7386 pReNative->fCImpl = (a_fFlags); \
7387 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
7388
7389
7390#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7391 pReNative->fMc = 0; \
7392 pReNative->fCImpl = (a_fFlags); \
7393 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
7394
7395DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7396 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7397 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
7398{
7399 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
7400}
7401
7402
7403#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7404 pReNative->fMc = 0; \
7405 pReNative->fCImpl = (a_fFlags); \
7406 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7407 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
7408
7409DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7410 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7411 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
7412{
7413 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
7414}
7415
7416
7417#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7418 pReNative->fMc = 0; \
7419 pReNative->fCImpl = (a_fFlags); \
7420 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7421 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
7422
7423DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7424 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7425 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
7426 uint64_t uArg2)
7427{
7428 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
7429}
7430
7431
7432
7433/*********************************************************************************************************************************
7434* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
7435*********************************************************************************************************************************/
7436
7437/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
7438 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
7439DECL_INLINE_THROW(uint32_t)
7440iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7441{
7442 /*
7443 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
7444 * return with a special status code and make the execution loop deal with
7445 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
7446 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
7447 * could continue w/o interruption, it will probably drop into the
7448 * debugger anyway, so it's not worth the effort of trying to service it
7449 * here and we just lump it in with the handling of the others.
7450 *
7451 * To simplify the code and the register state management even more (wrt
7452 * the immediate in the AND operation), we always update the flags and skip
7453 * the conditional jump associated with the extra check.
7454 */
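 /* Roughly what the code emitted below amounts to (illustrative sketch, cf. iemRegFinishClearingRF):
  *     uint32_t fEfl = pVCpu->cpum.GstCtx.eflags.u;
  *     if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
  *         return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;    // via the ReturnWithFlags tail label
  *     pVCpu->cpum.GstCtx.eflags.u = fEfl & ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
  */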
7455 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
7456 <= UINT32_MAX);
7457#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7458 AssertMsg( pReNative->idxCurCall == 0
7459 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
7460 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
7461#endif
7462
7463 /*
7464 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
7465 * any pending register writes must be flushed.
7466 */
7467 off = iemNativeRegFlushPendingWrites(pReNative, off);
7468
7469 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7470 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
7471 true /*fSkipLivenessAssert*/);
7472 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
7473 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
7474 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
7475 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
7476 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
7477
7478 /* Free but don't flush the EFLAGS register. */
7479 iemNativeRegFreeTmp(pReNative, idxEflReg);
7480
7481 return off;
7482}
7483
7484
7485/** Emits nothing for VINF_SUCCESS (the common case); for VINF_IEM_REEXEC_BREAK it records the
7485 * instruction number and exits via the ReturnBreak label. */
7486template<int const a_rcNormal>
7487DECL_FORCE_INLINE(uint32_t)
7488iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7489{
7490 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
7491 if (a_rcNormal != VINF_SUCCESS)
7492 {
7493#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7494 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7495#else
7496 RT_NOREF_PV(idxInstr);
7497#endif
7498
7499 /* As this code returns from the TB any pending register writes must be flushed. */
7500 off = iemNativeRegFlushPendingWrites(pReNative, off);
7501
7502 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
7503 }
7504 return off;
7505}
7506
7507
7508#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
7509 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7510 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7511
7512#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7513 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7514 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7515 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7516
7517/** Same as iemRegAddToRip64AndFinishingNoFlags. */
7518DECL_INLINE_THROW(uint32_t)
7519iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7520{
7521#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7522# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7523 if (!pReNative->Core.offPc)
7524 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7525# endif
7526
7527 /* Allocate a temporary PC register. */
7528 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7529
7530 /* Perform the addition and store the result. */
7531 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
7532 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7533
7534 /* Free but don't flush the PC register. */
7535 iemNativeRegFreeTmp(pReNative, idxPcReg);
7536#endif
7537
7538#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7539 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7540
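 /* With delayed PC updating we only account for the advance here; the actual write to
  * CPUMCTX is emitted later by iemNativeEmitPcWriteback. Inside a conditional block we
  * write back immediately, as the code paths may diverge; otherwise we just count the
  * skipped update. */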
7541 pReNative->Core.offPc += cbInstr;
7542# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7543 off = iemNativePcAdjustCheck(pReNative, off);
7544# endif
7545 if (pReNative->cCondDepth)
7546 off = iemNativeEmitPcWriteback(pReNative, off);
7547 else
7548 pReNative->Core.cInstrPcUpdateSkipped++;
7549#endif
7550
7551 return off;
7552}
7553
7554
7555#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
7556 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7557 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7558
7559#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7560 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7561 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7562 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7563
7564/** Same as iemRegAddToEip32AndFinishingNoFlags. */
7565DECL_INLINE_THROW(uint32_t)
7566iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7567{
7568#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7569# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7570 if (!pReNative->Core.offPc)
7571 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7572# endif
7573
7574 /* Allocate a temporary PC register. */
7575 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7576
7577 /* Perform the addition and store the result. */
7578 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7579 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7580
7581 /* Free but don't flush the PC register. */
7582 iemNativeRegFreeTmp(pReNative, idxPcReg);
7583#endif
7584
7585#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7586 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7587
7588 pReNative->Core.offPc += cbInstr;
7589# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7590 off = iemNativePcAdjustCheck(pReNative, off);
7591# endif
7592 if (pReNative->cCondDepth)
7593 off = iemNativeEmitPcWriteback(pReNative, off);
7594 else
7595 pReNative->Core.cInstrPcUpdateSkipped++;
7596#endif
7597
7598 return off;
7599}
7600
7601
7602#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
7603 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7604 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7605
7606#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7607 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7608 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7609 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7610
7611/** Same as iemRegAddToIp16AndFinishingNoFlags. */
7612DECL_INLINE_THROW(uint32_t)
7613iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7614{
7615#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7616# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7617 if (!pReNative->Core.offPc)
7618 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7619# endif
7620
7621 /* Allocate a temporary PC register. */
7622 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7623
7624 /* Perform the addition and store the result. */
7625 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7626 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7627 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7628
7629 /* Free but don't flush the PC register. */
7630 iemNativeRegFreeTmp(pReNative, idxPcReg);
7631#endif
7632
7633#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7634 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7635
7636 pReNative->Core.offPc += cbInstr;
7637# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7638 off = iemNativePcAdjustCheck(pReNative, off);
7639# endif
7640 if (pReNative->cCondDepth)
7641 off = iemNativeEmitPcWriteback(pReNative, off);
7642 else
7643 pReNative->Core.cInstrPcUpdateSkipped++;
7644#endif
7645
7646 return off;
7647}
7648
7649
7650
7651/*********************************************************************************************************************************
7652* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
7653*********************************************************************************************************************************/
7654
7655#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7656 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7657 (a_enmEffOpSize), pCallEntry->idxInstr); \
7658 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7659
7660#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7661 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7662 (a_enmEffOpSize), pCallEntry->idxInstr); \
7663 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7664 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7665
7666#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
7667 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7668 IEMMODE_16BIT, pCallEntry->idxInstr); \
7669 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7670
7671#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7672 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7673 IEMMODE_16BIT, pCallEntry->idxInstr); \
7674 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7675 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7676
7677#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
7678 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7679 IEMMODE_64BIT, pCallEntry->idxInstr); \
7680 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7681
7682#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7683 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7684 IEMMODE_64BIT, pCallEntry->idxInstr); \
7685 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7686 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7687
7688/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
7689 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
7690 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
7691DECL_INLINE_THROW(uint32_t)
7692iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7693 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7694{
7695 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
7696
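 /* Roughly what the emitted code amounts to (illustrative sketch, cf. iemRegRip64RelativeJumpS8AndFinishNoFlags):
  *     uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
  *     if (enmEffOpSize == IEMMODE_64BIT) {
  *         if (!IEM_IS_CANONICAL(uNewRip))
  *             return iemRaiseGeneralProtectionFault0(pVCpu);   // via the RaiseGp0 tail label
  *     } else
  *         uNewRip &= UINT16_MAX;
  *     pVCpu->cpum.GstCtx.rip = uNewRip;
  */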
7697 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7698 off = iemNativeRegFlushPendingWrites(pReNative, off);
7699
7700#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7701 Assert(pReNative->Core.offPc == 0);
7702
7703 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7704#endif
7705
7706 /* Allocate a temporary PC register. */
7707 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7708
7709 /* Perform the addition. */
7710 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
7711
7712 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
7713 {
7714 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
7715 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7716 }
7717 else
7718 {
7719 /* Just truncate the result to 16-bit IP. */
7720 Assert(enmEffOpSize == IEMMODE_16BIT);
7721 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7722 }
7723 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7724
7725 /* Free but don't flush the PC register. */
7726 iemNativeRegFreeTmp(pReNative, idxPcReg);
7727
7728 return off;
7729}
7730
7731
7732#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7733 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7734 (a_enmEffOpSize), pCallEntry->idxInstr); \
7735 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7736
7737#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7738 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7739 (a_enmEffOpSize), pCallEntry->idxInstr); \
7740 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7741 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7742
7743#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
7744 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7745 IEMMODE_16BIT, pCallEntry->idxInstr); \
7746 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7747
7748#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7749 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7750 IEMMODE_16BIT, pCallEntry->idxInstr); \
7751 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7752 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7753
7754#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
7755 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7756 IEMMODE_32BIT, pCallEntry->idxInstr); \
7757 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7758
7759#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7760 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7761 IEMMODE_32BIT, pCallEntry->idxInstr); \
7762 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7763 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7764
7765/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
7766 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
7767 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
7768DECL_INLINE_THROW(uint32_t)
7769iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7770 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7771{
7772 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
7773
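 /* Roughly what the emitted code amounts to (illustrative sketch, cf. iemRegEip32RelativeJumpS8AndFinishNoFlags):
  *     uint32_t uNewEip = pVCpu->cpum.GstCtx.eip + cbInstr + offDisp;
  *     if (enmEffOpSize == IEMMODE_16BIT)
  *         uNewEip &= UINT16_MAX;
  *     if (uNewEip > pVCpu->cpum.GstCtx.cs.u32Limit)
  *         return iemRaiseGeneralProtectionFault0(pVCpu);       // via the RaiseGp0 tail label
  *     pVCpu->cpum.GstCtx.rip = uNewEip;
  */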
7774 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7775 off = iemNativeRegFlushPendingWrites(pReNative, off);
7776
7777#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7778 Assert(pReNative->Core.offPc == 0);
7779
7780 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7781#endif
7782
7783 /* Allocate a temporary PC register. */
7784 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7785
7786 /* Perform the addition. */
7787 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7788
7789 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
7790 if (enmEffOpSize == IEMMODE_16BIT)
7791 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7792
7793 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
7794/** @todo we can skip this in 32-bit FLAT mode. */
7795 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7796
7797 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7798
7799 /* Free but don't flush the PC register. */
7800 iemNativeRegFreeTmp(pReNative, idxPcReg);
7801
7802 return off;
7803}
7804
7805
7806#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
7807 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7808 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7809
7810#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
7811 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7812 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7813 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7814
7815#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
7816 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7817 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7818
7819#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7820 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7821 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7822 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7823
7824#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
7825 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7826 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7827
7828#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7829 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7830 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7831 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7832
7833/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
7834DECL_INLINE_THROW(uint32_t)
7835iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7836 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
7837{
7838 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7839 off = iemNativeRegFlushPendingWrites(pReNative, off);
7840
7841#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7842 Assert(pReNative->Core.offPc == 0);
7843
7844 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7845#endif
7846
7847 /* Allocate a temporary PC register. */
7848 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7849
7850 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
7851 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7852 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7853 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7854 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7855
7856 /* Free but don't flush the PC register. */
7857 iemNativeRegFreeTmp(pReNative, idxPcReg);
7858
7859 return off;
7860}
7861
7862
7863
7864/*********************************************************************************************************************************
7865* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                     *
7866*********************************************************************************************************************************/
7867
7868/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
7869#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
7870 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7871
7872/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
7873#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
7874 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7875
7876/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
7877#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
7878 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7879
7880/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
7881 * clears flags. */
7882#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
7883 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
7884 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7885
7886/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
7887 * clears flags. */
7888#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
7889 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
7890 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7891
7892/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
7893 * clears flags. */
7894#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
7895 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
7896 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7897
7898#undef IEM_MC_SET_RIP_U16_AND_FINISH
7899
7900
7901/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
7902#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
7903 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7904
7905/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
7906#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
7907 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7908
7909/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
7910 * clears flags. */
7911#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
7912 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
7913 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7914
7915/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
7916 * and clears flags. */
7917#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
7918 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
7919 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7920
7921#undef IEM_MC_SET_RIP_U32_AND_FINISH
7922
7923
7924/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
7925#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
7926 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
7927
7928/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
7929 * and clears flags. */
7930#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
7931 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
7932 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7933
7934#undef IEM_MC_SET_RIP_U64_AND_FINISH
7935
7936
7937/** Same as iemRegRipJumpU16AndFinishNoFlags,
7938 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
7939DECL_INLINE_THROW(uint32_t)
7940iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
7941 uint8_t idxInstr, uint8_t cbVar)
7942{
7943 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
7944 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
7945
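 /* Roughly what the emitted code amounts to (illustrative sketch, cf. iemRegRipJumpU64AndFinishNoFlags):
  *     uint64_t const uNewPc = <value of the idxVarPc variable>;
  *     if (!f64Bit) {
  *         if (uNewPc > pVCpu->cpum.GstCtx.cs.u32Limit)
  *             return iemRaiseGeneralProtectionFault0(pVCpu);   // via the RaiseGp0 tail label
  *     } else if (cbVar > sizeof(uint32_t) && !IEM_IS_CANONICAL(uNewPc))
  *         return iemRaiseGeneralProtectionFault0(pVCpu);
  *     pVCpu->cpum.GstCtx.rip = uNewPc;
  */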
7946 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7947 off = iemNativeRegFlushPendingWrites(pReNative, off);
7948
7949#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7950 Assert(pReNative->Core.offPc == 0);
7951
7952 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7953#endif
7954
7955 /* Get a register with the new PC loaded from idxVarPc.
7956 Note! This ASSUMES that the high bits of the GPR are zeroed. */
7957 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
7958
7959 /* Check limit (may #GP(0) + exit TB). */
7960 if (!f64Bit)
7961/** @todo we can skip this test in FLAT 32-bit mode. */
7962 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7963 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
7964 else if (cbVar > sizeof(uint32_t))
7965 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7966
7967 /* Store the result. */
7968 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7969
7970 iemNativeVarRegisterRelease(pReNative, idxVarPc);
7971 /** @todo implicitly free the variable? */
7972
7973 return off;
7974}
7975
7976
7977
7978/*********************************************************************************************************************************
7979* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
7980*********************************************************************************************************************************/
7981
7982#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
7983 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
7984
7985/**
7986 * Emits code to check if a \#NM exception should be raised.
7987 *
7988 * @returns New code buffer offset, UINT32_MAX on failure.
7989 * @param pReNative The native recompile state.
7990 * @param off The code buffer offset.
7991 * @param idxInstr The current instruction.
7992 */
7993DECL_INLINE_THROW(uint32_t)
7994iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7995{
7996 /*
7997 * Make sure we don't have any outstanding guest register writes as we may
7998 * raise an #NM and all guest registers must be up to date in CPUMCTX.
7999 *
8000 * @todo r=aeichner Can we postpone this to the RaiseNm path?
8001 */
8002 off = iemNativeRegFlushPendingWrites(pReNative, off);
8003
8004#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8005 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8006#else
8007 RT_NOREF(idxInstr);
8008#endif
8009
8010 /* Allocate a temporary CR0 register. */
8011 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8012 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8013
8014 /*
8015 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
8016 * return raisexcpt();
8017 */
8018 /* Test and jump. */
8019 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
8020
8021 /* Free but don't flush the CR0 register. */
8022 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8023
8024 return off;
8025}
8026
8027
8028#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
8029 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
8030
8031/**
8032 * Emits code to check if a \#MF exception should be raised.
8033 *
8034 * @returns New code buffer offset, UINT32_MAX on failure.
8035 * @param pReNative The native recompile state.
8036 * @param off The code buffer offset.
8037 * @param idxInstr The current instruction.
8038 */
8039DECL_INLINE_THROW(uint32_t)
8040iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8041{
8042 /*
8043 * Make sure we don't have any outstanding guest register writes as we may
8044 * raise an #MF and all guest registers must be up to date in CPUMCTX.
8045 *
8046 * @todo r=aeichner Can we postpone this to the RaiseMf path?
8047 */
8048 off = iemNativeRegFlushPendingWrites(pReNative, off);
8049
8050#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8051 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8052#else
8053 RT_NOREF(idxInstr);
8054#endif
8055
8056 /* Allocate a temporary FSW register. */
8057 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
8058 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
8059
8060 /*
8061 * if ((FSW & X86_FSW_ES) != 0)
8062 * return raisexcpt();
8063 */
8064 /* Test and jump. */
8065 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
8066
8067 /* Free but don't flush the FSW register. */
8068 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
8069
8070 return off;
8071}
8072
8073
8074#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
8075 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8076
8077/**
8078 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
8079 *
8080 * @returns New code buffer offset, UINT32_MAX on failure.
8081 * @param pReNative The native recompile state.
8082 * @param off The code buffer offset.
8083 * @param idxInstr The current instruction.
8084 */
8085DECL_INLINE_THROW(uint32_t)
8086iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8087{
8088 /*
8089 * Make sure we don't have any outstanding guest register writes as we may
8090 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8091 *
8092 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8093 */
8094 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8095
8096#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8097 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8098#else
8099 RT_NOREF(idxInstr);
8100#endif
8101
8102 /* Allocate a temporary CR0 and CR4 register. */
8103 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8104 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8105 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8106 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8107
8108 /** @todo r=aeichner Optimize this later to use fewer compares and branches
8109 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h, but first check that it has some
8110 * actual performance benefit). */
8111 /*
8112 * if (cr0 & X86_CR0_EM)
8113 * return raisexcpt();
8114 */
8115 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_EM_BIT, idxLabelRaiseUd);
8116 /*
8117 * if (!(cr4 & X86_CR4_OSFXSR))
8118 * return raisexcpt();
8119 */
8120 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR_BIT, idxLabelRaiseUd);
8121 /*
8122 * if (cr0 & X86_CR0_TS)
8123 * return raisexcpt();
8124 */
8125 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8126
8127 /* Free but don't flush the CR0 and CR4 register. */
8128 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8129 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8130
8131 return off;
8132}
8133
8134
8135#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
8136 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8137
8138/**
8139 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
8140 *
8141 * @returns New code buffer offset, UINT32_MAX on failure.
8142 * @param pReNative The native recompile state.
8143 * @param off The code buffer offset.
8144 * @param idxInstr The current instruction.
8145 */
8146DECL_INLINE_THROW(uint32_t)
8147iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8148{
8149 /*
8150 * Make sure we don't have any outstanding guest register writes as we may
8151 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8152 *
8153 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8154 */
8155 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8156
8157#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8158 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8159#else
8160 RT_NOREF(idxInstr);
8161#endif
8162
8163 /* Allocate a temporary CR0, CR4 and XCR0 register. */
8164 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8165 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8166 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
8167 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8168 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8169
8170#if 1
8171 off = iemNativeEmitBrk(pReNative, off, 0x4223); /** @todo Test this when AVX gets actually available. */
8172#endif
8173
8174 /** @todo r=aeichner Optimize this more later to have fewer compares and branches
8175 * (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h, but check that it has some
8176 * actual performance benefit first). */
8177 /*
8178 * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
8179 * return raisexcpt();
8180 */
8181 const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
8182 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxXcr0Reg); /* idxRegTmp &= xcr0 */
8183 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
8184 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8185
8186 /*
8187 * if (!(cr4 & X86_CR4_OSXSAVE))
8188 * return raisexcpt();
8189 */
8190 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT, idxLabelRaiseUd);
8191 /*
8192 * if (cr0 & X86_CR0_TS)
8193 * return raisexcpt();
8194 */
8195 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8196
8197 /* Free but don't flush the CR0, CR4 and XCR0 register. */
8198 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8199 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8200 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
8201
8202 return off;
8203}
8204
8205
8206#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
8207 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
8208
8209/**
8210 * Emits code to raise a SIMD floating point exception (either \#UD or \#XF).
8211 *
8212 * @returns New code buffer offset, UINT32_MAX on failure.
8213 * @param pReNative The native recompile state.
8214 * @param off The code buffer offset.
8215 * @param idxInstr The current instruction.
8216 */
8217DECL_INLINE_THROW(uint32_t)
8218iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8219{
8220 /*
8221 * Make sure we don't have any outstanding guest register writes as we may
8222 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
8223 *
8224 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8225 */
8226 off = iemNativeRegFlushPendingWrites(pReNative, off);
8227
8228#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8229 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8230#else
8231 RT_NOREF(idxInstr);
8232#endif
8233
8234 /* Allocate a temporary CR4 register. */
8235 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8236 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
8237 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8238
8239 /*
8240 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
8241 * return raisexcpt();
8242 */
8243 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
8244
8245 /* raise \#UD exception unconditionally. */
8246 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
8247
8248 /* Free but don't flush the CR4 register. */
8249 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8250
8251 return off;
8252}
8253
8254
8255
8256/*********************************************************************************************************************************
8257* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
8258*********************************************************************************************************************************/
8259
8260/**
8261 * Pushes an IEM_MC_IF_XXX onto the condition stack.
8262 *
8263 * @returns Pointer to the condition stack entry on success, NULL on failure
8264 * (too many nestings)
8265 */
8266DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
8267{
8268#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8269 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
8270#endif
8271
8272 uint32_t const idxStack = pReNative->cCondDepth;
8273 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
8274
8275 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
8276 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
8277
8278 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
8279 pEntry->fInElse = false;
8280 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
8281 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
8282
8283 return pEntry;
8284}
8285
8286
8287/**
8288 * Start of the if-block, snapshotting the register and variable state.
8289 */
8290DECL_INLINE_THROW(void)
8291iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
8292{
8293 Assert(offIfBlock != UINT32_MAX);
8294 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8295 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8296 Assert(!pEntry->fInElse);
8297
8298 /* Define the start of the IF block if requested or for disassembly purposes. */
8299 if (idxLabelIf != UINT32_MAX)
8300 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
8301#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8302 else
8303 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
8304#else
8305 RT_NOREF(offIfBlock);
8306#endif
8307
8308#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8309 Assert(pReNative->Core.offPc == 0);
8310#endif
8311
8312 /* Copy the initial state so we can restore it in the 'else' block. */
8313 pEntry->InitialState = pReNative->Core;
8314}
8315
8316
8317#define IEM_MC_ELSE() } while (0); \
8318 off = iemNativeEmitElse(pReNative, off); \
8319 do {
8320
8321/** Emits code related to IEM_MC_ELSE. */
8322DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8323{
8324 /* Check sanity and get the conditional stack entry. */
8325 Assert(off != UINT32_MAX);
8326 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8327 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8328 Assert(!pEntry->fInElse);
8329
8330 /* Jump to the endif */
8331 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
8332
8333 /* Define the else label and enter the else part of the condition. */
8334 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8335 pEntry->fInElse = true;
8336
8337#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8338 Assert(pReNative->Core.offPc == 0);
8339#endif
8340
8341 /* Snapshot the core state so we can do a merge at the endif and restore
8342 the snapshot we took at the start of the if-block. */
8343 pEntry->IfFinalState = pReNative->Core;
8344 pReNative->Core = pEntry->InitialState;
8345
8346 return off;
8347}
8348
8349
8350#define IEM_MC_ENDIF() } while (0); \
8351 off = iemNativeEmitEndIf(pReNative, off)
8352
8353/** Emits code related to IEM_MC_ENDIF. */
8354DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8355{
8356 /* Check sanity and get the conditional stack entry. */
8357 Assert(off != UINT32_MAX);
8358 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8359 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8360
8361#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8362 Assert(pReNative->Core.offPc == 0);
8363#endif
8364
8365 /*
8366 * Now we have to find common ground with the core state at the end of the
8367 * other code path. Use the smallest common denominator and just drop
8368 * anything that isn't the same in both states.
8369 */
8370 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
8371 * which is why we're doing this at the end of the else-block.
8372 * But we'd need more info about the future for that to be worth the effort. */
8373 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
8374 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
8375 {
8376 /* shadow guest stuff first. */
8377 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
8378 if (fGstRegs)
8379 {
8380 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
8381 do
8382 {
8383 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
8384 fGstRegs &= ~RT_BIT_64(idxGstReg);
8385
8386 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
8387 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
8388 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
8389 {
8390 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
8391 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
8392 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
8393 }
8394 } while (fGstRegs);
8395 }
8396 else
8397 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
8398
8399 /* Check variables next. For now we must require them to be identical
8400 or stuff we can recreate. */
8401 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
8402 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
8403 if (fVars)
8404 {
8405 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
8406 do
8407 {
8408 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
8409 fVars &= ~RT_BIT_32(idxVar);
8410
8411 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
8412 {
8413 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
8414 continue;
8415 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
8416 {
8417 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8418 if (idxHstReg != UINT8_MAX)
8419 {
8420 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8421 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8422 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
8423 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8424 }
8425 continue;
8426 }
8427 }
8428 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
8429 continue;
8430
8431 /* Irreconcilable, so drop it. */
8432 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8433 if (idxHstReg != UINT8_MAX)
8434 {
8435 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8436 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8437 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
8438 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8439 }
8440 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8441 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8442 } while (fVars);
8443 }
8444
8445 /* Finally, check that the host register allocations matches. */
8446 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
8447 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
8448 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
8449 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
8450 }
8451
8452 /*
8453 * Define the endif label and maybe the else one if we're still in the 'if' part.
8454 */
8455 if (!pEntry->fInElse)
8456 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8457 else
8458 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
8459 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
8460
8461 /* Pop the conditional stack. */
8462 pReNative->cCondDepth -= 1;
8463
8464 return off;
8465}
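
/* Illustrative expansion (sketch): in a generated recompiler function a conditional
   sequence along the lines of
       IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) ... IEM_MC_ELSE() ... IEM_MC_ENDIF();
   expands to roughly
   @code
   off = iemNativeEmitIfEflagsBitSet(pReNative, off, (X86_EFL_ZF)); do { ... } while (0);
   off = iemNativeEmitElse(pReNative, off);                         do { ... } while (0);
   off = iemNativeEmitEndIf(pReNative, off);
   @endcode
   The do/while pairs only provide scoping; the emitters above create and define the
   else/endif labels and reconcile the register and variable state of the two paths. */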
8466
8467
8468#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
8469 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
8470 do {
8471
8472/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
8473DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8474{
8475 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8476
8477 /* Get the eflags. */
8478 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8479 kIemNativeGstRegUse_ReadOnly);
8480
8481 /* Test and jump. */
8482 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8483
8484 /* Free but don't flush the EFlags register. */
8485 iemNativeRegFreeTmp(pReNative, idxEflReg);
8486
8487 /* Make a copy of the core state now as we start the if-block. */
8488 iemNativeCondStartIfBlock(pReNative, off);
8489
8490 return off;
8491}
8492
8493
8494#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
8495 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
8496 do {
8497
8498/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
8499DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8500{
8501 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8502
8503 /* Get the eflags. */
8504 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8505 kIemNativeGstRegUse_ReadOnly);
8506
8507 /* Test and jump. */
8508 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8509
8510 /* Free but don't flush the EFlags register. */
8511 iemNativeRegFreeTmp(pReNative, idxEflReg);
8512
8513 /* Make a copy of the core state now as we start the if-block. */
8514 iemNativeCondStartIfBlock(pReNative, off);
8515
8516 return off;
8517}
8518
8519
8520#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
8521 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
8522 do {
8523
8524/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
8525DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8526{
8527 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8528
8529 /* Get the eflags. */
8530 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8531 kIemNativeGstRegUse_ReadOnly);
8532
8533 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8534 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8535
8536 /* Test and jump. */
8537 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8538
8539 /* Free but don't flush the EFlags register. */
8540 iemNativeRegFreeTmp(pReNative, idxEflReg);
8541
8542 /* Make a copy of the core state now as we start the if-block. */
8543 iemNativeCondStartIfBlock(pReNative, off);
8544
8545 return off;
8546}
8547
8548
8549#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
8550 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
8551 do {
8552
8553/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
8554DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8555{
8556 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8557
8558 /* Get the eflags. */
8559 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8560 kIemNativeGstRegUse_ReadOnly);
8561
8562 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8563 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8564
8565 /* Test and jump. */
8566 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8567
8568 /* Free but don't flush the EFlags register. */
8569 iemNativeRegFreeTmp(pReNative, idxEflReg);
8570
8571 /* Make a copy of the core state now as we start the if-block. */
8572 iemNativeCondStartIfBlock(pReNative, off);
8573
8574 return off;
8575}
8576
8577
8578#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
8579 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
8580 do {
8581
8582#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
8583 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
8584 do {
8585
8586/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
8587DECL_INLINE_THROW(uint32_t)
8588iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8589 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8590{
8591 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8592
8593 /* Get the eflags. */
8594 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8595 kIemNativeGstRegUse_ReadOnly);
8596
8597 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8598 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8599
8600 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8601 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8602 Assert(iBitNo1 != iBitNo2);
8603
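    /* Worked example (illustrative): for IEM_MC_IF_EFL_BITS_EQ(X86_EFL_SF, X86_EFL_OF)
       we get iBitNo1=7 and iBitNo2=11.  The code below isolates SF in the temporary
       register, shifts it up by 4 so it lines up with OF and XORs in EFLAGS, so bit 11
       of the temporary ends up as SF ^ OF, i.e. zero exactly when the two flags are equal. */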
8604#ifdef RT_ARCH_AMD64
8605 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
8606
8607 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8608 if (iBitNo1 > iBitNo2)
8609 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8610 else
8611 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8612 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8613
8614#elif defined(RT_ARCH_ARM64)
8615 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8616 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8617
8618 /* and tmpreg, eflreg, #1<<iBitNo1 */
8619 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8620
8621 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8622 if (iBitNo1 > iBitNo2)
8623 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8624 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8625 else
8626 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8627 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8628
8629 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8630
8631#else
8632# error "Port me"
8633#endif
8634
8635 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8636 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8637 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8638
8639 /* Free but don't flush the EFlags and tmp registers. */
8640 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8641 iemNativeRegFreeTmp(pReNative, idxEflReg);
8642
8643 /* Make a copy of the core state now as we start the if-block. */
8644 iemNativeCondStartIfBlock(pReNative, off);
8645
8646 return off;
8647}
8648
8649
8650#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
8651 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
8652 do {
8653
8654#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
8655 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
8656 do {
8657
8658/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
8659 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
8660DECL_INLINE_THROW(uint32_t)
8661iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
8662 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8663{
8664 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8665
8666 /* We need an if-block label for the non-inverted variant. */
8667 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
8668 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
8669
8670 /* Get the eflags. */
8671 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8672 kIemNativeGstRegUse_ReadOnly);
8673
8674 /* Translate the flag masks to bit numbers. */
8675 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8676 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8677
8678 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8679 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8680 Assert(iBitNo1 != iBitNo);
8681
8682 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8683 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8684 Assert(iBitNo2 != iBitNo);
8685 Assert(iBitNo2 != iBitNo1);
8686
8687#ifdef RT_ARCH_AMD64
8688 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
8689#elif defined(RT_ARCH_ARM64)
8690 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8691#endif
8692
8693 /* Check for the lone bit first. */
8694 if (!fInverted)
8695 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8696 else
8697 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
8698
8699 /* Then extract and compare the other two bits. */
8700#ifdef RT_ARCH_AMD64
8701 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8702 if (iBitNo1 > iBitNo2)
8703 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8704 else
8705 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8706 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8707
8708#elif defined(RT_ARCH_ARM64)
8709 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8710
8711 /* and tmpreg, eflreg, #1<<iBitNo1 */
8712 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8713
8714 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8715 if (iBitNo1 > iBitNo2)
8716 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8717 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8718 else
8719 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8720 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8721
8722 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8723
8724#else
8725# error "Port me"
8726#endif
8727
8728 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8729 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8730 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8731
8732 /* Free but don't flush the EFlags and tmp registers. */
8733 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8734 iemNativeRegFreeTmp(pReNative, idxEflReg);
8735
8736 /* Make a copy of the core state now as we start the if-block. */
8737 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
8738
8739 return off;
8740}
8741
8742
8743#define IEM_MC_IF_CX_IS_NZ() \
8744 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
8745 do {
8746
8747/** Emits code for IEM_MC_IF_CX_IS_NZ. */
8748DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8749{
8750 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8751
8752 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8753 kIemNativeGstRegUse_ReadOnly);
8754 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
8755 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8756
8757 iemNativeCondStartIfBlock(pReNative, off);
8758 return off;
8759}
8760
8761
8762#define IEM_MC_IF_ECX_IS_NZ() \
8763 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
8764 do {
8765
8766#define IEM_MC_IF_RCX_IS_NZ() \
8767 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
8768 do {
8769
8770/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
8771DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8772{
8773 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8774
8775 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8776 kIemNativeGstRegUse_ReadOnly);
8777 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
8778 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8779
8780 iemNativeCondStartIfBlock(pReNative, off);
8781 return off;
8782}
8783
8784
8785#define IEM_MC_IF_CX_IS_NOT_ONE() \
8786 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
8787 do {
8788
8789/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
8790DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8791{
8792 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8793
8794 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8795 kIemNativeGstRegUse_ReadOnly);
8796#ifdef RT_ARCH_AMD64
8797 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8798#else
8799 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8800 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8801 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8802#endif
8803 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8804
8805 iemNativeCondStartIfBlock(pReNative, off);
8806 return off;
8807}
8808
8809
8810#define IEM_MC_IF_ECX_IS_NOT_ONE() \
8811 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
8812 do {
8813
8814#define IEM_MC_IF_RCX_IS_NOT_ONE() \
8815 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
8816 do {
8817
8818/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
8819DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8820{
8821 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8822
8823 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8824 kIemNativeGstRegUse_ReadOnly);
8825 if (f64Bit)
8826 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8827 else
8828 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8829 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8830
8831 iemNativeCondStartIfBlock(pReNative, off);
8832 return off;
8833}
8834
8835
8836#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8837 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
8838 do {
8839
8840#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8841 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
8842 do {
8843
8844/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
8845 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8846DECL_INLINE_THROW(uint32_t)
8847iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
8848{
8849 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8850
8851 /* We have to load both RCX and EFLAGS before we can start branching,
8852 otherwise we'll end up in the else-block with an inconsistent
8853 register allocator state.
8854 Doing EFLAGS first as it's more likely to be loaded, right? */
8855 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8856 kIemNativeGstRegUse_ReadOnly);
8857 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8858 kIemNativeGstRegUse_ReadOnly);
8859
8860 /** @todo we could reduce this to a single branch instruction by spending a
8861 * temporary register and some setnz stuff. Not sure if loops are
8862 * worth it. */
8863 /* Check CX. */
8864#ifdef RT_ARCH_AMD64
8865 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8866#else
8867 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8868 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8869 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8870#endif
8871
8872 /* Check the EFlags bit. */
8873 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8874 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8875 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
8876 !fCheckIfSet /*fJmpIfSet*/);
8877
8878 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8879 iemNativeRegFreeTmp(pReNative, idxEflReg);
8880
8881 iemNativeCondStartIfBlock(pReNative, off);
8882 return off;
8883}
8884
8885
8886#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8887 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
8888 do {
8889
8890#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8891 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
8892 do {
8893
8894#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8895 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
8896 do {
8897
8898#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8899 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
8900 do {
8901
8902/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
8903 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
8904 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
8905 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8906DECL_INLINE_THROW(uint32_t)
8907iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8908 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
8909{
8910 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8911
8912 /* We have to load both RCX and EFLAGS before we can start branching,
8913 otherwise we'll end up in the else-block with an inconsistent
8914 register allocator state.
8915 Doing EFLAGS first as it's more likely to be loaded, right? */
8916 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8917 kIemNativeGstRegUse_ReadOnly);
8918 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8919 kIemNativeGstRegUse_ReadOnly);
8920
8921 /** @todo we could reduce this to a single branch instruction by spending a
8922 * temporary register and some setnz stuff. Not sure if loops are
8923 * worth it. */
8924 /* Check RCX/ECX. */
8925 if (f64Bit)
8926 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8927 else
8928 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8929
8930 /* Check the EFlags bit. */
8931 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8932 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8933 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
8934 !fCheckIfSet /*fJmpIfSet*/);
8935
8936 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8937 iemNativeRegFreeTmp(pReNative, idxEflReg);
8938
8939 iemNativeCondStartIfBlock(pReNative, off);
8940 return off;
8941}
8942
8943
8944
8945/*********************************************************************************************************************************
8946* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
8947*********************************************************************************************************************************/
8948/** Number of hidden arguments for CIMPL calls.
8949 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
8950#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8951# define IEM_CIMPL_HIDDEN_ARGS 3
8952#else
8953# define IEM_CIMPL_HIDDEN_ARGS 2
8954#endif
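
/* Illustrative (sketch): a C-implementation handler declared via IEM_CIMPL_DEF_1 has
   roughly the shape
   @code
   VBOXSTRICTRC iemCImplHypotheticalWorker(PVMCPUCC pVCpu, uint8_t cbInstr, uint8_t uArg0);
   @endcode
   (the worker name and argument here are made up), i.e. pVCpu and cbInstr are the two
   hidden arguments.  When VBOXSTRICTRC is a strict class type (Windows/AMD64 strict
   builds) it is returned via an additional hidden pointer parameter, giving three. */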
8955
8956#define IEM_MC_NOREF(a_Name) \
8957 RT_NOREF_PV(a_Name)
8958
8959#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
8960 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
8961
8962#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
8963 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
8964
8965#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
8966 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
8967
8968#define IEM_MC_LOCAL(a_Type, a_Name) \
8969 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
8970
8971#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
8972 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
8973
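/* Illustrative (sketch): an MC block fragment such as
   @code
   IEM_MC_LOCAL(uint16_t, u16Dst);
   IEM_MC_ARG_LOCAL_REF(uint16_t *, pu16Dst, u16Dst, 0);
   @endcode
   becomes two variable-index allocations here, where pu16Dst ends up as a
   kIemNativeVarKind_VarRef variable referring to u16Dst, which in turn is forced
   to be a stack variable (see iemNativeVarSetKindToLocalRef below). */
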
8974
8975/**
8976 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
8977 */
8978DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
8979{
8980 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
8981 return IEM_CIMPL_HIDDEN_ARGS;
8982 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
8983 return 1;
8984 return 0;
8985}
8986
8987
8988/**
8989 * Internal work that allocates a variable with kind set to
8990 * kIemNativeVarKind_Invalid and no current stack allocation.
8991 *
8992 * The kind will either be set by the caller or later when the variable is first
8993 * assigned a value.
8994 *
8995 * @returns Unpacked index.
8996 * @internal
8997 */
8998static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
8999{
9000 Assert(cbType > 0 && cbType <= 64);
9001 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
9002 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
9003 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
9004 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9005 pReNative->Core.aVars[idxVar].cbVar = cbType;
9006 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9007 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9008 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
9009 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
9010 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
9011 pReNative->Core.aVars[idxVar].fRegAcquired = false;
9012 pReNative->Core.aVars[idxVar].u.uValue = 0;
9013 return idxVar;
9014}
9015
9016
9017/**
9018 * Internal work that allocates an argument variable w/o setting enmKind.
9019 *
9020 * @returns Unpacked index.
9021 * @internal
9022 */
9023static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9024{
9025 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
9026 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9027 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
9028
9029 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
9030 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
9031 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
9032 return idxVar;
9033}
9034
9035
9036/**
9037 * Gets the stack slot for a stack variable, allocating one if necessary.
9038 *
9039 * Calling this function implies that the stack slot will contain a valid
9040 * variable value. The caller deals with any register currently assigned to the
9041 * variable, typically by spilling it into the stack slot.
9042 *
9043 * @returns The stack slot number.
9044 * @param pReNative The recompiler state.
9045 * @param idxVar The variable.
9046 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
9047 */
9048DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9049{
9050 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9051 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9052 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9053
9054 /* Already got a slot? */
9055 uint8_t const idxStackSlot = pVar->idxStackSlot;
9056 if (idxStackSlot != UINT8_MAX)
9057 {
9058 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
9059 return idxStackSlot;
9060 }
9061
9062 /*
9063 * A single slot is easy to allocate.
9064 * Allocate them from the top end, closest to BP, to reduce the displacement.
9065 */
9066 if (pVar->cbVar <= sizeof(uint64_t))
9067 {
9068 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9069 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9070 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
9071 pVar->idxStackSlot = (uint8_t)iSlot;
9072 Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
9073 return (uint8_t)iSlot;
9074 }
9075
9076 /*
9077 * We need more than one stack slot.
9078 *
9079 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
9080 */
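    /* Worked example (illustrative): for cbVar = 32 the expressions below give
           fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 3
           fBitAllocMask = RT_BIT_32((32 + 7) >> 3)           - 1 = RT_BIT_32(4) - 1 = 0xf
       i.e. the variable needs four consecutive slots starting at a slot index that is
       a multiple of four. */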
9081 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
9082 Assert(pVar->cbVar <= 64);
9083 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
9084 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
9085 uint32_t bmStack = ~pReNative->Core.bmStack;
9086 while (bmStack != UINT32_MAX)
9087 {
9088/** @todo allocate from the top to reduce BP displacement. */
9089 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
9090 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9091 if (!(iSlot & fBitAlignMask))
9092 {
9093 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
9094 {
9095 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
9096 pVar->idxStackSlot = (uint8_t)iSlot;
9097 Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9098 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
9099 return (uint8_t)iSlot;
9100 }
9101 }
9102 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
9103 }
9104 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9105}
9106
9107
9108/**
9109 * Changes the variable to a stack variable.
9110 *
9111 * Currently this is only possible to do the first time the variable is used;
9112 * switching later can be implemented but hasn't been done.
9113 *
9114 * @param pReNative The recompiler state.
9115 * @param idxVar The variable.
9116 * @throws VERR_IEM_VAR_IPE_2
9117 */
9118static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9119{
9120 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9121 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9122 if (pVar->enmKind != kIemNativeVarKind_Stack)
9123 {
9124 /* We could in theory transition from immediate to stack as well, but it
9125 would involve the caller doing work storing the value on the stack. So,
9126 till that's required we only allow transition from invalid. */
9127 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9128 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9129 pVar->enmKind = kIemNativeVarKind_Stack;
9130
9131 /* Note! We don't allocate a stack slot here, that's only done when a
9132 slot is actually needed to hold a variable value. */
9133 }
9134}
9135
9136
9137/**
9138 * Sets the variable to a constant (immediate) value.
9139 *
9140 * This does not require stack storage as we know the value and can always
9141 * reload it, unless of course it's referenced.
9142 *
9143 * @param pReNative The recompiler state.
9144 * @param idxVar The variable.
9145 * @param uValue The immediate value.
9146 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9147 */
9148static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
9149{
9150 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9151 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9152 if (pVar->enmKind != kIemNativeVarKind_Immediate)
9153 {
9154 /* Only simple transitions for now. */
9155 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9156 pVar->enmKind = kIemNativeVarKind_Immediate;
9157 }
9158 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9159
9160 pVar->u.uValue = uValue;
9161 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
9162 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
9163 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
9164}
9165
9166
9167/**
9168 * Sets the variable to a reference (pointer) to @a idxOtherVar.
9169 *
9170 * This does not require stack storage as we know the value and can always
9171 * reload it. Loading is postponed till needed.
9172 *
9173 * @param pReNative The recompiler state.
9174 * @param idxVar The variable. Unpacked.
9175 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
9176 *
9177 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9178 * @internal
9179 */
9180static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
9181{
9182 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
9183 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
9184
9185 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
9186 {
9187 /* Only simple transitions for now. */
9188 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
9189 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9190 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
9191 }
9192 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9193
9194 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
9195
9196 /* Update the other variable, ensure it's a stack variable. */
9197 /** @todo handle variables with const values... that'll go boom now. */
9198 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
9199 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9200}
9201
9202
9203/**
9204 * Sets the variable to a reference (pointer) to a guest register reference.
9205 *
9206 * This does not require stack storage as we know the value and can always
9207 * reload it. Loading is postponed till needed.
9208 *
9209 * @param pReNative The recompiler state.
9210 * @param idxVar The variable.
9211 * @param enmRegClass The class guest registers to reference.
9212 * @param idxReg The register within @a enmRegClass to reference.
9213 *
9214 * @throws VERR_IEM_VAR_IPE_2
9215 */
9216static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9217 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
9218{
9219 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9220 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9221
9222 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
9223 {
9224 /* Only simple transitions for now. */
9225 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9226 pVar->enmKind = kIemNativeVarKind_GstRegRef;
9227 }
9228 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9229
9230 pVar->u.GstRegRef.enmClass = enmRegClass;
9231 pVar->u.GstRegRef.idx = idxReg;
9232}
9233
9234
9235DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9236{
9237 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9238}
9239
9240
9241DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
9242{
9243 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9244
9245 /* Since we're using a generic uint64_t value type, we must truncate it if
9246 the variable is smaller, otherwise we may end up with a too large value when
9247 scaling up an imm8 w/ sign-extension.
9248
9249 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
9250 in the bios, bx=1) when running on arm, because clang expects 16-bit
9251 register parameters to have bits 16 and up set to zero. Instead of
9252 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
9253 CF value in the result. */
9254 switch (cbType)
9255 {
9256 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9257 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9258 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9259 }
9260 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9261 return idxVar;
9262}
9263
9264
9265DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
9266{
9267 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
9268 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
9269 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
9270 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
9271 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
9272 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9273
9274 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
9275 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
9276 return idxArgVar;
9277}
9278
9279
9280DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9281{
9282 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9283 /* Don't set to stack now, leave that to the first use as for instance
9284 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
9285 return idxVar;
9286}
9287
9288
9289DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
9290{
9291 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9292
9293 /* Since we're using a generic uint64_t value type, we must truncate it if
9294 the variable is smaller, otherwise we may end up with a too large value when
9295 scaling up an imm8 w/ sign-extension. */
9296 switch (cbType)
9297 {
9298 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9299 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9300 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9301 }
9302 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9303 return idxVar;
9304}
9305
9306
9307/**
9308 * Makes sure variable @a idxVar has a register assigned to it and that it stays
9309 * fixed till we call iemNativeVarRegisterRelease.
9310 *
9311 * @returns The host register number.
9312 * @param pReNative The recompiler state.
9313 * @param idxVar The variable.
9314 * @param poff Pointer to the instruction buffer offset.
9315 * In case a register needs to be freed up or the value
9316 * loaded off the stack.
9317 * @param fInitialized Set if the variable must already have been initialized.
9318 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
9319 * the case.
9320 * @param idxRegPref Preferred register number or UINT8_MAX.
9321 */
9322DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
9323 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
9324{
9325 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9326 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9327 Assert(pVar->cbVar <= 8);
9328 Assert(!pVar->fRegAcquired);
9329
9330 uint8_t idxReg = pVar->idxReg;
9331 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9332 {
9333 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
9334 && pVar->enmKind < kIemNativeVarKind_End);
9335 pVar->fRegAcquired = true;
9336 return idxReg;
9337 }
9338
9339 /*
9340 * If the kind of variable has not yet been set, default to 'stack'.
9341 */
9342 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
9343 && pVar->enmKind < kIemNativeVarKind_End);
9344 if (pVar->enmKind == kIemNativeVarKind_Invalid)
9345 iemNativeVarSetKindToStack(pReNative, idxVar);
9346
9347 /*
9348 * We have to allocate a register for the variable, even if it's a stack one,
9349 * as we don't know whether there are modifications being made to it before it's
9350 * finalized (todo: analyze and insert hints about that?).
9351 *
9352 * If we can, we try to get the correct register for argument variables. This
9353 * assumes that most argument variables are fetched as close as possible
9354 * to the actual call, so that there aren't any interfering hidden calls
9355 * (memory accesses, etc) in between.
9356 *
9357 * If we cannot, or it's a local variable, we make sure no argument registers
9358 * that will be used by this MC block are allocated here, and we always
9359 * prefer non-volatile registers to avoid needing to spill stuff for internal
9360 * calls.
9361 */
9362 /** @todo Detect too early argument value fetches and warn about hidden
9363 * calls causing less optimal code to be generated in the python script. */
9364
9365 uint8_t const uArgNo = pVar->uArgNo;
9366 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
9367 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
9368 {
9369 idxReg = g_aidxIemNativeCallRegs[uArgNo];
9370 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9371 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
9372 }
9373 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
9374 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
9375 {
9376 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
9377 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
9378 & ~pReNative->Core.bmHstRegsWithGstShadow
9379 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
9380 & fNotArgsMask;
9381 if (fRegs)
9382 {
9383 /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
9384 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
9385 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
9386 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
9387 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
9388 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9389 }
9390 else
9391 {
9392 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
9393 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
9394 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
9395 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9396 }
9397 }
9398 else
9399 {
9400 idxReg = idxRegPref;
9401 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9402 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
9403 }
9404 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9405 pVar->idxReg = idxReg;
9406
9407 /*
9408 * Load it off the stack if we've got a stack slot.
9409 */
9410 uint8_t const idxStackSlot = pVar->idxStackSlot;
9411 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9412 {
9413 Assert(fInitialized);
9414 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9415 switch (pVar->cbVar)
9416 {
9417 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
9418 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
9419 case 3: AssertFailed(); RT_FALL_THRU();
9420 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
9421 default: AssertFailed(); RT_FALL_THRU();
9422 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
9423 }
9424 }
9425 else
9426 {
9427 Assert(idxStackSlot == UINT8_MAX);
9428 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9429 }
9430 pVar->fRegAcquired = true;
9431 return idxReg;
9432}
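
/* Illustrative usage pattern (sketch): emitter code typically brackets whatever it
   emits for a variable with an acquire/release pair, e.g.
   @code
   uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
   // ... emit instructions that read and/or write idxVarReg ...
   iemNativeVarRegisterRelease(pReNative, idxVar);
   @endcode */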
9433
9434
9435/**
9436 * The value of variable @a idxVar will be written in full to the @a enmGstReg
9437 * guest register.
9438 *
9439 * This function makes sure there is a register for it and sets it to be the
9440 * current shadow copy of @a enmGstReg.
9441 *
9442 * @returns The host register number.
9443 * @param pReNative The recompiler state.
9444 * @param idxVar The variable.
9445 * @param enmGstReg The guest register this variable will be written to
9446 * after this call.
9447 * @param poff Pointer to the instruction buffer offset.
9448 * In case a register needs to be freed up or if the
9449 * variable content needs to be loaded off the stack.
9450 *
9451 * @note We DO NOT expect @a idxVar to be an argument variable, because this
9452 * function is only used in the commit stage of an instruction.
9454 */
9455DECL_HIDDEN_THROW(uint8_t)
9456iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
9457{
9458 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9459 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9460 Assert(!pVar->fRegAcquired);
9461 AssertMsgStmt( pVar->cbVar <= 8
9462 && ( pVar->enmKind == kIemNativeVarKind_Immediate
9463 || pVar->enmKind == kIemNativeVarKind_Stack),
9464 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
9465 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
9466 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9467
9468 /*
9469 * This shouldn't ever be used for arguments, unless it's in a weird else
9470 * branch that doesn't do any calling and even then it's questionable.
9471 *
9472 * However, in case someone writes crazy wrong MC code and does register
9473 * updates before making calls, just use the regular register allocator to
9474 * ensure we get a register suitable for the intended argument number.
9475 */
9476 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
9477
9478 /*
9479 * If there is already a register for the variable, we transfer/set the
9480 * guest shadow copy assignment to it.
9481 */
9482 uint8_t idxReg = pVar->idxReg;
9483 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9484 {
9485 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
9486 {
9487 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
9488 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
9489 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
9490 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
9491 }
9492 else
9493 {
9494 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
9495 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
9496 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
9497 }
9498 /** @todo figure this one out. We need some way of making sure the register isn't
9499 * modified after this point, just in case we start writing crappy MC code. */
9500 pVar->enmGstReg = enmGstReg;
9501 pVar->fRegAcquired = true;
9502 return idxReg;
9503 }
9504 Assert(pVar->uArgNo == UINT8_MAX);
9505
9506 /*
9507 * Because this is supposed to be the commit stage, we just tag along with the
9508 * temporary register allocator and upgrade it to a variable register.
9509 */
9510 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
9511 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
9512 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
9513 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
9514 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
9515 pVar->idxReg = idxReg;
9516
9517 /*
9518 * Now we need to load the register value.
9519 */
9520 if (pVar->enmKind == kIemNativeVarKind_Immediate)
9521 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
9522 else
9523 {
9524 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9525 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9526 switch (pVar->cbVar)
9527 {
9528 case sizeof(uint64_t):
9529 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
9530 break;
9531 case sizeof(uint32_t):
9532 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
9533 break;
9534 case sizeof(uint16_t):
9535 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
9536 break;
9537 case sizeof(uint8_t):
9538 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
9539 break;
9540 default:
9541 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9542 }
9543 }
9544
9545 pVar->fRegAcquired = true;
9546 return idxReg;
9547}
9548
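/*
 * Illustrative usage sketch (not taken from the surrounding code; the variable
 * index a_idxValueVar and register index a_iGReg are hypothetical): an emitter
 * producing the full new value of a general purpose register would pair the
 * acquire with a release roughly like this:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, a_idxValueVar,
 *                                                                       IEMNATIVEGSTREG_GPR(a_iGReg), &off);
 *      // ... emit code that leaves the final value in idxVarReg ...
 *      iemNativeVarRegisterRelease(pReNative, a_idxValueVar);
 */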
9549
9550/**
9551 * Sets the host register for @a idxVarRc to @a idxReg.
9552 *
9553 * The register must not be allocated. Any guest register shadowing will be
9554 * implicitly dropped by this call.
9555 *
9556 * The variable must not have any register associated with it (causes
9557 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
9558 * implied.
9559 *
9560 * @returns idxReg
9561 * @param pReNative The recompiler state.
9562 * @param idxVar The variable.
9563 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
9564 * @param off For recording in debug info.
9565 *
9566 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
9567 */
9568DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
9569{
9570 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9571 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9572 Assert(!pVar->fRegAcquired);
9573 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
9574 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
9575 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
9576
9577 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
9578 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9579
9580 iemNativeVarSetKindToStack(pReNative, idxVar);
9581 pVar->idxReg = idxReg;
9582
9583 return idxReg;
9584}
9585
9586
9587/**
9588 * Convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
9589 */
9590DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9591 uint8_t idxReg, uint32_t *poff)
9592{
9593 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
9594 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
9595 return idxReg;
9596}
9597
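/*
 * Usage sketch (mirrors iemNativeEmitCallAImplCommon() further down): after
 * emitting a helper call, the return value register is bound to the result
 * variable like this:
 *
 *      off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
 *      iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
 */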
9598
9599/**
9600 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
9601 *
9602 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
9603 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
9604 * requirement of flushing anything in volatile host registers when making a
9605 * call.
9606 *
9607 * @returns New @a off value.
9608 * @param pReNative The recompiler state.
9609 * @param off The code buffer position.
9610 * @param fHstRegsNotToSave Set of registers not to save & restore.
9611 */
9612DECL_HIDDEN_THROW(uint32_t)
9613iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9614{
9615 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9616 if (fHstRegs)
9617 {
9618 do
9619 {
9620 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9621 fHstRegs &= ~RT_BIT_32(idxHstReg);
9622
9623 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9624 {
9625 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9626 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9627 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9628 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9629 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9630 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9631 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9632 {
9633 case kIemNativeVarKind_Stack:
9634 {
9635 /* Temporarily spill the variable register. */
9636 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9637 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9638 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9639 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9640 continue;
9641 }
9642
9643 case kIemNativeVarKind_Immediate:
9644 case kIemNativeVarKind_VarRef:
9645 case kIemNativeVarKind_GstRegRef:
9646 /* It is weird to have any of these loaded at this point. */
9647 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9648 continue;
9649
9650 case kIemNativeVarKind_End:
9651 case kIemNativeVarKind_Invalid:
9652 break;
9653 }
9654 AssertFailed();
9655 }
9656 else
9657 {
9658 /*
9659 * Allocate a temporary stack slot and spill the register to it.
9660 */
9661 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9662 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
9663 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9664 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
9665 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
9666 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9667 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9668 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9669 }
9670 } while (fHstRegs);
9671 }
9672 return off;
9673}
9674
9675
9676/**
9677 * Emit code to restore volatile registers after a call to a helper.
9678 *
9679 * @returns New @a off value.
9680 * @param pReNative The recompiler state.
9681 * @param off The code buffer position.
9682 * @param fHstRegsNotToSave Set of registers not to save & restore.
9683 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
9684 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
9685 */
9686DECL_HIDDEN_THROW(uint32_t)
9687iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9688{
9689 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9690 if (fHstRegs)
9691 {
9692 do
9693 {
9694 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9695 fHstRegs &= ~RT_BIT_32(idxHstReg);
9696
9697 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9698 {
9699 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9700 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9701 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9702 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9703 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9704 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9705 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9706 {
9707 case kIemNativeVarKind_Stack:
9708 {
9709 /* Unspill the variable register. */
9710 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9711 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
9712 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9713 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9714 continue;
9715 }
9716
9717 case kIemNativeVarKind_Immediate:
9718 case kIemNativeVarKind_VarRef:
9719 case kIemNativeVarKind_GstRegRef:
9720 /* It is weird to have any of these loaded at this point. */
9721 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9722 continue;
9723
9724 case kIemNativeVarKind_End:
9725 case kIemNativeVarKind_Invalid:
9726 break;
9727 }
9728 AssertFailed();
9729 }
9730 else
9731 {
9732 /*
9733 * Restore from temporary stack slot.
9734 */
9735 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
9736 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
9737 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
9738 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
9739
9740 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9741 }
9742 } while (fHstRegs);
9743 }
9744 return off;
9745}
9746
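/*
 * Illustrative save/call/restore bracket (a sketch only; the real TLB-miss
 * sequences live in the memory emitters and pfnHelper is hypothetical):
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */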
9747
9748/**
9749 * Worker that frees the stack slots for variable @a idxVar if any are allocated.
9750 *
9751 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
9752 *
9753 * ASSUMES that @a idxVar is valid and unpacked.
9754 */
9755DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9756{
9757 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
9758 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
9759 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9760 {
9761 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
9762 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
9763 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
9764 Assert(cSlots > 0);
9765 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
9766 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9767 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
9768 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
9769 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9770 }
9771 else
9772 Assert(idxStackSlot == UINT8_MAX);
9773}
9774
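/*
 * Worked example of the slot math above: a 32 byte variable gives
 * cSlots = (32 + 8 - 1) / 8 = 4 and fAllocMask = RT_BIT_32(4) - 1 = 0xf,
 * which is then cleared from bmStack at bit position idxStackSlot.
 */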
9775
9776/**
9777 * Worker that frees a single variable.
9778 *
9779 * ASSUMES that @a idxVar is valid and unpacked.
9780 */
9781DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9782{
9783 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
9784 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
9785 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
9786
9787 /* Free the host register first if any assigned. */
9788 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9789 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9790 {
9791 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9792 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9793 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9794 }
9795
9796 /* Free argument mapping. */
9797 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
9798 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
9799 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
9800
9801 /* Free the stack slots. */
9802 iemNativeVarFreeStackSlots(pReNative, idxVar);
9803
9804 /* Free the actual variable. */
9805 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9806 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9807}
9808
9809
9810/**
9811 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
9812 */
9813DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
9814{
9815 while (bmVars != 0)
9816 {
9817 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9818 bmVars &= ~RT_BIT_32(idxVar);
9819
9820#if 1 /** @todo optimize by simplifying this later... */
9821 iemNativeVarFreeOneWorker(pReNative, idxVar);
9822#else
9823 /* Only need to free the host register, the rest is done as bulk updates below. */
9824 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9825 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9826 {
9827 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9828 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9829 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9830 }
9831#endif
9832 }
9833#if 0 /** @todo optimize by simplifying this later... */
9834 pReNative->Core.bmVars = 0;
9835 pReNative->Core.bmStack = 0;
9836 pReNative->Core.u64ArgVars = UINT64_MAX;
9837#endif
9838}
9839
9840
9841/**
9842 * This is called by IEM_MC_END() to clean up all variables.
9843 */
9844DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
9845{
9846 uint32_t const bmVars = pReNative->Core.bmVars;
9847 if (bmVars != 0)
9848 iemNativeVarFreeAllSlow(pReNative, bmVars);
9849 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9850 Assert(pReNative->Core.bmStack == 0);
9851}
9852
9853
9854#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
9855
9856/**
9857 * This is called by IEM_MC_FREE_LOCAL.
9858 */
9859DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9860{
9861 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9862 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
9863 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9864}
9865
9866
9867#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
9868
9869/**
9870 * This is called by IEM_MC_FREE_ARG.
9871 */
9872DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9873{
9874 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9875 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
9876 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9877}
9878
9879
9880#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
9881
9882/**
9883 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
9884 */
9885DECL_INLINE_THROW(uint32_t)
9886iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
9887{
9888 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
9889 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
9890 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9891 Assert( pVarDst->cbVar == sizeof(uint16_t)
9892 || pVarDst->cbVar == sizeof(uint32_t));
9893
9894 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
9895 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
9896 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
9897 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
9898 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9899
9900 Assert(pVarDst->cbVar < pVarSrc->cbVar);
9901
9902 /*
9903 * Special case for immediates.
9904 */
9905 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
9906 {
9907 switch (pVarDst->cbVar)
9908 {
9909 case sizeof(uint16_t):
9910 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
9911 break;
9912 case sizeof(uint32_t):
9913 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
9914 break;
9915 default: AssertFailed(); break;
9916 }
9917 }
9918 else
9919 {
9920 /*
9921 * The generic solution for now.
9922 */
9923 /** @todo optimize this by having the python script make sure the source
9924 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
9925 * statement. Then we could just transfer the register assignments. */
9926 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
9927 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
9928 switch (pVarDst->cbVar)
9929 {
9930 case sizeof(uint16_t):
9931 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
9932 break;
9933 case sizeof(uint32_t):
9934 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
9935 break;
9936 default: AssertFailed(); break;
9937 }
9938 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
9939 iemNativeVarRegisterRelease(pReNative, idxVarDst);
9940 }
9941 return off;
9942}
9943
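/*
 * MC-level usage sketch (assumes the IEM_MC_LOCAL macro from IEMMc.h; shown
 * purely to illustrate what the emitter above has to cope with):
 *
 *      IEM_MC_LOCAL(uint64_t, u64Value);
 *      IEM_MC_LOCAL(uint32_t, u32Value);
 *      ...
 *      IEM_MC_ASSIGN_TO_SMALLER(u32Value, u64Value);
 */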
9944
9945
9946/*********************************************************************************************************************************
9947* Emitters for IEM_MC_CALL_CIMPL_XXX *
9948*********************************************************************************************************************************/
9949
9950/**
9951 * Emits code to load a reference to the given guest register into @a idxGprDst.
9952 */
9953DECL_INLINE_THROW(uint32_t)
9954iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
9955 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
9956{
9957#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9958 /** @todo If we are ever going to allow referencing the RIP register we need to update the guest value here. */
9959#endif
9960
9961 /*
9962 * Get the offset relative to the CPUMCTX structure.
9963 */
9964 uint32_t offCpumCtx;
9965 switch (enmClass)
9966 {
9967 case kIemNativeGstRegRef_Gpr:
9968 Assert(idxRegInClass < 16);
9969 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
9970 break;
9971
9972 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
9973 Assert(idxRegInClass < 4);
9974 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
9975 break;
9976
9977 case kIemNativeGstRegRef_EFlags:
9978 Assert(idxRegInClass == 0);
9979 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
9980 break;
9981
9982 case kIemNativeGstRegRef_MxCsr:
9983 Assert(idxRegInClass == 0);
9984 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
9985 break;
9986
9987 case kIemNativeGstRegRef_FpuReg:
9988 Assert(idxRegInClass < 8);
9989 AssertFailed(); /** @todo what kind of indexing? */
9990 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
9991 break;
9992
9993 case kIemNativeGstRegRef_MReg:
9994 Assert(idxRegInClass < 8);
9995 AssertFailed(); /** @todo what kind of indexing? */
9996 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
9997 break;
9998
9999 case kIemNativeGstRegRef_XReg:
10000 Assert(idxRegInClass < 16);
10001 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
10002 break;
10003
10004 default:
10005 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
10006 }
10007
10008 /*
10009 * Load the value into the destination register.
10010 */
10011#ifdef RT_ARCH_AMD64
10012 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
10013
10014#elif defined(RT_ARCH_ARM64)
10015 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10016 Assert(offCpumCtx < 4096);
10017 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
10018
10019#else
10020# error "Port me!"
10021#endif
10022
10023 return off;
10024}
10025
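/*
 * Example sketch: loading the address of guest RAX (GPR 0, X86_GREG_xAX) into
 * the first call argument register, as done for kIemNativeVarKind_GstRegRef
 * arguments in iemNativeEmitCallCommon() below:
 *
 *      off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
 *                                           kIemNativeGstRegRef_Gpr, 0);
 */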
10026
10027/**
10028 * Common code for CIMPL and AIMPL calls.
10029 *
10030 * These are calls that use argument variables and such. They should not be
10031 * confused with internal calls required to implement an MC operation,
10032 * like a TLB load and similar.
10033 *
10034 * Upon return all that is left to do is to load any hidden arguments and
10035 * perform the call. All argument variables are freed.
10036 *
10037 * @returns New code buffer offset; throws VBox status code on error.
10038 * @param pReNative The native recompile state.
10039 * @param off The code buffer offset.
10040 * @param cArgs The total number of arguments (includes hidden
10041 * count).
10042 * @param cHiddenArgs The number of hidden arguments. The hidden
10043 * arguments must not have any variable declared for
10044 * them, whereas all the regular arguments must
10045 * (tstIEMCheckMc ensures this).
10046 */
10047DECL_HIDDEN_THROW(uint32_t)
10048iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
10049{
10050#ifdef VBOX_STRICT
10051 /*
10052 * Assert sanity.
10053 */
10054 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
10055 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
10056 for (unsigned i = 0; i < cHiddenArgs; i++)
10057 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
10058 for (unsigned i = cHiddenArgs; i < cArgs; i++)
10059 {
10060 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
10061 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
10062 }
10063 iemNativeRegAssertSanity(pReNative);
10064#endif
10065
10066 /* We don't know what the called function makes use of, so flush any pending register writes. */
10067 off = iemNativeRegFlushPendingWrites(pReNative, off);
10068
10069 /*
10070 * Before we do anything else, go over variables that are referenced and
10071 * make sure they are not in a register.
10072 */
10073 uint32_t bmVars = pReNative->Core.bmVars;
10074 if (bmVars)
10075 {
10076 do
10077 {
10078 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
10079 bmVars &= ~RT_BIT_32(idxVar);
10080
10081 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
10082 {
10083 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
10084 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
10085 {
10086 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
10087 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
10088 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
10089 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
10090 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
10091
10092 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
10093 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
10094 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
10095 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
10096 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
10097 }
10098 }
10099 } while (bmVars != 0);
10100#if 0 //def VBOX_STRICT
10101 iemNativeRegAssertSanity(pReNative);
10102#endif
10103 }
10104
10105 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
10106
10107 /*
10108 * First, go over the host registers that will be used for arguments and make
10109 * sure they either hold the desired argument or are free.
10110 */
10111 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
10112 {
10113 for (uint32_t i = 0; i < cRegArgs; i++)
10114 {
10115 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10116 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10117 {
10118 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
10119 {
10120 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
10121 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
10122 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
10123 Assert(pVar->idxReg == idxArgReg);
10124 uint8_t const uArgNo = pVar->uArgNo;
10125 if (uArgNo == i)
10126 { /* perfect */ }
10127 /* The variable allocator logic should make sure this is impossible,
10128 except for when the return register is used as a parameter (ARM,
10129 but not x86). */
10130#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
10131 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
10132 {
10133# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10134# error "Implement this"
10135# endif
10136 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
10137 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
10138 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
10139 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10140 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
10141 }
10142#endif
10143 else
10144 {
10145 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10146
10147 if (pVar->enmKind == kIemNativeVarKind_Stack)
10148 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
10149 else
10150 {
10151 /* just free it, can be reloaded if used again */
10152 pVar->idxReg = UINT8_MAX;
10153 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
10154 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
10155 }
10156 }
10157 }
10158 else
10159 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
10160 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
10161 }
10162 }
10163#if 0 //def VBOX_STRICT
10164 iemNativeRegAssertSanity(pReNative);
10165#endif
10166 }
10167
10168 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
10169
10170#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10171 /*
10172 * If there are any stack arguments, make sure they are in their place as well.
10173 *
10174 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
10175 * the caller) will be loading it later and it must be free (see the first loop).
10176 */
10177 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
10178 {
10179 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
10180 {
10181 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10182 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
10183 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10184 {
10185 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
10186 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
10187 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
10188 pVar->idxReg = UINT8_MAX;
10189 }
10190 else
10191 {
10192 /* Use ARG0 as temp for stuff we need registers for. */
10193 switch (pVar->enmKind)
10194 {
10195 case kIemNativeVarKind_Stack:
10196 {
10197 uint8_t const idxStackSlot = pVar->idxStackSlot;
10198 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10199 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
10200 iemNativeStackCalcBpDisp(idxStackSlot));
10201 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10202 continue;
10203 }
10204
10205 case kIemNativeVarKind_Immediate:
10206 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
10207 continue;
10208
10209 case kIemNativeVarKind_VarRef:
10210 {
10211 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10212 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10213 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10214 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10215 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10216 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10217 {
10218 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10219 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10220 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10221 }
10222 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10223 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10224 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
10225 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10226 continue;
10227 }
10228
10229 case kIemNativeVarKind_GstRegRef:
10230 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
10231 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10232 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10233 continue;
10234
10235 case kIemNativeVarKind_Invalid:
10236 case kIemNativeVarKind_End:
10237 break;
10238 }
10239 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10240 }
10241 }
10242# if 0 //def VBOX_STRICT
10243 iemNativeRegAssertSanity(pReNative);
10244# endif
10245 }
10246#else
10247 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
10248#endif
10249
10250 /*
10251 * Make sure the argument variables are loaded into their respective registers.
10252 *
10253 * We can optimize this by ASSUMING that any register allocations are for
10254 * registers that have already been loaded and are ready. The previous step
10255 * saw to that.
10256 */
10257 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
10258 {
10259 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10260 {
10261 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10262 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10263 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
10264 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
10265 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
10266 else
10267 {
10268 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10269 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10270 {
10271 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
10272 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
10273 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
10274 | RT_BIT_32(idxArgReg);
10275 pVar->idxReg = idxArgReg;
10276 }
10277 else
10278 {
10279 /* Use ARG0 as temp for stuff we need registers for. */
10280 switch (pVar->enmKind)
10281 {
10282 case kIemNativeVarKind_Stack:
10283 {
10284 uint8_t const idxStackSlot = pVar->idxStackSlot;
10285 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10286 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
10287 continue;
10288 }
10289
10290 case kIemNativeVarKind_Immediate:
10291 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
10292 continue;
10293
10294 case kIemNativeVarKind_VarRef:
10295 {
10296 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10297 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10298 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
10299 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10300 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10301 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10302 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10303 {
10304 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10305 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10306 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10307 }
10308 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10309 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10310 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
10311 continue;
10312 }
10313
10314 case kIemNativeVarKind_GstRegRef:
10315 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
10316 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10317 continue;
10318
10319 case kIemNativeVarKind_Invalid:
10320 case kIemNativeVarKind_End:
10321 break;
10322 }
10323 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10324 }
10325 }
10326 }
10327#if 0 //def VBOX_STRICT
10328 iemNativeRegAssertSanity(pReNative);
10329#endif
10330 }
10331#ifdef VBOX_STRICT
10332 else
10333 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10334 {
10335 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
10336 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
10337 }
10338#endif
10339
10340 /*
10341 * Free all argument variables (simplified).
10342 * Their lifetime always expires with the call they are for.
10343 */
10344 /** @todo Make the python script check that arguments aren't used after
10345 * IEM_MC_CALL_XXXX. */
10346 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
10347 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
10348 * an argument value. There is also some FPU stuff. */
10349 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
10350 {
10351 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
10352 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
10353
10354 /* no need to free registers: */
10355 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
10356 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
10357 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
10358 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
10359 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
10360 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
10361
10362 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
10363 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
10364 iemNativeVarFreeStackSlots(pReNative, idxVar);
10365 }
10366 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
10367
10368 /*
10369 * Flush volatile registers as we make the call.
10370 */
10371 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
10372
10373 return off;
10374}
10375
10376
10377/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
10378DECL_HIDDEN_THROW(uint32_t)
10379iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
10380 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
10381
10382{
10383 /*
10384 * Do all the call setup and cleanup.
10385 */
10386 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
10387
10388 /*
10389 * Load the two or three hidden arguments.
10390 */
10391#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10392 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
10393 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10394 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
10395#else
10396 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10397 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
10398#endif
10399
10400 /*
10401 * Make the call and check the return code.
10402 *
10403 * Shadow PC copies are always flushed here, other stuff depends on flags.
10404 * Segment and general purpose registers are explicitly flushed via the
10405 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
10406 * macros.
10407 */
10408 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
10409#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10410 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
10411#endif
10412 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
10413 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
10414 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
10415 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
10416
10417 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
10418}
10419
10420
10421#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
10422 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
10423
10424/** Emits code for IEM_MC_CALL_CIMPL_1. */
10425DECL_INLINE_THROW(uint32_t)
10426iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10427 uintptr_t pfnCImpl, uint8_t idxArg0)
10428{
10429 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10430 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
10431}
10432
10433
10434#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
10435 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
10436
10437/** Emits code for IEM_MC_CALL_CIMPL_2. */
10438DECL_INLINE_THROW(uint32_t)
10439iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10440 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
10441{
10442 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10443 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10444 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
10445}
10446
10447
10448#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
10449 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10450 (uintptr_t)a_pfnCImpl, a0, a1, a2)
10451
10452/** Emits code for IEM_MC_CALL_CIMPL_3. */
10453DECL_INLINE_THROW(uint32_t)
10454iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10455 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10456{
10457 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10458 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10459 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10460 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
10461}
10462
10463
10464#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
10465 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10466 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
10467
10468/** Emits code for IEM_MC_CALL_CIMPL_4. */
10469DECL_INLINE_THROW(uint32_t)
10470iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10471 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10472{
10473 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10474 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10475 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10476 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10477 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
10478}
10479
10480
10481#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
10482 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10483 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
10484
10485/** Emits code for IEM_MC_CALL_CIMPL_5. */
10486DECL_INLINE_THROW(uint32_t)
10487iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10488 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
10489{
10490 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10491 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10492 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10493 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10494 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
10495 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
10496}
10497
10498
10499/** Recompiler debugging: Flush guest register shadow copies. */
10500#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
10501
10502
10503
10504/*********************************************************************************************************************************
10505* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
10506*********************************************************************************************************************************/
10507
10508/**
10509 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
10510 */
10511DECL_INLINE_THROW(uint32_t)
10512iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10513 uintptr_t pfnAImpl, uint8_t cArgs)
10514{
10515 if (idxVarRc != UINT8_MAX)
10516 {
10517 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
10518 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
10519 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
10520 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
10521 }
10522
10523 /*
10524 * Do all the call setup and cleanup.
10525 */
10526 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
10527
10528 /*
10529 * Make the call and update the return code variable if we've got one.
10530 */
10531 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10532 if (idxVarRc != UINT8_MAX)
10533 {
10534 off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
10535 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
10536 }
10537
10538 return off;
10539}
10540
10541
10542
10543#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
10544 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
10545
10546#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
10547 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
10548
10549/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
10550DECL_INLINE_THROW(uint32_t)
10551iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
10552{
10553 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
10554}
10555
10556
10557#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
10558 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
10559
10560#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
10561 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
10562
10563/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
10564DECL_INLINE_THROW(uint32_t)
10565iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
10566{
10567 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10568 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
10569}
10570
10571
10572#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
10573 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
10574
10575#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
10576 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
10577
10578/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
10579DECL_INLINE_THROW(uint32_t)
10580iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10581 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10582{
10583 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10584 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10585 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
10586}
10587
10588
10589#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
10590 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
10591
10592#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
10593 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
10594
10595/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
10596DECL_INLINE_THROW(uint32_t)
10597iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10598 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10599{
10600 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10601 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10602 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10603 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
10604}
10605
10606
10607#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
10608 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10609
10610#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
10611 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10612
10613/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
10614DECL_INLINE_THROW(uint32_t)
10615iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10616 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10617{
10618 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10619 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10620 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10621 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
10622 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
10623}
10624
10625
10626
10627/*********************************************************************************************************************************
10628* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
10629*********************************************************************************************************************************/
10630
10631#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
10632 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
10633
10634#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10635 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
10636
10637#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10638 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
10639
10640#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10641 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
10642
10643
10644/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
10645 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
10646DECL_INLINE_THROW(uint32_t)
10647iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
10648{
10649 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10650 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10651 Assert(iGRegEx < 20);
10652
10653 /* Same discussion as in iemNativeEmitFetchGregU16 */
10654 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10655 kIemNativeGstRegUse_ReadOnly);
10656
10657 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10658 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10659
10660 /* The value is zero-extended to the full 64-bit host register width. */
10661 if (iGRegEx < 16)
10662 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10663 else
10664 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10665
10666 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10667 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10668 return off;
10669}
10670
10671
10672#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10673 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
10674
10675#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10676 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
10677
10678#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10679 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
10680
10681/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
10682DECL_INLINE_THROW(uint32_t)
10683iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
10684{
10685 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10686 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10687 Assert(iGRegEx < 20);
10688
10689 /* Same discussion as in iemNativeEmitFetchGregU16 */
10690 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10691 kIemNativeGstRegUse_ReadOnly);
10692
10693 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10694 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10695
10696 if (iGRegEx < 16)
10697 {
10698 switch (cbSignExtended)
10699 {
10700 case sizeof(uint16_t):
10701 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10702 break;
10703 case sizeof(uint32_t):
10704 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10705 break;
10706 case sizeof(uint64_t):
10707 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10708 break;
10709 default: AssertFailed(); break;
10710 }
10711 }
10712 else
10713 {
10714 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10715 switch (cbSignExtended)
10716 {
10717 case sizeof(uint16_t):
10718 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10719 break;
10720 case sizeof(uint32_t):
10721 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10722 break;
10723 case sizeof(uint64_t):
10724 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10725 break;
10726 default: AssertFailed(); break;
10727 }
10728 }
10729
10730 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10731 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10732 return off;
10733}
10734
10735
10736
10737#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
10738 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
10739
10740#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
10741 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10742
10743#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
10744 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10745
10746/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
10747DECL_INLINE_THROW(uint32_t)
10748iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10749{
10750 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10751 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10752 Assert(iGReg < 16);
10753
10754 /*
10755 * We can either just load the low 16-bit of the GPR into a host register
10756 * for the variable, or we can do so via a shadow copy host register. The
10757 * latter will avoid having to reload it if it's being stored later, but
10758 * will waste a host register if it isn't touched again. Since we don't
10759 * know what's going to happen, we choose the latter for now.
10760 */
10761 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10762 kIemNativeGstRegUse_ReadOnly);
10763
10764 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10765 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10766 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10767 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10768
10769 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10770 return off;
10771}
10772
10773
10774#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
10775 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10776
10777#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
10778 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10779
10780/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
10781DECL_INLINE_THROW(uint32_t)
10782iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
10783{
10784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10785 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10786 Assert(iGReg < 16);
10787
10788 /*
10789 * We can either just load the low 16-bit of the GPR into a host register
10790 * for the variable, or we can do so via a shadow copy host register. The
10791 * latter will avoid having to reload it if it's being stored later, but
10792 * will waste a host register if it isn't touched again. Since we don't
10793 * know what's going to happen, we choose the latter for now.
10794 */
10795 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10796 kIemNativeGstRegUse_ReadOnly);
10797
10798 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10799 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10800 if (cbSignExtended == sizeof(uint32_t))
10801 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10802 else
10803 {
10804 Assert(cbSignExtended == sizeof(uint64_t));
10805 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10806 }
10807 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10808
10809 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10810 return off;
10811}
10812
10813
10814#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
10815 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
10816
10817#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
10818 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
10819
10820/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
10821DECL_INLINE_THROW(uint32_t)
10822iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10823{
10824 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10825 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10826 Assert(iGReg < 16);
10827
10828 /*
10829 * We can either just load the low 32 bits of the GPR into a host register
10830 * for the variable, or we can do so via a shadow copy host register. The
10831 * latter will avoid having to reload it if it's being stored later, but
10832 * will waste a host register if it isn't touched again. Since we don't
10833 * know what's going to happen, we choose the latter for now.
10834 */
10835 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10836 kIemNativeGstRegUse_ReadOnly);
10837
10838 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10839 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10840 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10841 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10842
10843 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10844 return off;
10845}
10846
10847
10848#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
10849 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
10850
10851/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
10852DECL_INLINE_THROW(uint32_t)
10853iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10854{
10855 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10856 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10857 Assert(iGReg < 16);
10858
10859 /*
10860 * We can either just load the low 32 bits of the GPR into a host register
10861 * for the variable, or we can do so via a shadow copy host register. The
10862 * latter will avoid having to reload it if it's being stored later, but
10863 * will waste a host register if it isn't touched again. Since we don't
10864 * know what's going to happen, we choose the latter for now.
10865 */
10866 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10867 kIemNativeGstRegUse_ReadOnly);
10868
10869 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10870 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10871 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10872 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10873
10874 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10875 return off;
10876}
10877
10878
10879#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
10880 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10881
10882#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
10883 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10884
10885/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
10886 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
10887DECL_INLINE_THROW(uint32_t)
10888iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10889{
10890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10891 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10892 Assert(iGReg < 16);
10893
10894 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10895 kIemNativeGstRegUse_ReadOnly);
10896
10897 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10898 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10899 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
10900 /** @todo name the register a shadow one already? */
10901 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10902
10903 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10904 return off;
10905}
10906
10907
10908
10909/*********************************************************************************************************************************
10910* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
10911*********************************************************************************************************************************/
10912
10913#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
10914 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
10915
10916/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
10917DECL_INLINE_THROW(uint32_t)
10918iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
10919{
10920 Assert(iGRegEx < 20);
10921 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10922 kIemNativeGstRegUse_ForUpdate);
10923#ifdef RT_ARCH_AMD64
10924 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
10925
10926 /* To the lowest byte of the register: mov r8, imm8 */
10927 if (iGRegEx < 16)
10928 {
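    /* Note: without a REX prefix, byte-register encodings 4-7 select AH/CH/DH/BH;
       with any (even empty) REX prefix they select SPL/BPL/SIL/DIL, hence the
       X86_OP_REX below for host registers 4 thru 7. */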
10929 if (idxGstTmpReg >= 8)
10930 pbCodeBuf[off++] = X86_OP_REX_B;
10931 else if (idxGstTmpReg >= 4)
10932 pbCodeBuf[off++] = X86_OP_REX;
10933 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
10934 pbCodeBuf[off++] = u8Value;
10935 }
10936 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
10937 else if (idxGstTmpReg < 4)
10938 {
10939 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
10940 pbCodeBuf[off++] = u8Value;
10941 }
10942 else
10943 {
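    /* Example with idxGstTmpReg = r9: ror r9, 8 / mov r9b, imm8 / rol r9, 8
       deposits imm8 into bits 15:8 of r9 while leaving all other bits untouched. */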
10944 /* ror reg64, 8 */
10945 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
10946 pbCodeBuf[off++] = 0xc1;
10947 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10948 pbCodeBuf[off++] = 8;
10949
10950 /* mov reg8, imm8 */
10951 if (idxGstTmpReg >= 8)
10952 pbCodeBuf[off++] = X86_OP_REX_B;
10953 else if (idxGstTmpReg >= 4)
10954 pbCodeBuf[off++] = X86_OP_REX;
10955 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
10956 pbCodeBuf[off++] = u8Value;
10957
10958 /* rol reg64, 8 */
10959 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
10960 pbCodeBuf[off++] = 0xc1;
10961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10962 pbCodeBuf[off++] = 8;
10963 }
10964
10965#elif defined(RT_ARCH_ARM64)
10966 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
10967 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10968 if (iGRegEx < 16)
10969 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
10970 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
10971 else
10972 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
10973 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
10974 iemNativeRegFreeTmp(pReNative, idxImmReg);
10975
10976#else
10977# error "Port me!"
10978#endif
10979
10980 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10981
10982 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
10983
10984 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10985 return off;
10986}
10987
10988
10989#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
10990 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
10991
10992/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
10993DECL_INLINE_THROW(uint32_t)
10994iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
10995{
10996 Assert(iGRegEx < 20);
10997 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10998
10999 /*
11000 * If it's a constant value (unlikely) we treat this as an
11001 * IEM_MC_STORE_GREG_U8_CONST statement.
11002 */
11003 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11004 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11005 { /* likely */ }
11006 else
11007 {
11008 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11009 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11010 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
11011 }
11012
11013 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
11014 kIemNativeGstRegUse_ForUpdate);
11015 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11016
11017#ifdef RT_ARCH_AMD64
11018 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
11019 if (iGRegEx < 16)
11020 {
11021 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
11022 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11023 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11024 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11025 pbCodeBuf[off++] = X86_OP_REX;
11026 pbCodeBuf[off++] = 0x8a;
11027 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11028 }
11029 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
11030 else if (idxGstTmpReg < 4 && idxVarReg < 4)
11031 {
11032 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
11033 pbCodeBuf[off++] = 0x8a;
11034 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
11035 }
11036 else
11037 {
11038 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
11039
11040 /* ror reg64, 8 */
11041 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11042 pbCodeBuf[off++] = 0xc1;
11043 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11044 pbCodeBuf[off++] = 8;
11045
11046 /* mov reg8, reg8(r/m) */
11047 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11048 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11049 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11050 pbCodeBuf[off++] = X86_OP_REX;
11051 pbCodeBuf[off++] = 0x8a;
11052 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11053
11054 /* rol reg64, 8 */
11055 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11056 pbCodeBuf[off++] = 0xc1;
11057 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11058 pbCodeBuf[off++] = 8;
11059 }
11060
11061#elif defined(RT_ARCH_ARM64)
11062 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
11063 or
11064 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
11065 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11066 if (iGRegEx < 16)
11067 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
11068 else
11069 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
11070
11071#else
11072# error "Port me!"
11073#endif
11074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11075
11076 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11077
11078 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11079 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11080 return off;
11081}
11082
11083
11084
11085#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
11086 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
11087
11088/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
11089DECL_INLINE_THROW(uint32_t)
11090iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
11091{
11092 Assert(iGReg < 16);
11093 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11094 kIemNativeGstRegUse_ForUpdate);
11095#ifdef RT_ARCH_AMD64
11096 /* mov reg16, imm16 */
11097 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
11098 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11099 if (idxGstTmpReg >= 8)
11100 pbCodeBuf[off++] = X86_OP_REX_B;
11101 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
11102 pbCodeBuf[off++] = RT_BYTE1(uValue);
11103 pbCodeBuf[off++] = RT_BYTE2(uValue);
11104
11105#elif defined(RT_ARCH_ARM64)
11106 /* movk xdst, #uValue, lsl #0 */
11107 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11108 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
11109
11110#else
11111# error "Port me!"
11112#endif
11113
11114 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11115
11116 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11117 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11118 return off;
11119}
11120
11121
11122#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
11123 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
11124
11125/** Emits code for IEM_MC_STORE_GREG_U16. */
11126DECL_INLINE_THROW(uint32_t)
11127iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11128{
11129 Assert(iGReg < 16);
11130 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11131
11132 /*
11133 * If it's a constant value (unlikely) we treat this as an
11134 * IEM_MC_STORE_GREG_U16_CONST statement.
11135 */
11136 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11137 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11138 { /* likely */ }
11139 else
11140 {
11141 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11142 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11143 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
11144 }
11145
11146 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11147 kIemNativeGstRegUse_ForUpdate);
11148
11149#ifdef RT_ARCH_AMD64
11150 /* mov reg16, reg16 or [mem16] */
11151 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
11152 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11153 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
11154 {
11155 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
11156 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
11157 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
11158 pbCodeBuf[off++] = 0x8b;
11159 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
11160 }
11161 else
11162 {
11163 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
11164 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
11165 if (idxGstTmpReg >= 8)
11166 pbCodeBuf[off++] = X86_OP_REX_R;
11167 pbCodeBuf[off++] = 0x8b;
11168 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
11169 }
11170
11171#elif defined(RT_ARCH_ARM64)
11172 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
11173 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11174 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11175 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
11176 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11177
11178#else
11179# error "Port me!"
11180#endif
11181
11182 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11183
11184 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11185 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11186 return off;
11187}
11188
11189
11190#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
11191 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
11192
11193/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
11194DECL_INLINE_THROW(uint32_t)
11195iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
11196{
11197 Assert(iGReg < 16);
11198 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11199 kIemNativeGstRegUse_ForFullWrite);
11200 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11201 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11202 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11203 return off;
11204}
11205
11206
11207#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
11208 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
11209
11210/** Emits code for IEM_MC_STORE_GREG_U32. */
11211DECL_INLINE_THROW(uint32_t)
11212iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11213{
11214 Assert(iGReg < 16);
11215 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11216
11217 /*
11218 * If it's a constant value (unlikely) we treat this as an
11219 * IEM_MC_STORE_GREG_U32_CONST statement.
11220 */
11221 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11222 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11223 { /* likely */ }
11224 else
11225 {
11226 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11227 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11228 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
11229 }
11230
11231 /*
11232 * For the rest we allocate a guest register for the variable and write
11233 * it to the CPUMCTX structure.
11234 */
11235 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11236 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11237#ifdef VBOX_STRICT
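    /* IEM_MC_STORE_GREG_U32 implies zero extension, so the variable's host register is
       expected to have bits 63:32 clear already; verify that in strict builds. */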
11238 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
11239#endif
11240 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11241 return off;
11242}
11243
11244
11245#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
11246 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
11247
11248/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
11249DECL_INLINE_THROW(uint32_t)
11250iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
11251{
11252 Assert(iGReg < 16);
11253 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11254 kIemNativeGstRegUse_ForFullWrite);
11255 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11256 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11257 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11258 return off;
11259}
11260
11261
11262#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
11263 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
11264
11265/** Emits code for IEM_MC_STORE_GREG_U64. */
11266DECL_INLINE_THROW(uint32_t)
11267iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11268{
11269 Assert(iGReg < 16);
11270 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11271
11272 /*
11273 * If it's a constant value (unlikely) we treat this as an
11274 * IEM_MC_STORE_GREG_U64_CONST statement.
11275 */
11276 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11277 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11278 { /* likely */ }
11279 else
11280 {
11281 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11282 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11283 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
11284 }
11285
11286 /*
11287 * For the rest we allocate a guest register for the variable and write
11288 * it to the CPUMCTX structure.
11289 */
11290 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11291 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11292 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11293 return off;
11294}
11295
11296
11297#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
11298 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
11299
11300/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
11301DECL_INLINE_THROW(uint32_t)
11302iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
11303{
11304 Assert(iGReg < 16);
11305 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11306 kIemNativeGstRegUse_ForUpdate);
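    /* A 32-bit register-to-register move zero-extends into the full 64-bit register,
       so this single move clears bits 63:32 as required. */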
11307 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
11308 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11309 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11310 return off;
11311}
11312
11313
11314/*********************************************************************************************************************************
11315* General purpose register manipulation (add, sub). *
11316*********************************************************************************************************************************/
11317
11318#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
11319 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
11320
11321/** Emits code for IEM_MC_ADD_GREG_U16. */
11322DECL_INLINE_THROW(uint32_t)
11323iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
11324{
11325 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11326 kIemNativeGstRegUse_ForUpdate);
11327
11328#ifdef RT_ARCH_AMD64
11329 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11330 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11331 if (idxGstTmpReg >= 8)
11332 pbCodeBuf[off++] = X86_OP_REX_B;
11333 if (uAddend == 1)
11334 {
11335 pbCodeBuf[off++] = 0xff; /* inc */
11336 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11337 }
11338 else
11339 {
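        /* add r16, imm16 (0x81 /0) */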
11340 pbCodeBuf[off++] = 0x81;
11341 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11342 pbCodeBuf[off++] = uAddend;
11343 pbCodeBuf[off++] = 0;
11344 }
11345
11346#else
11347 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11348 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11349
11350 /* add tmp, gstgrp, uAddend */
11351 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
11352
11353 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
11354 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11355
11356 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11357#endif
11358
11359 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11360
11361 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11362
11363 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11364 return off;
11365}
11366
11367
11368#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
11369 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11370
11371#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
11372 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11373
11374/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
11375DECL_INLINE_THROW(uint32_t)
11376iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
11377{
11378 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11379 kIemNativeGstRegUse_ForUpdate);
11380
11381#ifdef RT_ARCH_AMD64
11382 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11383 if (f64Bit)
11384 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11385 else if (idxGstTmpReg >= 8)
11386 pbCodeBuf[off++] = X86_OP_REX_B;
11387 if (uAddend == 1)
11388 {
11389 pbCodeBuf[off++] = 0xff; /* inc */
11390 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11391 }
11392 else if (uAddend < 128)
11393 {
11394 pbCodeBuf[off++] = 0x83; /* add */
11395 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11396 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11397 }
11398 else
11399 {
11400 pbCodeBuf[off++] = 0x81; /* add */
11401 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11402 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11403 pbCodeBuf[off++] = 0;
11404 pbCodeBuf[off++] = 0;
11405 pbCodeBuf[off++] = 0;
11406 }
11407
11408#else
11409 /* add gstgrp, gstgrp, uAddend */
11410 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11411 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
11412
11413#endif
11414
11415 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11416
11417 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11418
11419 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11420 return off;
11421}
11422
11423
11424
11425#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11426 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11427
11428/** Emits code for IEM_MC_SUB_GREG_U16. */
11429DECL_INLINE_THROW(uint32_t)
11430iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
11431{
11432 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11433 kIemNativeGstRegUse_ForUpdate);
11434
11435#ifdef RT_ARCH_AMD64
11436 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11437 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11438 if (idxGstTmpReg >= 8)
11439 pbCodeBuf[off++] = X86_OP_REX_B;
11440 if (uSubtrahend == 1)
11441 {
11442 pbCodeBuf[off++] = 0xff; /* dec */
11443 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11444 }
11445 else
11446 {
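        /* sub r16, imm16 (0x81 /5) */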
11447 pbCodeBuf[off++] = 0x81;
11448 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11449 pbCodeBuf[off++] = uSubtrahend;
11450 pbCodeBuf[off++] = 0;
11451 }
11452
11453#else
11454 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11455 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11456
11457 /* sub tmp, gstgrp, uSubtrahend */
11458 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
11459
11460 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
11461 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11462
11463 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11464#endif
11465
11466 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11467
11468 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11469
11470 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11471 return off;
11472}
11473
11474
11475#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
11476 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11477
11478#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
11479 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11480
11481/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
11482DECL_INLINE_THROW(uint32_t)
11483iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
11484{
11485 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11486 kIemNativeGstRegUse_ForUpdate);
11487
11488#ifdef RT_ARCH_AMD64
11489 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11490 if (f64Bit)
11491 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11492 else if (idxGstTmpReg >= 8)
11493 pbCodeBuf[off++] = X86_OP_REX_B;
11494 if (uSubtrahend == 1)
11495 {
11496 pbCodeBuf[off++] = 0xff; /* dec */
11497 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11498 }
11499 else if (uSubtrahend < 128)
11500 {
11501 pbCodeBuf[off++] = 0x83; /* sub */
11502 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11503 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11504 }
11505 else
11506 {
11507 pbCodeBuf[off++] = 0x81; /* sub */
11508 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11509 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11510 pbCodeBuf[off++] = 0;
11511 pbCodeBuf[off++] = 0;
11512 pbCodeBuf[off++] = 0;
11513 }
11514
11515#else
11516 /* sub gstgrp, gstgrp, uSubtrahend */
11517 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11518 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
11519
11520#endif
11521
11522 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11523
11524 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11525
11526 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11527 return off;
11528}
11529
11530
11531/*********************************************************************************************************************************
11532* Local variable manipulation (add, sub, and, or). *
11533*********************************************************************************************************************************/
11534
11535#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
11536 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11537
11538#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
11539 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11540
11541#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
11542 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11543
11544#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
11545 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11546
11547/** Emits code for AND'ing a local and a constant value. */
11548DECL_INLINE_THROW(uint32_t)
11549iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11550{
11551#ifdef VBOX_STRICT
11552 switch (cbMask)
11553 {
11554 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11555 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11556 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11557 case sizeof(uint64_t): break;
11558 default: AssertFailedBreak();
11559 }
11560#endif
11561
11562 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11563 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11564
11565 if (cbMask <= sizeof(uint32_t))
11566 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
11567 else
11568 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
11569
11570 iemNativeVarRegisterRelease(pReNative, idxVar);
11571 return off;
11572}
11573
11574
11575#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
11576 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11577
11578#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
11579 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11580
11581#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
11582 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11583
11584#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
11585 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11586
11587/** Emits code for OR'ing a local and a constant value. */
11588DECL_INLINE_THROW(uint32_t)
11589iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11590{
11591#ifdef VBOX_STRICT
11592 switch (cbMask)
11593 {
11594 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11595 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11596 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11597 case sizeof(uint64_t): break;
11598 default: AssertFailedBreak();
11599 }
11600#endif
11601
11602 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11603 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11604
11605 if (cbMask <= sizeof(uint32_t))
11606 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
11607 else
11608 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
11609
11610 iemNativeVarRegisterRelease(pReNative, idxVar);
11611 return off;
11612}
11613
11614
11615#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
11616 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
11617
11618#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
11619 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
11620
11621#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
11622 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
11623
11624/** Emits code for reversing the byte order in a local value. */
11625DECL_INLINE_THROW(uint32_t)
11626iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
11627{
11628 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11629 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
11630
11631 switch (cbLocal)
11632 {
11633 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
11634 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
11635 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
11636 default: AssertFailedBreak();
11637 }
11638
11639 iemNativeVarRegisterRelease(pReNative, idxVar);
11640 return off;
11641}
11642
11643
11644
11645/*********************************************************************************************************************************
11646* EFLAGS *
11647*********************************************************************************************************************************/
11648
11649#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
11650# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
11651#else
11652# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
11653 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
11654
11655DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
11656{
11657 if (fEflOutput)
11658 {
11659 PVMCPUCC const pVCpu = pReNative->pVCpu;
11660# ifndef IEMLIVENESS_EXTENDED_LAYOUT
11661 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
11662 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
11663 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
11664# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11665 if (fEflOutput & (a_fEfl)) \
11666 { \
11667 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
11668 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11669 else \
11670 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11671 } else do { } while (0)
11672# else
11673 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
11674 IEMLIVENESSBIT const LivenessClobbered =
11675 {
11676 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11677 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11678 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11679 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11680 };
11681 IEMLIVENESSBIT const LivenessDelayable =
11682 {
11683 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11684 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11685 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11686 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11687 };
11688# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11689 if (fEflOutput & (a_fEfl)) \
11690 { \
11691 if (LivenessClobbered.a_fLivenessMember) \
11692 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11693 else if (LivenessDelayable.a_fLivenessMember) \
11694 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
11695 else \
11696 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11697 } else do { } while (0)
11698# endif
11699 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
11700 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
11701 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
11702 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
11703 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
11704 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
11705 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
11706# undef CHECK_FLAG_AND_UPDATE_STATS
11707 }
11708 RT_NOREF(fEflInput);
11709}
11710#endif /* VBOX_WITH_STATISTICS */
11711
11712#undef IEM_MC_FETCH_EFLAGS /* should not be used */
11713#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11714 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
11715
11716/** Handles IEM_MC_FETCH_EFLAGS_EX. */
11717DECL_INLINE_THROW(uint32_t)
11718iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
11719 uint32_t fEflInput, uint32_t fEflOutput)
11720{
11721 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
11722 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11723 RT_NOREF(fEflInput, fEflOutput);
11724
11725#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
11726# ifdef VBOX_STRICT
11727 if ( pReNative->idxCurCall != 0
11728 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
11729 {
11730 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
11731 uint32_t const fBoth = fEflInput | fEflOutput;
11732# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
11733 AssertMsg( !(fBoth & (a_fElfConst)) \
11734 || (!(fEflInput & (a_fElfConst)) \
11735 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11736 : !(fEflOutput & (a_fElfConst)) \
11737 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11738 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
11739 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
11740 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
11741 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
11742 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
11743 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
11744 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
11745 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
11746 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
11747# undef ASSERT_ONE_EFL
11748 }
11749# endif
11750#endif
11751
11752 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
11753 * the existing shadow copy. */
11754 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
11755 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11756 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
11757 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11758 return off;
11759}
11760
11761
11762
11763/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
11764 * start using it with custom native code emission (inlining assembly
11765 * instruction helpers). */
11766#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
11767#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11768 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11769 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
11770
11771/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
11772DECL_INLINE_THROW(uint32_t)
11773iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
11774{
11775 RT_NOREF(fEflOutput);
11776 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
11777 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11778
11779#ifdef VBOX_STRICT
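    /* Strict sanity check of the value being committed: the reserved always-one bit
       (X86_EFL_RA1_MASK) must be set and the reserved must-be-zero hardware bits must be
       clear, otherwise we hit a breakpoint (0x2001 / 0x2002 respectively). */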
11780 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
11781 uint32_t offFixup = off;
11782 off = iemNativeEmitJnzToFixed(pReNative, off, off);
11783 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
11784 iemNativeFixupFixedJump(pReNative, offFixup, off);
11785
11786 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
11787 offFixup = off;
11788 off = iemNativeEmitJzToFixed(pReNative, off, off);
11789 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
11790 iemNativeFixupFixedJump(pReNative, offFixup, off);
11791
11792 /** @todo validate that only bits in the fEflOutput mask changed. */
11793#endif
11794
11795 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11796 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
11797 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11798 return off;
11799}
11800
11801
11802
11803/*********************************************************************************************************************************
11804* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX). *
11805*********************************************************************************************************************************/
11806
11807#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
11808 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
11809
11810#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
11811 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
11812
11813#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
11814 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
11815
11816
11817/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
11818 * IEM_MC_FETCH_SREG_ZX_U64. */
11819DECL_INLINE_THROW(uint32_t)
11820iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
11821{
11822 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
11823 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
11824 Assert(iSReg < X86_SREG_COUNT);
11825
11826 /*
11827 * For now, we will not create a shadow copy of a selector. The rationale
11828 * is that since we do not recompile the popping and loading of segment
11829 * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
11830 * pushing and moving to registers, there is only a small chance that the
11831 * shadow copy will be accessed again before the register is reloaded. One
11832 * scenario would be nested calls in 16-bit code, but I doubt it's worth
11833 * the extra register pressure atm.
11834 *
11835 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
11836 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
11837 * store scenario covered at present (r160730).
11838 */
11839 iemNativeVarSetKindToStack(pReNative, idxDstVar);
11840 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
11841 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
11842 iemNativeVarRegisterRelease(pReNative, idxDstVar);
11843 return off;
11844}
11845
11846
11847
11848/*********************************************************************************************************************************
11849* Register references. *
11850*********************************************************************************************************************************/
11851
11852#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
11853 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
11854
11855#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
11856 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
11857
11858/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
11859DECL_INLINE_THROW(uint32_t)
11860iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
11861{
11862 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
11863 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11864 Assert(iGRegEx < 20);
11865
11866 if (iGRegEx < 16)
11867 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11868 else
11869 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
11870
11871 /* If we've delayed writing back the register value, flush it now. */
11872 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11873
11874 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11875 if (!fConst)
11876 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
11877
11878 return off;
11879}
11880
11881#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
11882 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
11883
11884#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
11885 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
11886
11887#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
11888 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
11889
11890#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
11891 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
11892
11893#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
11894 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
11895
11896#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
11897 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
11898
11899#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
11900 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
11901
11902#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
11903 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
11904
11905#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
11906 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
11907
11908#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
11909 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
11910
11911/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
11912DECL_INLINE_THROW(uint32_t)
11913iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
11914{
11915 Assert(iGReg < 16);
11916 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
11917 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11918
11919 /* If we've delayed writing back the register value, flush it now. */
11920 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
11921
11922 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11923 if (!fConst)
11924 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
11925
11926 return off;
11927}
11928
11929
11930#undef IEM_MC_REF_EFLAGS /* should not be used. */
11931#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
11932 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11933 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
11934
11935/** Handles IEM_MC_REF_EFLAGS. */
11936DECL_INLINE_THROW(uint32_t)
11937iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
11938{
11939 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
11940 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11941
11942 /* If we've delayed writing back the register value, flush it now. */
11943 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
11944
11945 /* If there is a shadow copy of guest EFLAGS, flush it now. */
11946 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
11947
11948 return off;
11949}
11950
11951
11952/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
11953 * different code from the threaded recompiler, maybe it would be helpful. For now
11954 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
11955#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
11956
11957
11958#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
11959 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
11960
11961#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
11962 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
11963
11964#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
11965 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
11966
11967/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
11968DECL_INLINE_THROW(uint32_t)
11969iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
11970{
11971 Assert(iXReg < 16);
11972 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
11973 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11974
11975 /* If we've delayed writing back the register value, flush it now. */
11976 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
11977
11978#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
11979 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11980 if (!fConst)
11981 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
11982#else
11983 RT_NOREF(fConst);
11984#endif
11985
11986 return off;
11987}
11988
11989
11990#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
11991 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
11992
11993/** Handles IEM_MC_REF_MXCSR. */
11994DECL_INLINE_THROW(uint32_t)
11995iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
11996{
11997 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
11998 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11999
12000 /* If we've delayed writing back the register value, flush it now. */
12001 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
12002
12003 /* If there is a shadow copy of guest MXCSR, flush it now. */
12004 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
12005
12006 return off;
12007}
12008
12009
12010
12011/*********************************************************************************************************************************
12012* Effective Address Calculation *
12013*********************************************************************************************************************************/
12014#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
12015 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
12016
12017/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
12018 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
12019DECL_INLINE_THROW(uint32_t)
12020iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12021 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
12022{
12023 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12024
12025 /*
12026 * Handle the disp16 form with no registers first.
12027 *
12028 * Convert to an immediate value, as that'll delay the register allocation
12029 * and assignment till the memory access / call / whatever and we can use
12030 * a more appropriate register (or none at all).
12031 */
12032 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
12033 {
12034 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
12035 return off;
12036 }
12037
12038 /* Determine the displacement. */
12039 uint16_t u16EffAddr;
12040 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12041 {
12042 case 0: u16EffAddr = 0; break;
12043 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
12044 case 2: u16EffAddr = u16Disp; break;
12045 default: AssertFailedStmt(u16EffAddr = 0);
12046 }
12047
12048 /* Determine the registers involved. */
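    /* Classic 16-bit ModR/M table: 0=BX+SI, 1=BX+DI, 2=BP+SI, 3=BP+DI, 4=SI, 5=DI, 6=BP, 7=BX
       (the mod=0, r/m=6 disp16-only form was already handled above). */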
12049 uint8_t idxGstRegBase;
12050 uint8_t idxGstRegIndex;
12051 switch (bRm & X86_MODRM_RM_MASK)
12052 {
12053 case 0:
12054 idxGstRegBase = X86_GREG_xBX;
12055 idxGstRegIndex = X86_GREG_xSI;
12056 break;
12057 case 1:
12058 idxGstRegBase = X86_GREG_xBX;
12059 idxGstRegIndex = X86_GREG_xDI;
12060 break;
12061 case 2:
12062 idxGstRegBase = X86_GREG_xBP;
12063 idxGstRegIndex = X86_GREG_xSI;
12064 break;
12065 case 3:
12066 idxGstRegBase = X86_GREG_xBP;
12067 idxGstRegIndex = X86_GREG_xDI;
12068 break;
12069 case 4:
12070 idxGstRegBase = X86_GREG_xSI;
12071 idxGstRegIndex = UINT8_MAX;
12072 break;
12073 case 5:
12074 idxGstRegBase = X86_GREG_xDI;
12075 idxGstRegIndex = UINT8_MAX;
12076 break;
12077 case 6:
12078 idxGstRegBase = X86_GREG_xBP;
12079 idxGstRegIndex = UINT8_MAX;
12080 break;
12081#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
12082 default:
12083#endif
12084 case 7:
12085 idxGstRegBase = X86_GREG_xBX;
12086 idxGstRegIndex = UINT8_MAX;
12087 break;
12088 }
12089
12090 /*
12091 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
12092 */
12093 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12094 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12095 kIemNativeGstRegUse_ReadOnly);
12096 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
12097 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12098 kIemNativeGstRegUse_ReadOnly)
12099 : UINT8_MAX;
12100#ifdef RT_ARCH_AMD64
12101 if (idxRegIndex == UINT8_MAX)
12102 {
12103 if (u16EffAddr == 0)
12104 {
12105            /* movzx ret, base */
12106 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
12107 }
12108 else
12109 {
12110 /* lea ret32, [base64 + disp32] */
12111 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12112 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12113 if (idxRegRet >= 8 || idxRegBase >= 8)
12114 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12115 pbCodeBuf[off++] = 0x8d;
12116 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12117 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
12118 else
12119 {
12120 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
12121 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12122 }
12123 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12124 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12125 pbCodeBuf[off++] = 0;
12126 pbCodeBuf[off++] = 0;
12127 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12128
12129 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12130 }
12131 }
12132 else
12133 {
12134 /* lea ret32, [index64 + base64 (+ disp32)] */
12135 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12136 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12137 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12138 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12139 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12140 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12141 pbCodeBuf[off++] = 0x8d;
12142 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
12143 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12144 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
12145 if (bMod == X86_MOD_MEM4)
12146 {
12147 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12148 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12149 pbCodeBuf[off++] = 0;
12150 pbCodeBuf[off++] = 0;
12151 }
12152 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12153 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12154 }
12155
12156#elif defined(RT_ARCH_ARM64)
12157 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
12158 if (u16EffAddr == 0)
12159 {
12160 if (idxRegIndex == UINT8_MAX)
12161 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
12162 else
12163 {
12164 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
12165 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12166 }
12167 }
12168 else
12169 {
12170 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
12171 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
12172 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
12173 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12174 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
12175 else
12176 {
12177 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
12178 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12179 }
12180 if (idxRegIndex != UINT8_MAX)
12181 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
12182 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12183 }
12184
12185#else
12186# error "port me"
12187#endif
12188
12189 if (idxRegIndex != UINT8_MAX)
12190 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12191 iemNativeRegFreeTmp(pReNative, idxRegBase);
12192 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12193 return off;
12194}
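/*
 * A minimal reference sketch (plain C, illustrative names only) of the value the emitter
 * above reproduces for 16-bit addressing, assuming the usual 16-bit wrap-around:
 *      uint16_t const uEffAddr = (uint16_t)(uBase + uIndex + u16Disp);
 * E.g. BX=0xfff0, SI=0x0020, disp8=+0x05 yields (uint16_t)(0xfff0 + 0x0020 + 0x0005) = 0x0015;
 * the carry out of bit 15 is discarded, which is why the code paths above all end by
 * zero-extending the low 16 bits of the result register.
 */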
12195
12196
12197#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
12198 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
12199
12200/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
12201 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
12202DECL_INLINE_THROW(uint32_t)
12203iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12204 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
12205{
12206 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12207
12208 /*
12209 * Handle the disp32 form with no registers first.
12210 *
12211 * Convert to an immediate value, as that'll delay the register allocation
12212 * and assignment till the memory access / call / whatever and we can use
12213 * a more appropriate register (or none at all).
12214 */
12215 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12216 {
12217 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
12218 return off;
12219 }
12220
12221    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
12222 uint32_t u32EffAddr = 0;
12223 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12224 {
12225 case 0: break;
12226 case 1: u32EffAddr = (int8_t)u32Disp; break;
12227 case 2: u32EffAddr = u32Disp; break;
12228 default: AssertFailed();
12229 }
12230
12231 /* Get the register (or SIB) value. */
12232 uint8_t idxGstRegBase = UINT8_MAX;
12233 uint8_t idxGstRegIndex = UINT8_MAX;
12234 uint8_t cShiftIndex = 0;
12235 switch (bRm & X86_MODRM_RM_MASK)
12236 {
12237 case 0: idxGstRegBase = X86_GREG_xAX; break;
12238 case 1: idxGstRegBase = X86_GREG_xCX; break;
12239 case 2: idxGstRegBase = X86_GREG_xDX; break;
12240 case 3: idxGstRegBase = X86_GREG_xBX; break;
12241 case 4: /* SIB */
12242 {
12243            /* index w/ scaling. */
12244 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12245 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12246 {
12247 case 0: idxGstRegIndex = X86_GREG_xAX; break;
12248 case 1: idxGstRegIndex = X86_GREG_xCX; break;
12249 case 2: idxGstRegIndex = X86_GREG_xDX; break;
12250 case 3: idxGstRegIndex = X86_GREG_xBX; break;
12251 case 4: cShiftIndex = 0; /*no index*/ break;
12252 case 5: idxGstRegIndex = X86_GREG_xBP; break;
12253 case 6: idxGstRegIndex = X86_GREG_xSI; break;
12254 case 7: idxGstRegIndex = X86_GREG_xDI; break;
12255 }
12256
12257 /* base */
12258 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
12259 {
12260 case 0: idxGstRegBase = X86_GREG_xAX; break;
12261 case 1: idxGstRegBase = X86_GREG_xCX; break;
12262 case 2: idxGstRegBase = X86_GREG_xDX; break;
12263 case 3: idxGstRegBase = X86_GREG_xBX; break;
12264 case 4:
12265 idxGstRegBase = X86_GREG_xSP;
12266 u32EffAddr += uSibAndRspOffset >> 8;
12267 break;
12268 case 5:
12269 if ((bRm & X86_MODRM_MOD_MASK) != 0)
12270 idxGstRegBase = X86_GREG_xBP;
12271 else
12272 {
12273 Assert(u32EffAddr == 0);
12274 u32EffAddr = u32Disp;
12275 }
12276 break;
12277 case 6: idxGstRegBase = X86_GREG_xSI; break;
12278 case 7: idxGstRegBase = X86_GREG_xDI; break;
12279 }
12280 break;
12281 }
12282 case 5: idxGstRegBase = X86_GREG_xBP; break;
12283 case 6: idxGstRegBase = X86_GREG_xSI; break;
12284 case 7: idxGstRegBase = X86_GREG_xDI; break;
12285 }
12286
12287 /*
12288 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12289 * the start of the function.
12290 */
12291 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12292 {
12293 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
12294 return off;
12295 }
12296
12297 /*
12298 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12299 */
12300 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12301 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12302 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12303 kIemNativeGstRegUse_ReadOnly);
12304 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12305 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12306 kIemNativeGstRegUse_ReadOnly);
12307
12308 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12309 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12310 {
12311 idxRegBase = idxRegIndex;
12312 idxRegIndex = UINT8_MAX;
12313 }
12314
12315#ifdef RT_ARCH_AMD64
12316 if (idxRegIndex == UINT8_MAX)
12317 {
12318 if (u32EffAddr == 0)
12319 {
12320 /* mov ret, base */
12321 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12322 }
12323 else
12324 {
12325 /* lea ret32, [base64 + disp32] */
12326 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12327 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12328 if (idxRegRet >= 8 || idxRegBase >= 8)
12329 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12330 pbCodeBuf[off++] = 0x8d;
12331 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12332 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12333 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12334 else
12335 {
12336 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12337 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12338 }
12339 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12340 if (bMod == X86_MOD_MEM4)
12341 {
12342 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12343 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12344 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12345 }
12346 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12347 }
12348 }
12349 else
12350 {
12351 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12352 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12353 if (idxRegBase == UINT8_MAX)
12354 {
12355 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
12356 if (idxRegRet >= 8 || idxRegIndex >= 8)
12357 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12358 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12359 pbCodeBuf[off++] = 0x8d;
12360 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12361 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12362 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12363 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12364 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12365 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12366 }
12367 else
12368 {
12369 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12370 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12371 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12372 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12373 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12374 pbCodeBuf[off++] = 0x8d;
12375 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12376 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12377 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12378 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12379 if (bMod != X86_MOD_MEM0)
12380 {
12381 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12382 if (bMod == X86_MOD_MEM4)
12383 {
12384 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12385 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12386 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12387 }
12388 }
12389 }
12390 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12391 }
12392
12393#elif defined(RT_ARCH_ARM64)
12394 if (u32EffAddr == 0)
12395 {
12396 if (idxRegIndex == UINT8_MAX)
12397 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12398 else if (idxRegBase == UINT8_MAX)
12399 {
12400 if (cShiftIndex == 0)
12401 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
12402 else
12403 {
12404 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12405 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
12406 }
12407 }
12408 else
12409 {
12410 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12411 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12412 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12413 }
12414 }
12415 else
12416 {
12417 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
12418 {
12419 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12420 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
12421 }
12422 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
12423 {
12424 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12425 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12426 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
12427 }
12428 else
12429 {
12430 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
12431 if (idxRegBase != UINT8_MAX)
12432 {
12433 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12434 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12435 }
12436 }
12437 if (idxRegIndex != UINT8_MAX)
12438 {
12439 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12440 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12441 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12442 }
12443 }
12444
12445#else
12446# error "port me"
12447#endif
12448
12449 if (idxRegIndex != UINT8_MAX)
12450 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12451 if (idxRegBase != UINT8_MAX)
12452 iemNativeRegFreeTmp(pReNative, idxRegBase);
12453 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12454 return off;
12455}
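/*
 * A minimal reference sketch (plain C, illustrative names only) of what the emitter above
 * reproduces for 32-bit addressing:
 *      uint32_t const uEffAddr = (uint32_t)(uBase + (uIndex << cShiftIndex) + u32Disp);
 * E.g. EBX=0x1000, ESI=0x0010, scale=4 (cShiftIndex=2), disp32=0x20 gives 0x1000 + 0x40 + 0x20 = 0x1060.
 * The sum is always truncated to 32 bits, matching the 32-bit LEA forms and the
 * f64Bit=false ARM64 instructions emitted above.
 */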
12456
12457
12458#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12459 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12460 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12461
12462#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12463 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12464 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12465
12466#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12467 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12468 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
12469
12470/**
12471 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
12472 *
12473 * @returns New off.
12474 * @param   pReNative           The native recompile state.
12475 * @param   off                 The code buffer offset.
12476 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
12477 * bit 4 to REX.X. The two bits are part of the
12478 * REG sub-field, which isn't needed in this
12479 * function.
12480 * @param uSibAndRspOffset Two parts:
12481 * - The first 8 bits make up the SIB byte.
12482 * - The next 8 bits are the fixed RSP/ESP offset
12483 * in case of a pop [xSP].
12484 * @param u32Disp The displacement byte/word/dword, if any.
12485 * @param cbInstr The size of the fully decoded instruction. Used
12486 * for RIP relative addressing.
12487 * @param idxVarRet The result variable number.
12488 * @param f64Bit Whether to use a 64-bit or 32-bit address size
12489 * when calculating the address.
12490 *
12491 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
12492 */
12493DECL_INLINE_THROW(uint32_t)
12494iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
12495 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
12496{
12497 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12498
12499     /*
12500      * Special case the RIP + disp32 form first (the displacement is relative to the RIP of the next instruction, hence the +cbInstr additions below).
12501      */
12502 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12503 {
12504#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
12505        /* Need to take the current PC offset into account for the displacement; no need to flush here,
12506         * as the PC is only accessed read-only and no branching or helper calls are involved. */
12507 u32Disp += pReNative->Core.offPc;
12508#endif
12509
12510 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12511 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
12512 kIemNativeGstRegUse_ReadOnly);
12513#ifdef RT_ARCH_AMD64
12514 if (f64Bit)
12515 {
12516 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
12517 if ((int32_t)offFinalDisp == offFinalDisp)
12518 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
12519 else
12520 {
12521 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
12522 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
12523 }
12524 }
12525 else
12526 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
12527
12528#elif defined(RT_ARCH_ARM64)
12529 if (f64Bit)
12530 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12531 (int64_t)(int32_t)u32Disp + cbInstr);
12532 else
12533 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12534 (int32_t)u32Disp + cbInstr);
12535
12536#else
12537# error "Port me!"
12538#endif
12539 iemNativeRegFreeTmp(pReNative, idxRegPc);
12540 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12541 return off;
12542 }
12543
12544    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
12545 int64_t i64EffAddr = 0;
12546 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12547 {
12548 case 0: break;
12549 case 1: i64EffAddr = (int8_t)u32Disp; break;
12550 case 2: i64EffAddr = (int32_t)u32Disp; break;
12551 default: AssertFailed();
12552 }
12553
12554 /* Get the register (or SIB) value. */
12555 uint8_t idxGstRegBase = UINT8_MAX;
12556 uint8_t idxGstRegIndex = UINT8_MAX;
12557 uint8_t cShiftIndex = 0;
12558 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
12559 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
12560 else /* SIB: */
12561 {
12562        /* index w/ scaling. */
12563 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12564 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12565 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
12566 if (idxGstRegIndex == 4)
12567 {
12568 /* no index */
12569 cShiftIndex = 0;
12570 idxGstRegIndex = UINT8_MAX;
12571 }
12572
12573 /* base */
12574 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
12575 if (idxGstRegBase == 4)
12576 {
12577 /* pop [rsp] hack */
12578 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
12579 }
12580 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
12581 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
12582 {
12583 /* mod=0 and base=5 -> disp32, no base reg. */
12584 Assert(i64EffAddr == 0);
12585 i64EffAddr = (int32_t)u32Disp;
12586 idxGstRegBase = UINT8_MAX;
12587 }
12588 }
12589
12590 /*
12591 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12592 * the start of the function.
12593 */
12594 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12595 {
12596 if (f64Bit)
12597 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
12598 else
12599 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
12600 return off;
12601 }
12602
12603 /*
12604 * Now emit code that calculates:
12605 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12606 * or if !f64Bit:
12607 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12608 */
12609 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12610 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12611 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12612 kIemNativeGstRegUse_ReadOnly);
12613 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12614 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12615 kIemNativeGstRegUse_ReadOnly);
12616
12617 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12618 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12619 {
12620 idxRegBase = idxRegIndex;
12621 idxRegIndex = UINT8_MAX;
12622 }
12623
12624#ifdef RT_ARCH_AMD64
12625 uint8_t bFinalAdj;
12626 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
12627 bFinalAdj = 0; /* likely */
12628 else
12629 {
12630 /* pop [rsp] with a problematic disp32 value. Split out the
12631 RSP offset and add it separately afterwards (bFinalAdj). */
12632 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
12633 Assert(idxGstRegBase == X86_GREG_xSP);
12634 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
12635 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
12636 Assert(bFinalAdj != 0);
12637 i64EffAddr -= bFinalAdj;
12638 Assert((int32_t)i64EffAddr == i64EffAddr);
12639 }
12640 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
12641//pReNative->pInstrBuf[off++] = 0xcc;
12642
12643 if (idxRegIndex == UINT8_MAX)
12644 {
12645 if (u32EffAddr == 0)
12646 {
12647 /* mov ret, base */
12648 if (f64Bit)
12649 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
12650 else
12651 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12652 }
12653 else
12654 {
12655 /* lea ret, [base + disp32] */
12656 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12657 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12658 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
12659 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12660 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12661 | (f64Bit ? X86_OP_REX_W : 0);
12662 pbCodeBuf[off++] = 0x8d;
12663 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12664 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12665 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12666 else
12667 {
12668 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12669 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12670 }
12671 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12672 if (bMod == X86_MOD_MEM4)
12673 {
12674 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12675 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12676 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12677 }
12678 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12679 }
12680 }
12681 else
12682 {
12683 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12685 if (idxRegBase == UINT8_MAX)
12686 {
12687 /* lea ret, [(index64 << cShiftIndex) + disp32] */
12688 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
12689 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12690 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12691 | (f64Bit ? X86_OP_REX_W : 0);
12692 pbCodeBuf[off++] = 0x8d;
12693 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12694 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12695 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12696 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12697 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12698 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12699 }
12700 else
12701 {
12702 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12703 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12704 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12705 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12706 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12707 | (f64Bit ? X86_OP_REX_W : 0);
12708 pbCodeBuf[off++] = 0x8d;
12709 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12710 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12711 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12712 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12713 if (bMod != X86_MOD_MEM0)
12714 {
12715 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12716 if (bMod == X86_MOD_MEM4)
12717 {
12718 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12719 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12720 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12721 }
12722 }
12723 }
12724 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12725 }
12726
12727 if (!bFinalAdj)
12728 { /* likely */ }
12729 else
12730 {
12731 Assert(f64Bit);
12732 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
12733 }
12734
12735#elif defined(RT_ARCH_ARM64)
12736 if (i64EffAddr == 0)
12737 {
12738 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12739 if (idxRegIndex == UINT8_MAX)
12740 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
12741 else if (idxRegBase != UINT8_MAX)
12742 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12743 f64Bit, false /*fSetFlags*/, cShiftIndex);
12744 else
12745 {
12746 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
12747 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
12748 }
12749 }
12750 else
12751 {
12752 if (f64Bit)
12753 { /* likely */ }
12754 else
12755 i64EffAddr = (int32_t)i64EffAddr;
12756
12757 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
12758 {
12759 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12760 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
12761 }
12762 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
12763 {
12764 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12765 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
12766 }
12767 else
12768 {
12769 if (f64Bit)
12770 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
12771 else
12772 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
12773 if (idxRegBase != UINT8_MAX)
12774 {
12775 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12776 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
12777 }
12778 }
12779 if (idxRegIndex != UINT8_MAX)
12780 {
12781 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12782 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12783 f64Bit, false /*fSetFlags*/, cShiftIndex);
12784 }
12785 }
12786
12787#else
12788# error "port me"
12789#endif
12790
12791 if (idxRegIndex != UINT8_MAX)
12792 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12793 if (idxRegBase != UINT8_MAX)
12794 iemNativeRegFreeTmp(pReNative, idxRegBase);
12795 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12796 return off;
12797}
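/*
 * A minimal reference sketch (plain C, illustrative names only) of what the emitter above
 * reproduces for 64-bit mode:
 *      uint64_t uEffAddr;
 *      if (fRipRelative)                                   // mod=0, r/m=5
 *          uEffAddr = uRipOfNextInstr + (int32_t)u32Disp;  // hence the +cbInstr additions above
 *      else
 *          uEffAddr = uBase + (uIndex << cShiftIndex) + (int64_t)iDisp;
 *      if (!f64Bit)                                        // 0x67 address size override
 *          uEffAddr = (uint32_t)uEffAddr;
 * The pop [xSP] hack folds the fixed RSP/ESP offset from uSibAndRspOffset into the
 * displacement; on AMD64 it is split out again as bFinalAdj when the sum no longer fits
 * in a signed disp32.
 */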
12798
12799
12800/*********************************************************************************************************************************
12801* TLB Lookup. *
12802*********************************************************************************************************************************/
12803
12804/**
12805 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
12806 */
12807DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
12808{
12809 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
12810 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
12811 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
12812 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
12813
12814 /* Do the lookup manually. */
12815 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
12816 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
12817 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
12818 if (RT_LIKELY(pTlbe->uTag == uTag))
12819 {
12820 /*
12821 * Check TLB page table level access flags.
12822 */
12823 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
12824 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
12825 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
12826 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
12827 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
12828 | IEMTLBE_F_PG_UNASSIGNED
12829 | IEMTLBE_F_PT_NO_ACCESSED
12830 | fNoWriteNoDirty | fNoUser);
12831 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
12832 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
12833 {
12834 /*
12835 * Return the address.
12836 */
12837 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
12838 if ((uintptr_t)pbAddr == uResult)
12839 return;
12840 RT_NOREF(cbMem);
12841 AssertFailed();
12842 }
12843 else
12844 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
12845 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
12846 }
12847 else
12848 AssertFailed();
12849 RT_BREAKPOINT();
12850}
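/*
 * For reference, the uSegAndSizeAndAccess argument decoded above is packed by the caller
 * roughly like this (an illustrative sketch mirroring the RT_BYTE1/RT_BYTE2/>>16 decoding,
 * not necessarily the exact emitter code):
 *      uint32_t const uSegAndSizeAndAccess = (uint32_t)iSegReg            // byte 0: segment register, UINT8_MAX if flat
 *                                          | ((uint32_t)cbMem   <<  8)    // byte 1: access size in bytes
 *                                          | ((uint32_t)fAccess << 16);   // bits 16+: IEM_ACCESS_TYPE_XXX flags
 */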
12851
12852/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
12853
12854
12855/*********************************************************************************************************************************
12856* Memory fetches and stores common *
12857*********************************************************************************************************************************/
12858
12859typedef enum IEMNATIVEMITMEMOP
12860{
12861 kIemNativeEmitMemOp_Store = 0,
12862 kIemNativeEmitMemOp_Fetch,
12863 kIemNativeEmitMemOp_Fetch_Zx_U16,
12864 kIemNativeEmitMemOp_Fetch_Zx_U32,
12865 kIemNativeEmitMemOp_Fetch_Zx_U64,
12866 kIemNativeEmitMemOp_Fetch_Sx_U16,
12867 kIemNativeEmitMemOp_Fetch_Sx_U32,
12868 kIemNativeEmitMemOp_Fetch_Sx_U64
12869} IEMNATIVEMITMEMOP;
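/*
 * The Zx/Sx fetch variants describe how the loaded value is widened into the destination
 * register; a small illustrative sketch for a 16-bit load (plain C, not emitted code):
 *      uint64_t const uZx = (uint64_t)uValue16;                    // kIemNativeEmitMemOp_Fetch_Zx_U64
 *      uint64_t const uSx = (uint64_t)(int64_t)(int16_t)uValue16;  // kIemNativeEmitMemOp_Fetch_Sx_U64
 * so a loaded 0x8000 becomes 0x0000000000008000 when zero extended and 0xffffffffffff8000
 * when sign extended.
 */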
12870
12871/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
12872 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
12873 * (with iSegReg = UINT8_MAX). */
12874DECL_INLINE_THROW(uint32_t)
12875iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
12876 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
12877 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
12878{
12879 /*
12880 * Assert sanity.
12881 */
12882 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12883 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12884 Assert( enmOp != kIemNativeEmitMemOp_Store
12885 || pVarValue->enmKind == kIemNativeVarKind_Immediate
12886 || pVarValue->enmKind == kIemNativeVarKind_Stack);
12887 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12888 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
12889 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
12890 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
12891 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12892 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12893 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
12894 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12895#ifdef VBOX_STRICT
12896 if (iSegReg == UINT8_MAX)
12897 {
12898 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12899 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12900 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12901 switch (cbMem)
12902 {
12903 case 1:
12904 Assert( pfnFunction
12905 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
12906 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12907 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12908 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12909 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12910 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
12911 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
12912 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
12913 : UINT64_C(0xc000b000a0009000) ));
12914 break;
12915 case 2:
12916 Assert( pfnFunction
12917 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
12918 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12919 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12920 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12921 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
12922 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
12923 : UINT64_C(0xc000b000a0009000) ));
12924 break;
12925 case 4:
12926 Assert( pfnFunction
12927 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
12928 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
12929 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
12930 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
12931 : UINT64_C(0xc000b000a0009000) ));
12932 break;
12933 case 8:
12934 Assert( pfnFunction
12935 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
12936 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
12937 : UINT64_C(0xc000b000a0009000) ));
12938 break;
12939 }
12940 }
12941 else
12942 {
12943 Assert(iSegReg < 6);
12944 switch (cbMem)
12945 {
12946 case 1:
12947 Assert( pfnFunction
12948 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
12949 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
12950 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
12951 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
12952 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
12953 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
12954 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
12955 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
12956 : UINT64_C(0xc000b000a0009000) ));
12957 break;
12958 case 2:
12959 Assert( pfnFunction
12960 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
12961 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
12962 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
12963 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
12964 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
12965 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
12966 : UINT64_C(0xc000b000a0009000) ));
12967 break;
12968 case 4:
12969 Assert( pfnFunction
12970 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
12971 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
12972 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
12973 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
12974 : UINT64_C(0xc000b000a0009000) ));
12975 break;
12976 case 8:
12977 Assert( pfnFunction
12978 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
12979 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
12980 : UINT64_C(0xc000b000a0009000) ));
12981 break;
12982 }
12983 }
12984#endif
12985
12986#ifdef VBOX_STRICT
12987 /*
12988 * Check that the fExec flags we've got make sense.
12989 */
12990 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12991#endif
12992
12993 /*
12994 * To keep things simple we have to commit any pending writes first as we
12995 * may end up making calls.
12996 */
12997 /** @todo we could postpone this till we make the call and reload the
12998 * registers after returning from the call. Not sure if that's sensible or
12999 * not, though. */
13000#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13001 off = iemNativeRegFlushPendingWrites(pReNative, off);
13002#else
13003 /* The program counter is treated differently for now. */
13004 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
13005#endif
13006
13007#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13008 /*
13009 * Move/spill/flush stuff out of call-volatile registers.
13010 * This is the easy way out. We could contain this to the tlb-miss branch
13011 * by saving and restoring active stuff here.
13012 */
13013 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13014#endif
13015
13016 /*
13017 * Define labels and allocate the result register (trying for the return
13018 * register if we can).
13019 */
13020 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13021 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
13022 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13023 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
13024 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
13025 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
13026 uint8_t const idxRegValueStore = !TlbState.fSkip
13027 && enmOp == kIemNativeEmitMemOp_Store
13028 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13029 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
13030 : UINT8_MAX;
13031 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13032 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13033 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13034 : UINT32_MAX;
13035
13036 /*
13037 * Jump to the TLB lookup code.
13038 */
13039 if (!TlbState.fSkip)
13040 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13041
13042 /*
13043 * TlbMiss:
13044 *
13045 * Call helper to do the fetching.
13046 * We flush all guest register shadow copies here.
13047 */
13048 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13049
13050#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13051 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13052#else
13053 RT_NOREF(idxInstr);
13054#endif
13055
13056#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13057 if (pReNative->Core.offPc)
13058 {
13059 /*
13060 * Update the program counter but restore it at the end of the TlbMiss branch.
13061 * This should allow delaying more program counter updates for the TlbLookup and hit paths
13062         * which are hopefully much more frequent, reducing the number of memory accesses.
13063 */
13064 /* Allocate a temporary PC register. */
13065 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13066
13067 /* Perform the addition and store the result. */
13068 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13069 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13070
13071 /* Free and flush the PC register. */
13072 iemNativeRegFreeTmp(pReNative, idxPcReg);
13073 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13074 }
13075#endif
13076
13077#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13078 /* Save variables in volatile registers. */
13079 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13080 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
13081 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
13082 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13083#endif
13084
13085 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
13086 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
13087 if (enmOp == kIemNativeEmitMemOp_Store)
13088 {
13089 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
13090 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
13091#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13092 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13093#else
13094 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13095 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
13096#endif
13097 }
13098
13099 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
13100 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
13101#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13102 fVolGregMask);
13103#else
13104 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
13105#endif
13106
13107 if (iSegReg != UINT8_MAX)
13108 {
13109 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
13110 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13111 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
13112 }
13113
13114 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13115 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13116
13117 /* Done setting up parameters, make the call. */
13118 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13119
13120 /*
13121 * Put the result in the right register if this is a fetch.
13122 */
13123 if (enmOp != kIemNativeEmitMemOp_Store)
13124 {
13125 Assert(idxRegValueFetch == pVarValue->idxReg);
13126 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
13127 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
13128 }
13129
13130#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13131 /* Restore variables and guest shadow registers to volatile registers. */
13132 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13133 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13134#endif
13135
13136#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13137 if (pReNative->Core.offPc)
13138 {
13139 /*
13140 * Time to restore the program counter to its original value.
13141 */
13142 /* Allocate a temporary PC register. */
13143 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13144
13145 /* Restore the original value. */
13146 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13147 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13148
13149 /* Free and flush the PC register. */
13150 iemNativeRegFreeTmp(pReNative, idxPcReg);
13151 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13152 }
13153#endif
13154
13155#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13156 if (!TlbState.fSkip)
13157 {
13158 /* end of TlbMiss - Jump to the done label. */
13159 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13160 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13161
13162 /*
13163 * TlbLookup:
13164 */
13165 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
13166 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
13167 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
13168
13169 /*
13170 * Emit code to do the actual storing / fetching.
13171 */
13172 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13173# ifdef VBOX_WITH_STATISTICS
13174 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13175 enmOp == kIemNativeEmitMemOp_Store
13176                                                     ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
13177                                                     : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
13178# endif
13179 switch (enmOp)
13180 {
13181 case kIemNativeEmitMemOp_Store:
13182 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
13183 {
13184 switch (cbMem)
13185 {
13186 case 1:
13187 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13188 break;
13189 case 2:
13190 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13191 break;
13192 case 4:
13193 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13194 break;
13195 case 8:
13196 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13197 break;
13198 default:
13199 AssertFailed();
13200 }
13201 }
13202 else
13203 {
13204 switch (cbMem)
13205 {
13206 case 1:
13207 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
13208 idxRegMemResult, TlbState.idxReg1);
13209 break;
13210 case 2:
13211 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
13212 idxRegMemResult, TlbState.idxReg1);
13213 break;
13214 case 4:
13215 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
13216 idxRegMemResult, TlbState.idxReg1);
13217 break;
13218 case 8:
13219 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
13220 idxRegMemResult, TlbState.idxReg1);
13221 break;
13222 default:
13223 AssertFailed();
13224 }
13225 }
13226 break;
13227
13228 case kIemNativeEmitMemOp_Fetch:
13229 case kIemNativeEmitMemOp_Fetch_Zx_U16:
13230 case kIemNativeEmitMemOp_Fetch_Zx_U32:
13231 case kIemNativeEmitMemOp_Fetch_Zx_U64:
13232 switch (cbMem)
13233 {
13234 case 1:
13235 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13236 break;
13237 case 2:
13238 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13239 break;
13240 case 4:
13241 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13242 break;
13243 case 8:
13244 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13245 break;
13246 default:
13247 AssertFailed();
13248 }
13249 break;
13250
13251 case kIemNativeEmitMemOp_Fetch_Sx_U16:
13252 Assert(cbMem == 1);
13253 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13254 break;
13255
13256 case kIemNativeEmitMemOp_Fetch_Sx_U32:
13257 Assert(cbMem == 1 || cbMem == 2);
13258 if (cbMem == 1)
13259 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13260 else
13261 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13262 break;
13263
13264 case kIemNativeEmitMemOp_Fetch_Sx_U64:
13265 switch (cbMem)
13266 {
13267 case 1:
13268 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13269 break;
13270 case 2:
13271 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13272 break;
13273 case 4:
13274 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13275 break;
13276 default:
13277 AssertFailed();
13278 }
13279 break;
13280
13281 default:
13282 AssertFailed();
13283 }
13284
13285 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13286
13287 /*
13288 * TlbDone:
13289 */
13290 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13291
13292 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13293
13294# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13295 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13296 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13297# endif
13298 }
13299#else
13300 RT_NOREF(fAlignMask, idxLabelTlbMiss);
13301#endif
13302
13303 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
13304 iemNativeVarRegisterRelease(pReNative, idxVarValue);
13305 return off;
13306}
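/*
 * Rough shape of the native code the function above produces when the TLB lookup isn't
 * skipped (a sketch; the labels refer to the kIemNativeLabelType_Tlb* labels created above):
 *          jmp     TlbLookup               ; try the inlined lookup first
 *      TlbMiss:
 *          <load args>                     ; pVCpu, GCPtrMem (+disp), iSegReg / value
 *          call    pfnFunction             ; slow path via the C helper
 *          jmp     TlbDone
 *      TlbLookup:
 *          <inline data TLB probe>         ; jumps back to TlbMiss on a miss
 *          <load/store via idxRegMemResult>
 *      TlbDone:
 */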
13307
13308
13309
13310/*********************************************************************************************************************************
13311* Memory fetches (IEM_MEM_FETCH_XXX). *
13312*********************************************************************************************************************************/
13313
13314/* 8-bit segmented: */
13315#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
13316 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
13317 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13318 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13319
13320#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13321 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13322 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13323 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13324
13325#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13326 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13327 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13328 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13329
13330#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13331 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13332 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13333 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13334
13335#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13336 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13337 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13338 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13339
13340#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13341 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13342 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13343 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13344
13345#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13346 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13347 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13348 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13349
13350/* 16-bit segmented: */
13351#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13352 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13353 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13354 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13355
13356#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13357 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13358 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13359 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13360
13361#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13362 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13363 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13364 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13365
13366#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13367 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13368 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13369 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13370
13371#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13372 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13373 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13374 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13375
13376#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13377 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13378 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13379 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13380
13381
13382/* 32-bit segmented: */
13383#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13384 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13385 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13386 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13387
13388#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13389 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13390 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13391 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13392
13393#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13394 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13395 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13396 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13397
13398#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13399 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13400 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13401 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13402
13403
13404/* 64-bit segmented: */
13405#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13406 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13407 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13408 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
13409
13410
13411
13412/* 8-bit flat: */
13413#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
13414 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
13415 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13416 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13417
13418#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
13419 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13420 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13421 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13422
13423#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
13424 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13425 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13426 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13427
13428#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
13429 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13430 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13431 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13432
13433#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
13434 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13435 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13436 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13437
13438#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
13439 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13440 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13441 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13442
13443#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
13444 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13445 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13446 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13447
13448
13449/* 16-bit flat: */
13450#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
13451 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13452 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13453 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13454
13455#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
13456 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13457 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13458 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13459
13460#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
13461 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13462 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13463 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13464
13465#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
13466 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13467 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13468 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13469
13470#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
13471 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13472 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13473 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13474
13475#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
13476 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13477 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13478 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13479
13480/* 32-bit flat: */
13481#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
13482 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13483 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13484 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13485
13486#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
13487 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13488 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13489 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13490
13491#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
13492 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13493 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13494 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13495
13496#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
13497 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13498 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13499 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13500
13501/* 64-bit flat: */
13502#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
13503 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13504 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13505 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
13506
13507
13508
13509/*********************************************************************************************************************************
13510* Memory stores (IEM_MEM_STORE_XXX). *
13511*********************************************************************************************************************************/
13512
13513#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
13514 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
13515 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13516 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13517
13518#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
13519 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
13520 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13521 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13522
13523#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
13524 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
13525 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13526 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13527
13528#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
13529 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
13530 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13531 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13532
13533
13534#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
13535 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
13536 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13537 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13538
13539#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
13540 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
13541 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13542 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13543
13544#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
13545 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
13546 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13547 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13548
13549#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
13550 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
13551 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13552 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13553
13554
13555#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
13556 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13557 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13558
13559#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
13560 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13561 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13562
13563#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
13564 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13565 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13566
13567#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
13568 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13569 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13570
13571
13572#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
13573 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13574 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13575
13576#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
13577 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13578 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13579
13580#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
13581 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13582 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13583
13584#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
13585 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13586 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13587
13588/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
13589 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
13590DECL_INLINE_THROW(uint32_t)
13591iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
13592 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
13593{
13594 /*
13595 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
13596 * to do the grunt work.
13597 */
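     /* E.g. IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, 0x1234) ends up here with
        uValueConst=0x1234 and cbMem=2 (informal sketch of the flow): the immediate is
        wrapped in a short-lived const variable so the common emitter can treat it like
        any other value variable, and it is freed again right after. */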
13598 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
13599 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
13600 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
13601 pfnFunction, idxInstr);
13602 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
13603 return off;
13604}
13605
13606
13607
13608/*********************************************************************************************************************************
13609* Stack Accesses. *
13610*********************************************************************************************************************************/
13611/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
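/* Example of the packing (informal): IEM_MC_FLAT64_PUSH_U64 below passes
   RT_MAKE_U32_FROM_U8(64, 64, 0, 0), i.e. RT_BYTE1() = 64 (variable width in bits),
   RT_BYTE2() = 64 (flat 64-bit stack) and RT_BYTE3() = 0 (not a segment register push),
   while IEM_MC_PUSH_U32_SREG passes (32, 0, 1, 0) for a non-flat 32-bit segment
   register push. */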
13612#define IEM_MC_PUSH_U16(a_u16Value) \
13613 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
13614 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
13615#define IEM_MC_PUSH_U32(a_u32Value) \
13616 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
13617 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
13618#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
13619 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
13620 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
13621#define IEM_MC_PUSH_U64(a_u64Value) \
13622 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
13623 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
13624
13625#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
13626 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
13627 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13628#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
13629 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
13630 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
13631#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
13632 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
13633 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
13634
13635#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
13636 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
13637 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13638#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
13639 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
13640 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
13641
13642
13643DECL_FORCE_INLINE_THROW(uint32_t)
13644iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13645{
13646 /* Use16BitSp: */
13647#ifdef RT_ARCH_AMD64
13648 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
13649 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13650#else
13651 /* sub regeff, regrsp, #cbMem */
13652 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
13653 /* and regeff, regeff, #0xffff */
13654 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
13655 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
13656 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
13657 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
13658#endif
13659 return off;
13660}
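/* Worked example for iemNativeEmitStackPushUse16Sp (informal): with RSP=0x00120000 and
   cbMem=2 the 16-bit subtraction wraps, so idxRegEffSp becomes 0x000000000000fffe while
   only bits 15:0 of idxRegRsp are updated, leaving RSP=0x0012fffe. */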
13661
13662
13663DECL_FORCE_INLINE(uint32_t)
13664iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13665{
13666 /* Use32BitSp: */
13667 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
13668 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13669 return off;
13670}
13671
13672
13673/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
13674DECL_INLINE_THROW(uint32_t)
13675iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
13676 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
13677{
13678 /*
13679 * Assert sanity.
13680 */
13681 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
13682 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
13683#ifdef VBOX_STRICT
13684 if (RT_BYTE2(cBitsVarAndFlat) != 0)
13685 {
13686 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13687 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13688 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13689 Assert( pfnFunction
13690 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13691 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
13692 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
13693 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13694 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
13695 : UINT64_C(0xc000b000a0009000) ));
13696 }
13697 else
13698 Assert( pfnFunction
13699 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
13700 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
13701 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
13702 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
13703 : UINT64_C(0xc000b000a0009000) ));
13704#endif
13705
13706#ifdef VBOX_STRICT
13707 /*
13708 * Check that the fExec flags we've got make sense.
13709 */
13710 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13711#endif
13712
13713 /*
13714 * To keep things simple we have to commit any pending writes first as we
13715 * may end up making calls.
13716 */
13717 /** @todo we could postpone this till we make the call and reload the
13718 * registers after returning from the call. Not sure if that's sensible or
13719 * not, though. */
13720 off = iemNativeRegFlushPendingWrites(pReNative, off);
13721
13722 /*
13723 * First we calculate the new RSP and the effective stack pointer value.
13724 * For 64-bit mode and flat 32-bit these two are the same.
13725 * (Code structure is very similar to that of PUSH)
13726 */
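    /* Roughly three cases follow: flat 64-bit (RSP -= cbMem), flat 32-bit (ESP -= cbMem),
       and non-flat, where SS.ATTR.D is tested at runtime to choose between the 32-bit and
       16-bit stack pointer update sequences above. */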
13727 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
13728 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
13729 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
13730 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
13731 ? cbMem : sizeof(uint16_t);
13732 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
13733 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
13734 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
13735 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
13736 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
13737 if (cBitsFlat != 0)
13738 {
13739 Assert(idxRegEffSp == idxRegRsp);
13740 Assert(cBitsFlat == 32 || cBitsFlat == 64);
13741 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
13742 if (cBitsFlat == 64)
13743 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
13744 else
13745 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
13746 }
13747 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
13748 {
13749 Assert(idxRegEffSp != idxRegRsp);
13750 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
13751 kIemNativeGstRegUse_ReadOnly);
13752#ifdef RT_ARCH_AMD64
13753 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13754#else
13755 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13756#endif
13757 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
13758 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
13759 offFixupJumpToUseOtherBitSp = off;
13760 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13761 {
13762 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
13763 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13764 }
13765 else
13766 {
13767 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
13768 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13769 }
13770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13771 }
13772 /* SpUpdateEnd: */
13773 uint32_t const offLabelSpUpdateEnd = off;
13774
13775 /*
13776 * Okay, now prepare for the TLB lookup and jump to the lookup code (or to
13777 * TlbMiss if we're skipping the lookup).
13778 */
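    /* Rough flow from here: on a TLB hit the value is stored directly via the host address
       in idxRegMemResult; on a miss pfnFunction is called with pVCpu, the effective stack
       address and the value; both paths then meet at TlbDone where the new RSP is committed. */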
13779 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
13780 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
13781 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13782 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
13783 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13784 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13785 : UINT32_MAX;
13786 uint8_t const idxRegValue = !TlbState.fSkip
13787 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13788 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
13789 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
13790 : UINT8_MAX;
13791 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13792
13793
13794 if (!TlbState.fSkip)
13795 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13796 else
13797 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
13798
13799 /*
13800 * Use16BitSp:
13801 */
13802 if (cBitsFlat == 0)
13803 {
13804#ifdef RT_ARCH_AMD64
13805 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13806#else
13807 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13808#endif
13809 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
13810 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13811 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13812 else
13813 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13814 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
13815 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13816 }
13817
13818 /*
13819 * TlbMiss:
13820 *
13821 * Call helper to do the pushing.
13822 */
13823 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
13824
13825#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13826 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13827#else
13828 RT_NOREF(idxInstr);
13829#endif
13830
13831 /* Save variables in volatile registers. */
13832 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13833 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
13834 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
13835 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
13836 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13837
13838 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
13839 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
13840 {
13841 /* Swap them using ARG0 as temp register: */
13842 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
13843 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
13844 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
13845 }
13846 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
13847 {
13848 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
13849 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
13850 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13851
13852 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
13853 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
13854 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13855 }
13856 else
13857 {
13858 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
13859 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13860
13861 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
13862 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
13863 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
13864 }
13865
13866 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13867 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13868
13869 /* Done setting up parameters, make the call. */
13870 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13871
13872 /* Restore variables and guest shadow registers to volatile registers. */
13873 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13874 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13875
13876#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13877 if (!TlbState.fSkip)
13878 {
13879 /* end of TlbMiss - Jump to the done label. */
13880 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13881 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13882
13883 /*
13884 * TlbLookup:
13885 */
13886 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
13887 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13888
13889 /*
13890 * Emit code to do the actual storing / fetching.
13891 */
13892 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13893# ifdef VBOX_WITH_STATISTICS
13894 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13895 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
13896# endif
13897 if (idxRegValue != UINT8_MAX)
13898 {
13899 switch (cbMemAccess)
13900 {
13901 case 2:
13902 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13903 break;
13904 case 4:
13905 if (!fIsIntelSeg)
13906 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13907 else
13908 {
13909 /* intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
13910 PUSH FS in real mode, so we have to try to emulate that here.
13911 We borrow the now unused idxReg1 from the TLB lookup code here. */
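 /* Informally, the 32-bit value stored below is
    (EFLAGS & 0xffff0000 & ~X86_EFL_RAZ_MASK) | uSegValue,
    relying on the upper half of idxRegValue being zero (see the ASSUMES note below). */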
13912 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
13913 kIemNativeGstReg_EFlags);
13914 if (idxRegEfl != UINT8_MAX)
13915 {
13916#ifdef RT_ARCH_AMD64
13917 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
13918 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
13919 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13920#else
13921 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
13922 off, TlbState.idxReg1, idxRegEfl,
13923 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13924#endif
13925 iemNativeRegFreeTmp(pReNative, idxRegEfl);
13926 }
13927 else
13928 {
13929 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
13930 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
13931 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
13932 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13933 }
13934 /* ASSUMES the upper half of idxRegValue is ZERO. */
13935 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
13936 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
13937 }
13938 break;
13939 case 8:
13940 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13941 break;
13942 default:
13943 AssertFailed();
13944 }
13945 }
13946 else
13947 {
13948 switch (cbMemAccess)
13949 {
13950 case 2:
13951 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
13952 idxRegMemResult, TlbState.idxReg1);
13953 break;
13954 case 4:
13955 Assert(!fIsSegReg);
13956 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
13957 idxRegMemResult, TlbState.idxReg1);
13958 break;
13959 case 8:
13960 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
13961 break;
13962 default:
13963 AssertFailed();
13964 }
13965 }
13966
13967 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13968 TlbState.freeRegsAndReleaseVars(pReNative);
13969
13970 /*
13971 * TlbDone:
13972 *
13973 * Commit the new RSP value.
13974 */
13975 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13976 }
13977#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
13978
13979 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
13980 iemNativeRegFreeTmp(pReNative, idxRegRsp);
13981 if (idxRegEffSp != idxRegRsp)
13982 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
13983
13984 /* The value variable is implicitly flushed. */
13985 if (idxRegValue != UINT8_MAX)
13986 iemNativeVarRegisterRelease(pReNative, idxVarValue);
13987 iemNativeVarFreeLocal(pReNative, idxVarValue);
13988
13989 return off;
13990}
13991
13992
13993
13994/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
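/* Same packing scheme as for the push macros above; e.g. IEM_MC_FLAT32_POP_GREG_U16
   passes (16, 32, 0, 0): pop a 16-bit value using a flat 32-bit stack pointer. */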
13995#define IEM_MC_POP_GREG_U16(a_iGReg) \
13996 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
13997 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
13998#define IEM_MC_POP_GREG_U32(a_iGReg) \
13999 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
14000 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
14001#define IEM_MC_POP_GREG_U64(a_iGReg) \
14002 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
14003 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
14004
14005#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
14006 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
14007 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14008#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
14009 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
14010 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
14011
14012#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
14013 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
14014 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14015#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
14016 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
14017 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
14018
14019
14020DECL_FORCE_INLINE_THROW(uint32_t)
14021iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
14022 uint8_t idxRegTmp)
14023{
14024 /* Use16BitSp: */
14025#ifdef RT_ARCH_AMD64
14026 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14027 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
14028 RT_NOREF(idxRegTmp);
14029#else
14030 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
14031 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
14032 /* add tmp, regrsp, #cbMem */
14033 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
14034 /* and tmp, tmp, #0xffff */
14035 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
14036 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
14037 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
14038 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
14039#endif
14040 return off;
14041}
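/* Worked example for iemNativeEmitStackPopUse16Sp (informal): with RSP=0x0012fffe and
   cbMem=2, idxRegEffSp becomes 0x000000000000fffe and the 16-bit addition wraps, so only
   bits 15:0 of RSP change, giving RSP=0x00120000 afterwards. */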
14042
14043
14044DECL_FORCE_INLINE(uint32_t)
14045iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
14046{
14047 /* Use32BitSp: */
14048 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14049 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
14050 return off;
14051}
14052
14053
14054/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
14055DECL_INLINE_THROW(uint32_t)
14056iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
14057 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
14058{
14059 /*
14060 * Assert sanity.
14061 */
14062 Assert(idxGReg < 16);
14063#ifdef VBOX_STRICT
14064 if (RT_BYTE2(cBitsVarAndFlat) != 0)
14065 {
14066 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14067 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14068 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14069 Assert( pfnFunction
14070 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14071 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
14072 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14073 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
14074 : UINT64_C(0xc000b000a0009000) ));
14075 }
14076 else
14077 Assert( pfnFunction
14078 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
14079 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
14080 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
14081 : UINT64_C(0xc000b000a0009000) ));
14082#endif
14083
14084#ifdef VBOX_STRICT
14085 /*
14086 * Check that the fExec flags we've got make sense.
14087 */
14088 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14089#endif
14090
14091 /*
14092 * To keep things simple we have to commit any pending writes first as we
14093 * may end up making calls.
14094 */
14095 off = iemNativeRegFlushPendingWrites(pReNative, off);
14096
14097 /*
14098 * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
14099 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
14100 * directly as the effective stack pointer.
14101 * (Code structure is very similar to that of PUSH)
14102 */
14103 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
14104 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
14105 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
14106 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
14107 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
14108 /** @todo can do a better job picking the register here. For cbMem >= 4 this
14109 * will be the resulting register value. */
14110 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
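    /* I.e. on a TLB hit idxRegMemResult first holds the host address and is then
       overwritten with the value loaded from it; on a miss it receives the value
       returned by the helper call. */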
14111
14112 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
14113 if (cBitsFlat != 0)
14114 {
14115 Assert(idxRegEffSp == idxRegRsp);
14116 Assert(cBitsFlat == 32 || cBitsFlat == 64);
14117 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
14118 }
14119 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
14120 {
14121 Assert(idxRegEffSp != idxRegRsp);
14122 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
14123 kIemNativeGstRegUse_ReadOnly);
14124#ifdef RT_ARCH_AMD64
14125 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14126#else
14127 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14128#endif
14129 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
14130 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
14131 offFixupJumpToUseOtherBitSp = off;
14132 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14133 {
14134/** @todo can skip idxRegRsp updating when popping ESP. */
14135 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
14136 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14137 }
14138 else
14139 {
14140 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
14141 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14142 }
14143 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14144 }
14145 /* SpUpdateEnd: */
14146 uint32_t const offLabelSpUpdateEnd = off;
14147
14148 /*
14149 * Okay, now prepare for the TLB lookup and jump to the lookup code (or to
14150 * TlbMiss if we're skipping the lookup).
14151 */
14152 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
14153 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
14154 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14155 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
14156 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14157 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14158 : UINT32_MAX;
14159
14160 if (!TlbState.fSkip)
14161 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14162 else
14163 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
14164
14165 /*
14166 * Use16BitSp:
14167 */
14168 if (cBitsFlat == 0)
14169 {
14170#ifdef RT_ARCH_AMD64
14171 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14172#else
14173 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14174#endif
14175 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
14176 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14177 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14178 else
14179 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14180 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
14181 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14182 }
14183
14184 /*
14185 * TlbMiss:
14186 *
14187 * Call helper to do the popping.
14188 */
14189 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
14190
14191#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14192 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14193#else
14194 RT_NOREF(idxInstr);
14195#endif
14196
14197 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
14198 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
14199 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
14200 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14201
14202
14203 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
14204 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
14205 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
14206
14207 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14208 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14209
14210 /* Done setting up parameters, make the call. */
14211 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14212
14213 /* Move the return register content to idxRegMemResult. */
14214 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14215 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14216
14217 /* Restore variables and guest shadow registers to volatile registers. */
14218 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14219 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14220
14221#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14222 if (!TlbState.fSkip)
14223 {
14224 /* end of TlbMiss - Jump to the done label. */
14225 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14226 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14227
14228 /*
14229 * TlbLookup:
14230 */
14231 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
14232 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14233
14234 /*
14235 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult itself).
14236 */
14237 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14238# ifdef VBOX_WITH_STATISTICS
14239 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
14240 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
14241# endif
14242 switch (cbMem)
14243 {
14244 case 2:
14245 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14246 break;
14247 case 4:
14248 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14249 break;
14250 case 8:
14251 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14252 break;
14253 default:
14254 AssertFailed();
14255 }
14256
14257 TlbState.freeRegsAndReleaseVars(pReNative);
14258
14259 /*
14260 * TlbDone:
14261 *
14262 * Set the new RSP value (FLAT accesses needs to calculate it first) and
14263 * commit the popped register value.
14264 */
14265 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14266 }
14267#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14268
14269 if (idxGReg != X86_GREG_xSP)
14270 {
14271 /* Set the register. */
14272 if (cbMem >= sizeof(uint32_t))
14273 {
14274#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14275 AssertMsg( pReNative->idxCurCall == 0
14276 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
14277 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
14278#endif
14279 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
14280 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
14281 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14282 }
14283 else
14284 {
14285 Assert(cbMem == sizeof(uint16_t));
14286 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
14287 kIemNativeGstRegUse_ForUpdate);
14288 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
14289 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14290 iemNativeRegFreeTmp(pReNative, idxRegDst);
14291 }
14292
14293 /* Complete RSP calculation for FLAT mode. */
14294 if (idxRegEffSp == idxRegRsp)
14295 {
14296 if (cBitsFlat == 64)
14297 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14298 else
14299 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14300 }
14301 }
14302 else
14303 {
14304 /* We're popping RSP, ESP or SP. Only the SP case needs a bit of extra work, of course. */
14305 if (cbMem == sizeof(uint64_t))
14306 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
14307 else if (cbMem == sizeof(uint32_t))
14308 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
14309 else
14310 {
14311 if (idxRegEffSp == idxRegRsp)
14312 {
14313 if (cBitsFlat == 64)
14314 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14315 else
14316 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14317 }
14318 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
14319 }
14320 }
14321 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
14322
14323 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14324 if (idxRegEffSp != idxRegRsp)
14325 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14326 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14327
14328 return off;
14329}
14330
14331
14332
14333/*********************************************************************************************************************************
14334* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
14335*********************************************************************************************************************************/
14336
14337#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14338 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14339 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14340 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
14341
14342#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14343 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14344 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14345 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
14346
14347#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14348 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14349 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14350 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
14351
14352#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14353 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14354 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14355 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
14356
14357
14358#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14359 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14360 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14361 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
14362
14363#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14364 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14365 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14366 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
14367
14368#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14369 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14370 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14371 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14372
14373#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14374 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14375 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14376 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
14377
14378#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14379 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
14380 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14381 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14382
14383
14384#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14385 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14386 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14387 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
14388
14389#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14390 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14391 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14392 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
14393
14394#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14395 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14396 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14397 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14398
14399#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14400 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14401 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14402 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
14403
14404#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14405 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
14406 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14407 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14408
14409
14410#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14411 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14412 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14413 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
14414
14415#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14416 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14417 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14418 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
14419#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14420 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14421 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14422 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14423
14424#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14425 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14426 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14427 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
14428
14429#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14430 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
14431 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14432 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14433
14434
14435#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14436 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14437 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14438 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
14439
14440#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14441 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14442 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14443 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
14444
14445
14446#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14447 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14448 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14449 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
14450
14451#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14452 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14453 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14454 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
14455
14456#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14457 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14458 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14459 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
14460
14461#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14462 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14463 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14464 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
14465
14466
14467
14468#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14469 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14470 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14471 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
14472
14473#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14474 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14475 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14476 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
14477
14478#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14479 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14480 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14481 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
14482
14483#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14484 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14485 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14486 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
14487
14488
14489#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14490 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14491 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14492 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
14493
14494#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14495 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14496 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14497 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
14498
14499#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14500 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14501 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14502 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14503
14504#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14505 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14506 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14507 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
14508
14509#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
14510 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
14511 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14512 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14513
14514
14515#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14516 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14517 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14518 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
14519
14520#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14521 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14522 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14523 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
14524
14525#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14526 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14527 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14528 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14529
14530#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14531 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14532 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14533 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
14534
14535#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
14536 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
14537 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14538 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14539
14540
14541#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14542 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14543 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14544 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
14545
14546#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14547 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14548 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14549 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
14550
14551#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14552 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14553 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14554 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14555
14556#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14557 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14558 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14559 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
14560
14561#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
14562 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
14563 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14564 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14565
14566
14567#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
14568 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14569 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14570 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
14571
14572#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
14573 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14574 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14575 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
14576
14577
14578#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14579 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14580 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14581 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
14582
14583#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14584 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14585 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14586 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
14587
14588#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14589 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14590 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14591 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
14592
14593#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14594 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14595 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14596 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
14597
14598
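/**
 * Common worker for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 * statements above.
 *
 * Emits an inline TLB lookup (when enabled) for the guest address in
 * @a idxVarGCPtrMem with a fall-back call to @a pfnFunction on a TLB miss.
 * The host mapping address is stored in the @a idxVarMem variable and the
 * unmap token in @a idxVarUnmapInfo.  The flat variants pass UINT8_MAX as
 * @a iSegReg.
 *
 * @note  As a rough illustration of the macro plumbing: IEM_MC_MEM_FLAT_MAP_U32_RW
 *        above ends up here with cbMem=4, fAccess=IEM_ACCESS_DATA_RW,
 *        fAlignMask=3 and iSegReg=UINT8_MAX.
 */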
14599DECL_INLINE_THROW(uint32_t)
14600iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
14601 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
14602 uintptr_t pfnFunction, uint8_t idxInstr)
14603{
14604 /*
14605 * Assert sanity.
14606 */
14607 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
14608 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
14609 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
14610 && pVarMem->cbVar == sizeof(void *),
14611 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14612
14613 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14614 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14615 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
14616 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
14617 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14618
14619 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
14620 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
14621 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
14622 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
14623 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14624
14625 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
14626
14627 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
14628
14629#ifdef VBOX_STRICT
14630# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
14631 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
14632 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
14633 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
14634 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
14635# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
14636 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
14637 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
14638 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
14639
14640 if (iSegReg == UINT8_MAX)
14641 {
14642 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14643 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14644 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14645 switch (cbMem)
14646 {
14647 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
14648 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
14649 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
14650 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
14651 case 10:
14652 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
14653 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
14654 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14655 break;
14656 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
14657# if 0
14658 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
14659 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
14660# endif
14661 default: AssertFailed(); break;
14662 }
14663 }
14664 else
14665 {
14666 Assert(iSegReg < 6);
14667 switch (cbMem)
14668 {
14669 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
14670 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
14671 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
14672 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
14673 case 10:
14674 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
14675 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
14676 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14677 break;
14678 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
14679# if 0
14680 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
14681 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
14682# endif
14683 default: AssertFailed(); break;
14684 }
14685 }
14686# undef IEM_MAP_HLP_FN
14687# undef IEM_MAP_HLP_FN_NO_AT
14688#endif
14689
14690#ifdef VBOX_STRICT
14691 /*
14692 * Check that the fExec flags we've got make sense.
14693 */
14694 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14695#endif
14696
14697 /*
14698 * To keep things simple we have to commit any pending writes first as we
14699 * may end up making calls.
14700 */
14701 off = iemNativeRegFlushPendingWrites(pReNative, off);
14702
14703#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14704 /*
14705 * Move/spill/flush stuff out of call-volatile registers.
14706 * This is the easy way out. We could contain this to the tlb-miss branch
14707 * by saving and restoring active stuff here.
14708 */
14709 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
14710 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
14711#endif
14712
14713 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
14714 while the tlb-miss codepath will temporarily put it on the stack.
14715 Set the type to stack here so we don't need to do it twice below. */
14716 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
14717 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
14718 /** @todo use a tmp register from TlbState, since they'll be free after tlb
14719 * lookup is done. */
14720
14721 /*
14722 * Define labels and allocate the result register (trying for the return
14723 * register if we can).
14724 */
14725 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14726 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
14727 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
14728 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
14729 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
14730 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14731 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14732 : UINT32_MAX;
14733//off=iemNativeEmitBrk(pReNative, off, 0);
14734 /*
14735 * Jump to the TLB lookup code.
14736 */
14737 if (!TlbState.fSkip)
14738 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14739
14740 /*
14741 * TlbMiss:
14742 *
14743 * Call helper to do the fetching.
14744 * We flush all guest register shadow copies here.
14745 */
14746 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
14747
14748#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14749 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14750#else
14751 RT_NOREF(idxInstr);
14752#endif
14753
14754#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14755 /* Save variables in volatile registers. */
14756 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
14757 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14758#endif
14759
14760 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
14761 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
14762#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14763 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
14764#else
14765 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14766#endif
14767
14768 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
14769 if (iSegReg != UINT8_MAX)
14770 {
14771 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
14772 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
14773 }
14774
14775 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
14776 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
14777 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
14778
14779 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14780 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14781
14782 /* Done setting up parameters, make the call. */
14783 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14784
14785 /*
14786 * Put the output in the right registers.
14787 */
14788 Assert(idxRegMemResult == pVarMem->idxReg);
14789 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14790 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14791
14792#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14793 /* Restore variables and guest shadow registers to volatile registers. */
14794 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14795 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14796#endif
14797
14798 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
14799 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
14800
14801#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14802 if (!TlbState.fSkip)
14803 {
14804 /* End of TlbMiss - jump to the done label. */
14805 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14806 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14807
14808 /*
14809 * TlbLookup:
14810 */
14811 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
14812 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14813# ifdef VBOX_WITH_STATISTICS
14814 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
14815 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
14816# endif
14817
14818 /* [idxVarUnmapInfo] = 0; */
14819 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
14820
14821 /*
14822 * TlbDone:
14823 */
14824 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14825
14826 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
14827
14828# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14829 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
14830 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14831# endif
14832 }
14833#else
14834 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
14835#endif
14836
14837 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
14838 iemNativeVarRegisterRelease(pReNative, idxVarMem);
14839
14840 return off;
14841}
14842
14843
14844#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
14845 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
14846 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
14847
14848#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
14849 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
14850 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
14851
14852#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
14853 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
14854 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
14855
14856#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
14857 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
14858 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
14859
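/**
 * Common worker for the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statements above.
 *
 * Emits a check of the bUnmapInfo value produced by the corresponding
 * IEM_MC_MEM_MAP_XXX / IEM_MC_MEM_FLAT_MAP_XXX statement and, only when it is
 * non-zero, a call to the given commit-and-unmap helper (@a pfnFunction).
 * The bUnmapInfo variable is freed afterwards.
 */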
14860DECL_INLINE_THROW(uint32_t)
14861iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
14862 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
14863{
14864 /*
14865 * Assert sanity.
14866 */
14867 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14868#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
14869 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14870#endif
14871 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
14872 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
14873 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
14874#ifdef VBOX_STRICT
14875 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
14876 {
14877 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
14878 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
14879 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
14880 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
14881 case IEM_ACCESS_TYPE_WRITE:
14882 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
14883 case IEM_ACCESS_TYPE_READ:
14884 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
14885 default: AssertFailed();
14886 }
14887#else
14888 RT_NOREF(fAccess);
14889#endif
14890
14891 /*
14892 * To keep things simple we have to commit any pending writes first as we
14893 * may end up making calls (there shouldn't be any at this point, so this
14894 * is just for consistency).
14895 */
14896 /** @todo we could postpone this till we make the call and reload the
14897 * registers after returning from the call. Not sure if that's sensible or
14898 * not, though. */
14899 off = iemNativeRegFlushPendingWrites(pReNative, off);
14900
14901 /*
14902 * Move/spill/flush stuff out of call-volatile registers.
14903 *
14904 * We exclude any register holding the bUnmapInfo variable, as we'll be
14905 * checking it after returning from the call and will free it afterwards.
14906 */
14907 /** @todo save+restore active registers and maybe guest shadows in miss
14908 * scenario. */
14909 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
14910
14911 /*
14912 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
14913 * to call the unmap helper function.
14914 *
14915 * The likelihood of it being zero is higher than for the TLB hit when doing
14916 * the mapping, as a TLB miss for a well aligned and unproblematic memory
14917 * access should also end up with a mapping that won't need special unmapping.
14918 */
14919 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
14920 * should speed up things for the pure interpreter as well when TLBs
14921 * are enabled. */
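    /* A rough sketch of what the code emitted below looks like (illustration
       only; the exact instructions depend on the host architecture and on
       whether bUnmapInfo currently lives in a register or on the stack):
            test    bUnmapInfo, 0ffh
            jz      .done
            call    pfnFunction     ; iemNativeHlpMemCommitAndUnmap(Atomic|Rw|Wo|Ro)
        .done:
    */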
14922#ifdef RT_ARCH_AMD64
14923 if (pVarUnmapInfo->idxReg == UINT8_MAX)
14924 {
14925 /* test byte [rbp - xxx], 0ffh */
14926 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
14927 pbCodeBuf[off++] = 0xf6;
14928 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
14929 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
14930 pbCodeBuf[off++] = 0xff;
14931 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14932 }
14933 else
14934#endif
14935 {
14936 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
14937 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
14938 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
14939 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
14940 }
14941 uint32_t const offJmpFixup = off;
14942 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
14943
14944 /*
14945 * Call the unmap helper function.
14946 */
14947#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
14948 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14949#else
14950 RT_NOREF(idxInstr);
14951#endif
14952
14953 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
14954 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
14955 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14956
14957 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14958 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14959
14960 /* Done setting up parameters, make the call. */
14961 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14962
14963 /* The bUnmapInfo variable is implicitly freed by these MCs. */
14964 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
14965
14966 /*
14967 * Done, just fixup the jump for the non-call case.
14968 */
14969 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
14970
14971 return off;
14972}
14973
14974
14975
14976/*********************************************************************************************************************************
14977* State and Exceptions *
14978*********************************************************************************************************************************/
14979
14980#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
14981#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
14982
14983#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
14984#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
14985#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
14986
14987#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
14988#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
14989#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
14990
14991
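/** Common worker for the IEM_MC_ACTUALIZE_*_STATE_FOR_* and IEM_MC_PREPARE_*_USAGE
 *  statements above; currently a placeholder that emits nothing. */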
14992DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
14993{
14994 /** @todo this needs a lot more work later. */
14995 RT_NOREF(pReNative, fForChange);
14996 return off;
14997}
14998
14999
15000
15001/*********************************************************************************************************************************
15002* Emitters for FPU related operations. *
15003*********************************************************************************************************************************/
15004
15005#define IEM_MC_FETCH_FCW(a_u16Fcw) \
15006 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
15007
15008/** Emits code for IEM_MC_FETCH_FCW. */
15009DECL_INLINE_THROW(uint32_t)
15010iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15011{
15012 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15013 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15014
15015 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15016
15017 /* Allocate a temporary FCW register. */
15018 /** @todo eliminate extra register */
15019 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
15020 kIemNativeGstRegUse_ReadOnly);
15021
15022 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
15023
15024 /* Free but don't flush the FCW register. */
15025 iemNativeRegFreeTmp(pReNative, idxFcwReg);
15026 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15027
15028 return off;
15029}
15030
15031
15032#define IEM_MC_FETCH_FSW(a_u16Fsw) \
15033 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
15034
15035/** Emits code for IEM_MC_FETCH_FSW. */
15036DECL_INLINE_THROW(uint32_t)
15037iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15038{
15039 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15040 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15041
15042 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
15043 /* Allocate a temporary FSW register. */
15044 /** @todo eliminate extra register */
15045 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
15046 kIemNativeGstRegUse_ReadOnly);
15047
15048 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
15049
15050 /* Free but don't flush the FSW register. */
15051 iemNativeRegFreeTmp(pReNative, idxFswReg);
15052 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15053
15054 return off;
15055}
15056
15057
15058
15059#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15060/*********************************************************************************************************************************
15061* Emitters for SSE/AVX specific operations. *
15062*********************************************************************************************************************************/
15063
15064#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
15065 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
15066
15067/** Emits code for IEM_MC_COPY_XREG_U128. */
15068DECL_INLINE_THROW(uint32_t)
15069iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
15070{
15071 /* Allocate destination and source register. */
15072 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
15073 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
15074 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
15075 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15076
15077 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
15078 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
15079 /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
15080
15081 /* Free but don't flush the source and destination register. */
15082 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
15083 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15084
15085 return off;
15086}
15087#endif
15088
15089
15090/*********************************************************************************************************************************
15091* The native code generator functions for each MC block. *
15092*********************************************************************************************************************************/
15093
15094/*
15095 * Include instruction emitters.
15096 */
15097#include "target-x86/IEMAllN8veEmit-x86.h"
15098
15099/*
15100 * Include g_apfnIemNativeRecompileFunctions and associated functions.
15101 *
15102 * This should probably live in its own file later, but let's see what the
15103 * compile times turn out to be first.
15104 */
15105#include "IEMNativeFunctions.cpp.h"
15106
15107
15108
15109/*********************************************************************************************************************************
15110* Recompiler Core. *
15111*********************************************************************************************************************************/
15112
15113
15114/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
15115static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
15116{
15117 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
15118 pDis->cbCachedInstr += cbMaxRead;
15119 RT_NOREF(cbMinRead);
15120 return VERR_NO_DATA;
15121}
15122
15123
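/**
 * Translates a byte offset into VMCPU(CC) to the name of the member located at
 * that offset, for annotating disassembled accesses relative to the fixed
 * pVCpu register.
 *
 * @returns Member name, or NULL if the offset isn't in the lookup table.
 * @param   off     The VMCPU byte offset to look up.
 */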
15124DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
15125{
15126 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
15127 {
15128#define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
15129 ENTRY(fLocalForcedActions),
15130 ENTRY(iem.s.rcPassUp),
15131 ENTRY(iem.s.fExec),
15132 ENTRY(iem.s.pbInstrBuf),
15133 ENTRY(iem.s.uInstrBufPc),
15134 ENTRY(iem.s.GCPhysInstrBuf),
15135 ENTRY(iem.s.cbInstrBufTotal),
15136 ENTRY(iem.s.idxTbCurInstr),
15137#ifdef VBOX_WITH_STATISTICS
15138 ENTRY(iem.s.StatNativeTlbHitsForFetch),
15139 ENTRY(iem.s.StatNativeTlbHitsForStore),
15140 ENTRY(iem.s.StatNativeTlbHitsForStack),
15141 ENTRY(iem.s.StatNativeTlbHitsForMapped),
15142 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
15143 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
15144 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
15145 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
15146#endif
15147 ENTRY(iem.s.DataTlb.aEntries),
15148 ENTRY(iem.s.DataTlb.uTlbRevision),
15149 ENTRY(iem.s.DataTlb.uTlbPhysRev),
15150 ENTRY(iem.s.DataTlb.cTlbHits),
15151 ENTRY(iem.s.CodeTlb.aEntries),
15152 ENTRY(iem.s.CodeTlb.uTlbRevision),
15153 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
15154 ENTRY(iem.s.CodeTlb.cTlbHits),
15155 ENTRY(pVMR3),
15156 ENTRY(cpum.GstCtx.rax),
15157 ENTRY(cpum.GstCtx.ah),
15158 ENTRY(cpum.GstCtx.rcx),
15159 ENTRY(cpum.GstCtx.ch),
15160 ENTRY(cpum.GstCtx.rdx),
15161 ENTRY(cpum.GstCtx.dh),
15162 ENTRY(cpum.GstCtx.rbx),
15163 ENTRY(cpum.GstCtx.bh),
15164 ENTRY(cpum.GstCtx.rsp),
15165 ENTRY(cpum.GstCtx.rbp),
15166 ENTRY(cpum.GstCtx.rsi),
15167 ENTRY(cpum.GstCtx.rdi),
15168 ENTRY(cpum.GstCtx.r8),
15169 ENTRY(cpum.GstCtx.r9),
15170 ENTRY(cpum.GstCtx.r10),
15171 ENTRY(cpum.GstCtx.r11),
15172 ENTRY(cpum.GstCtx.r12),
15173 ENTRY(cpum.GstCtx.r13),
15174 ENTRY(cpum.GstCtx.r14),
15175 ENTRY(cpum.GstCtx.r15),
15176 ENTRY(cpum.GstCtx.es.Sel),
15177 ENTRY(cpum.GstCtx.es.u64Base),
15178 ENTRY(cpum.GstCtx.es.u32Limit),
15179 ENTRY(cpum.GstCtx.es.Attr),
15180 ENTRY(cpum.GstCtx.cs.Sel),
15181 ENTRY(cpum.GstCtx.cs.u64Base),
15182 ENTRY(cpum.GstCtx.cs.u32Limit),
15183 ENTRY(cpum.GstCtx.cs.Attr),
15184 ENTRY(cpum.GstCtx.ss.Sel),
15185 ENTRY(cpum.GstCtx.ss.u64Base),
15186 ENTRY(cpum.GstCtx.ss.u32Limit),
15187 ENTRY(cpum.GstCtx.ss.Attr),
15188 ENTRY(cpum.GstCtx.ds.Sel),
15189 ENTRY(cpum.GstCtx.ds.u64Base),
15190 ENTRY(cpum.GstCtx.ds.u32Limit),
15191 ENTRY(cpum.GstCtx.ds.Attr),
15192 ENTRY(cpum.GstCtx.fs.Sel),
15193 ENTRY(cpum.GstCtx.fs.u64Base),
15194 ENTRY(cpum.GstCtx.fs.u32Limit),
15195 ENTRY(cpum.GstCtx.fs.Attr),
15196 ENTRY(cpum.GstCtx.gs.Sel),
15197 ENTRY(cpum.GstCtx.gs.u64Base),
15198 ENTRY(cpum.GstCtx.gs.u32Limit),
15199 ENTRY(cpum.GstCtx.gs.Attr),
15200 ENTRY(cpum.GstCtx.rip),
15201 ENTRY(cpum.GstCtx.eflags),
15202 ENTRY(cpum.GstCtx.uRipInhibitInt),
15203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15204 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
15205 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
15206 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
15207 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
15208 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
15209 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
15210 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
15211 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
15212 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
15213 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
15214 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
15215 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
15216 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
15217 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
15218 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
15219 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
15220 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
15221 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
15222 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
15223 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
15224 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
15225 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
15226 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
15227 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
15228 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
15229 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
15230 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
15231 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
15232 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
15233 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
15234 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
15235 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
15236#endif
15237#undef ENTRY
15238 };
15239#ifdef VBOX_STRICT
15240 static bool s_fOrderChecked = false;
15241 if (!s_fOrderChecked)
15242 {
15243 s_fOrderChecked = true;
15244 uint32_t offPrev = s_aMembers[0].off;
15245 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
15246 {
15247 Assert(s_aMembers[i].off > offPrev);
15248 offPrev = s_aMembers[i].off;
15249 }
15250 }
15251#endif
15252
15253 /*
15254 * Binary lookup.
15255 */
15256 unsigned iStart = 0;
15257 unsigned iEnd = RT_ELEMENTS(s_aMembers);
15258 for (;;)
15259 {
15260 unsigned const iCur = iStart + (iEnd - iStart) / 2;
15261 uint32_t const offCur = s_aMembers[iCur].off;
15262 if (off < offCur)
15263 {
15264 if (iCur != iStart)
15265 iEnd = iCur;
15266 else
15267 break;
15268 }
15269 else if (off > offCur)
15270 {
15271 if (iCur + 1 < iEnd)
15272 iStart = iCur + 1;
15273 else
15274 break;
15275 }
15276 else
15277 return s_aMembers[iCur].pszName;
15278 }
15279#ifdef VBOX_WITH_STATISTICS
15280 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
15281 return "iem.s.acThreadedFuncStats[iFn]";
15282#endif
15283 return NULL;
15284}
15285
15286
15287/**
15288 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
15289 * @returns pszBuf.
15290 * @param fFlags The flags.
15291 * @param pszBuf The output buffer.
15292 * @param cbBuf The output buffer size. At least 32 bytes.
15293 */
15294DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
15295{
15296 Assert(cbBuf >= 32);
15297 static RTSTRTUPLE const s_aModes[] =
15298 {
15299 /* [00] = */ { RT_STR_TUPLE("16BIT") },
15300 /* [01] = */ { RT_STR_TUPLE("32BIT") },
15301 /* [02] = */ { RT_STR_TUPLE("!2!") },
15302 /* [03] = */ { RT_STR_TUPLE("!3!") },
15303 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
15304 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
15305 /* [06] = */ { RT_STR_TUPLE("!6!") },
15306 /* [07] = */ { RT_STR_TUPLE("!7!") },
15307 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
15308 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
15309 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
15310 /* [0b] = */ { RT_STR_TUPLE("!b!") },
15311 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
15312 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
15313 /* [0e] = */ { RT_STR_TUPLE("!e!") },
15314 /* [0f] = */ { RT_STR_TUPLE("!f!") },
15315 /* [10] = */ { RT_STR_TUPLE("!10!") },
15316 /* [11] = */ { RT_STR_TUPLE("!11!") },
15317 /* [12] = */ { RT_STR_TUPLE("!12!") },
15318 /* [13] = */ { RT_STR_TUPLE("!13!") },
15319 /* [14] = */ { RT_STR_TUPLE("!14!") },
15320 /* [15] = */ { RT_STR_TUPLE("!15!") },
15321 /* [16] = */ { RT_STR_TUPLE("!16!") },
15322 /* [17] = */ { RT_STR_TUPLE("!17!") },
15323 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
15324 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
15325 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
15326 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
15327 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
15328 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
15329 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
15330 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
15331 };
15332 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
15333 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
15334 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
15335
15336 pszBuf[off++] = ' ';
15337 pszBuf[off++] = 'C';
15338 pszBuf[off++] = 'P';
15339 pszBuf[off++] = 'L';
15340 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
15341 Assert(off < 32);
15342
15343 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
15344
15345 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
15346 {
15347 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
15348 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
15349 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
15350 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
15351 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
15352 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
15353 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
15354 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
15355 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
15356 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
15357 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
15358 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
15359 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
15360 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
15361 };
15362 if (fFlags)
15363 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
15364 if (s_aFlags[i].fFlag & fFlags)
15365 {
15366 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
15367 pszBuf[off++] = ' ';
15368 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
15369 off += s_aFlags[i].cchName;
15370 fFlags &= ~s_aFlags[i].fFlag;
15371 if (!fFlags)
15372 break;
15373 }
15374 pszBuf[off] = '\0';
15375
15376 return pszBuf;
15377}
15378
15379
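/**
 * Disassembles a native translation block for logging/debugging purposes.
 *
 * When debug info is present (IEMNATIVE_WITH_TB_DEBUG_INFO), the native
 * instructions are interleaved with the corresponding guest instructions,
 * threaded calls, labels and register shadowing notes; otherwise the guest
 * opcode ranges and the native code are disassembled back to back.
 */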
15380DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
15381{
15382 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
15383#if defined(RT_ARCH_AMD64)
15384 static const char * const a_apszMarkers[] =
15385 {
15386 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
15387 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
15388 };
15389#endif
15390
15391 char szDisBuf[512];
15392 DISSTATE Dis;
15393 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
15394 uint32_t const cNative = pTb->Native.cInstructions;
15395 uint32_t offNative = 0;
15396#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15397 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
15398#endif
15399 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15400 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15401 : DISCPUMODE_64BIT;
15402#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15403 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
15404#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15405 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
15406#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15407# error "Port me"
15408#else
15409 csh hDisasm = ~(size_t)0;
15410# if defined(RT_ARCH_AMD64)
15411 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
15412# elif defined(RT_ARCH_ARM64)
15413 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
15414# else
15415# error "Port me"
15416# endif
15417 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
15418
15419 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
15420 //Assert(rcCs == CS_ERR_OK);
15421#endif
15422
15423 /*
15424 * Print TB info.
15425 */
15426 pHlp->pfnPrintf(pHlp,
15427 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
15428 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
15429 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
15430 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
15431#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15432 if (pDbgInfo && pDbgInfo->cEntries > 1)
15433 {
15434 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
15435
15436 /*
15437 * This disassembly is driven by the debug info which follows the native
15438 * code and indicates where the next guest instruction starts,
15439 * where labels are and such things.
15440 */
15441 uint32_t idxThreadedCall = 0;
15442 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
15443 uint8_t idxRange = UINT8_MAX;
15444 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
15445 uint32_t offRange = 0;
15446 uint32_t offOpcodes = 0;
15447 uint32_t const cbOpcodes = pTb->cbOpcodes;
15448 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
15449 uint32_t const cDbgEntries = pDbgInfo->cEntries;
15450 uint32_t iDbgEntry = 1;
15451 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
15452
15453 while (offNative < cNative)
15454 {
15455 /* If we're at or have passed the point where the next chunk of debug
15456 info starts, process it. */
15457 if (offDbgNativeNext <= offNative)
15458 {
15459 offDbgNativeNext = UINT32_MAX;
15460 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
15461 {
15462 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
15463 {
15464 case kIemTbDbgEntryType_GuestInstruction:
15465 {
15466 /* Did the exec flag change? */
15467 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
15468 {
15469 pHlp->pfnPrintf(pHlp,
15470 " fExec change %#08x -> %#08x %s\n",
15471 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15472 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15473 szDisBuf, sizeof(szDisBuf)));
15474 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
15475 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15476 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15477 : DISCPUMODE_64BIT;
15478 }
15479
15480 /* New opcode range? We need to fend off a spurious debug info entry here for cases
15481 where the compilation was aborted before the opcode was recorded and the actual
15482 instruction was translated to a threaded call. This may happen when we run out
15483 of ranges, or when some complicated interrupts/FFs are found to be pending or
15484 similar. So, we just deal with it here rather than in the compiler code as it
15485 is a lot simpler to do here. */
15486 if ( idxRange == UINT8_MAX
15487 || idxRange >= cRanges
15488 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
15489 {
15490 idxRange += 1;
15491 if (idxRange < cRanges)
15492 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
15493 else
15494 continue;
15495 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
15496 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
15497 + (pTb->aRanges[idxRange].idxPhysPage == 0
15498 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
15499 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
15500 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
15501 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
15502 pTb->aRanges[idxRange].idxPhysPage);
15503 GCPhysPc += offRange;
15504 }
15505
15506 /* Disassemble the instruction. */
15507 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
15508 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
15509 uint32_t cbInstr = 1;
15510 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
15511 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
15512 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
15513 if (RT_SUCCESS(rc))
15514 {
15515 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15516 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15517 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15518 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15519
15520 static unsigned const s_offMarker = 55;
15521 static char const s_szMarker[] = " ; <--- guest";
15522 if (cch < s_offMarker)
15523 {
15524 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
15525 cch = s_offMarker;
15526 }
15527 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
15528 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
15529
15530 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
15531 }
15532 else
15533 {
15534 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
15535 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
15536 cbInstr = 1;
15537 }
15538 GCPhysPc += cbInstr;
15539 offOpcodes += cbInstr;
15540 offRange += cbInstr;
15541 continue;
15542 }
15543
15544 case kIemTbDbgEntryType_ThreadedCall:
15545 pHlp->pfnPrintf(pHlp,
15546 " Call #%u to %s (%u args) - %s\n",
15547 idxThreadedCall,
15548 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15549 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15550 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
15551 idxThreadedCall++;
15552 continue;
15553
15554 case kIemTbDbgEntryType_GuestRegShadowing:
15555 {
15556 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15557 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
15558 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
15559 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
15560 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15561 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
15562 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
15563 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
15564 else
15565 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
15566 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
15567 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15568 continue;
15569 }
15570
15571#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15572 case kIemTbDbgEntryType_GuestSimdRegShadowing:
15573 {
15574 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15575 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
15576 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
15577 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
15578 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15579 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
15580 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
15581 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
15582 else
15583 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
15584 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
15585 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15586 continue;
15587 }
15588#endif
15589
15590 case kIemTbDbgEntryType_Label:
15591 {
15592 const char *pszName = "what_the_fudge";
15593 const char *pszComment = "";
15594 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
15595 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
15596 {
15597 case kIemNativeLabelType_Return:
15598 pszName = "Return";
15599 break;
15600 case kIemNativeLabelType_ReturnBreak:
15601 pszName = "ReturnBreak";
15602 break;
15603 case kIemNativeLabelType_ReturnWithFlags:
15604 pszName = "ReturnWithFlags";
15605 break;
15606 case kIemNativeLabelType_NonZeroRetOrPassUp:
15607 pszName = "NonZeroRetOrPassUp";
15608 break;
15609 case kIemNativeLabelType_RaiseGp0:
15610 pszName = "RaiseGp0";
15611 break;
15612 case kIemNativeLabelType_RaiseNm:
15613 pszName = "RaiseNm";
15614 break;
15615 case kIemNativeLabelType_RaiseUd:
15616 pszName = "RaiseUd";
15617 break;
15618 case kIemNativeLabelType_RaiseMf:
15619 pszName = "RaiseMf";
15620 break;
15621 case kIemNativeLabelType_RaiseXf:
15622 pszName = "RaiseXf";
15623 break;
15624 case kIemNativeLabelType_ObsoleteTb:
15625 pszName = "ObsoleteTb";
15626 break;
15627 case kIemNativeLabelType_NeedCsLimChecking:
15628 pszName = "NeedCsLimChecking";
15629 break;
15630 case kIemNativeLabelType_CheckBranchMiss:
15631 pszName = "CheckBranchMiss";
15632 break;
15633 case kIemNativeLabelType_If:
15634 pszName = "If";
15635 fNumbered = true;
15636 break;
15637 case kIemNativeLabelType_Else:
15638 pszName = "Else";
15639 fNumbered = true;
15640 pszComment = " ; regs state restored pre-if-block";
15641 break;
15642 case kIemNativeLabelType_Endif:
15643 pszName = "Endif";
15644 fNumbered = true;
15645 break;
15646 case kIemNativeLabelType_CheckIrq:
15647 pszName = "CheckIrq_CheckVM";
15648 fNumbered = true;
15649 break;
15650 case kIemNativeLabelType_TlbLookup:
15651 pszName = "TlbLookup";
15652 fNumbered = true;
15653 break;
15654 case kIemNativeLabelType_TlbMiss:
15655 pszName = "TlbMiss";
15656 fNumbered = true;
15657 break;
15658 case kIemNativeLabelType_TlbDone:
15659 pszName = "TlbDone";
15660 fNumbered = true;
15661 break;
15662 case kIemNativeLabelType_Invalid:
15663 case kIemNativeLabelType_End:
15664 break;
15665 }
15666 if (fNumbered)
15667 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
15668 else
15669 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
15670 continue;
15671 }
15672
15673 case kIemTbDbgEntryType_NativeOffset:
15674 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
15675 Assert(offDbgNativeNext > offNative);
15676 break;
15677
15678#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
15679 case kIemTbDbgEntryType_DelayedPcUpdate:
15680 pHlp->pfnPrintf(pHlp,
15681 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
15682 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
15683 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
15684 continue;
15685#endif
15686
15687 default:
15688 AssertFailed();
15689 }
15690 iDbgEntry++;
15691 break;
15692 }
15693 }
15694
15695 /*
15696 * Disassemble the next native instruction.
15697 */
15698 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
15699# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15700 uint32_t cbInstr = sizeof(paNative[0]);
15701 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
15702 if (RT_SUCCESS(rc))
15703 {
15704# if defined(RT_ARCH_AMD64)
15705 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
15706 {
15707 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
15708 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
15709 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
15710 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
15711 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
15712 uInfo & 0x8000 ? "recompiled" : "todo");
15713 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
15714 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
15715 else
15716 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
15717 }
15718 else
15719# endif
15720 {
15721 const char *pszAnnotation = NULL;
15722# ifdef RT_ARCH_AMD64
15723 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15724 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15725 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15726 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15727 PCDISOPPARAM pMemOp;
15728 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
15729 pMemOp = &Dis.Param1;
15730 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
15731 pMemOp = &Dis.Param2;
15732 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
15733 pMemOp = &Dis.Param3;
15734 else
15735 pMemOp = NULL;
15736 if ( pMemOp
15737 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
15738 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
15739 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
15740 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
15741
15742#elif defined(RT_ARCH_ARM64)
15743 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
15744 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15745 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15746# else
15747# error "Port me"
15748# endif
15749 if (pszAnnotation)
15750 {
15751 static unsigned const s_offAnnotation = 55;
15752 size_t const cchAnnotation = strlen(pszAnnotation);
15753 size_t cchDis = strlen(szDisBuf);
15754 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
15755 {
15756 if (cchDis < s_offAnnotation)
15757 {
15758 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
15759 cchDis = s_offAnnotation;
15760 }
15761 szDisBuf[cchDis++] = ' ';
15762 szDisBuf[cchDis++] = ';';
15763 szDisBuf[cchDis++] = ' ';
15764 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
15765 }
15766 }
15767 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
15768 }
15769 }
15770 else
15771 {
15772# if defined(RT_ARCH_AMD64)
15773 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
15774 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
15775# elif defined(RT_ARCH_ARM64)
15776 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
15777# else
15778# error "Port me"
15779# endif
15780 cbInstr = sizeof(paNative[0]);
15781 }
15782 offNative += cbInstr / sizeof(paNative[0]);
15783
15784# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15785 cs_insn *pInstr;
15786 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
15787 (uintptr_t)pNativeCur, 1, &pInstr);
15788 if (cInstrs > 0)
15789 {
15790 Assert(cInstrs == 1);
15791 const char *pszAnnotation = NULL;
15792# if defined(RT_ARCH_ARM64)
15793 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
15794 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
15795 {
15796                    /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
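                         /* Illustrative example (the operand text here is hypothetical): for
                            something like "w9, [x28, #0x100]" the base register digit picks the
                            anchor (x28 = VMCPU, x27 = CPUMCTX) and the '#' immediate is the byte
                            displacement, which iemNativeDbgVCpuOffsetToName() maps to a name. */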
15797 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
15798 char *psz = strchr(pInstr->op_str, '[');
15799 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
15800 {
15801                        uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
15802 int32_t off = -1;
15803 psz += 4;
15804 if (*psz == ']')
15805 off = 0;
15806 else if (*psz == ',')
15807 {
15808 psz = RTStrStripL(psz + 1);
15809 if (*psz == '#')
15810 off = RTStrToInt32(&psz[1]);
15811 /** @todo deal with index registers and LSL as well... */
15812 }
15813 if (off >= 0)
15814 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
15815 }
15816 }
15817# endif
15818
15819 size_t const cchOp = strlen(pInstr->op_str);
15820# if defined(RT_ARCH_AMD64)
15821 if (pszAnnotation)
15822 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
15823 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
15824 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15825 else
15826 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
15827 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
15828
15829# else
15830 if (pszAnnotation)
15831 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
15832 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
15833 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15834 else
15835 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
15836 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
15837# endif
15838 offNative += pInstr->size / sizeof(*pNativeCur);
15839 cs_free(pInstr, cInstrs);
15840 }
15841 else
15842 {
15843# if defined(RT_ARCH_AMD64)
15844 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
15845                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
15846# else
15847 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
15848# endif
15849 offNative++;
15850 }
15851# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15852 }
15853 }
15854 else
15855#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
15856 {
15857 /*
15858 * No debug info, just disassemble the x86 code and then the native code.
15859 *
15860 * First the guest code:
15861 */
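         /* Each opcode range refers either to the TB's own first physical page
            (idxPhysPage == 0) or to one of the extra pages in aGCPhysPages; that is
            how GCPhysPc is reassembled for the range header printed below. */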
15862 for (unsigned i = 0; i < pTb->cRanges; i++)
15863 {
15864 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
15865 + (pTb->aRanges[i].idxPhysPage == 0
15866 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
15867 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
15868 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
15869 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
15870 unsigned off = pTb->aRanges[i].offOpcodes;
15871 /** @todo this ain't working when crossing pages! */
15872 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
15873 while (off < cbOpcodes)
15874 {
15875 uint32_t cbInstr = 1;
15876 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
15877 &pTb->pabOpcodes[off], cbOpcodes - off,
15878 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
15879 if (RT_SUCCESS(rc))
15880 {
15881 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15882 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15883 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15884 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15885 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
15886 GCPhysPc += cbInstr;
15887 off += cbInstr;
15888 }
15889 else
15890 {
15891 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
15892 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
15893 break;
15894 }
15895 }
15896 }
15897
15898 /*
15899 * Then the native code:
15900 */
15901 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
15902 while (offNative < cNative)
15903 {
15904 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
15905# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15906 uint32_t cbInstr = sizeof(paNative[0]);
15907 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
15908 if (RT_SUCCESS(rc))
15909 {
15910# if defined(RT_ARCH_AMD64)
15911 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
15912 {
15913 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
15914 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
15915 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
15916 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
15917 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
15918 uInfo & 0x8000 ? "recompiled" : "todo");
15919 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
15920 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
15921 else
15922 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
15923 }
15924 else
15925# endif
15926 {
15927# ifdef RT_ARCH_AMD64
15928 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15929 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15930 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15931 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15932# elif defined(RT_ARCH_ARM64)
15933 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
15934 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15935 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15936# else
15937# error "Port me"
15938# endif
15939 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
15940 }
15941 }
15942 else
15943 {
15944# if defined(RT_ARCH_AMD64)
15945 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
15946 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
15947# else
15948 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
15949# endif
15950 cbInstr = sizeof(paNative[0]);
15951 }
15952 offNative += cbInstr / sizeof(paNative[0]);
15953
15954# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15955 cs_insn *pInstr;
15956 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
15957 (uintptr_t)pNativeCur, 1, &pInstr);
15958 if (cInstrs > 0)
15959 {
15960 Assert(cInstrs == 1);
15961# if defined(RT_ARCH_AMD64)
15962 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
15963 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
15964# else
15965 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
15966 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
15967# endif
15968 offNative += pInstr->size / sizeof(*pNativeCur);
15969 cs_free(pInstr, cInstrs);
15970 }
15971 else
15972 {
15973# if defined(RT_ARCH_AMD64)
15974 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
15975                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
15976# else
15977 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
15978# endif
15979 offNative++;
15980 }
15981# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15982 }
15983 }
15984
15985#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15986 /* Cleanup. */
15987 cs_close(&hDisasm);
15988#endif
15989}
15990
15991
15992/**
15993 * Recompiles the given threaded TB into a native one.
15994 *
15995 * In case of failure the translation block will be returned as-is.
15996 *
15997 * @returns pTb.
15998 * @param pVCpu The cross context virtual CPU structure of the calling
15999 * thread.
16000 * @param pTb The threaded translation block to recompile to native.
16001 */
16002DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
16003{
16004 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
16005
16006 /*
16007 * The first time thru, we allocate the recompiler state, the other times
16008 * we just need to reset it before using it again.
16009 */
16010 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
16011 if (RT_LIKELY(pReNative))
16012 iemNativeReInit(pReNative, pTb);
16013 else
16014 {
16015 pReNative = iemNativeInit(pVCpu, pTb);
16016 AssertReturn(pReNative, pTb);
16017 }
16018
16019#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16020 /*
16021 * First do liveness analysis. This is done backwards.
16022 */
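         /* Rough mental model of the backwards pass below: the last entry is seeded as
            'all unused', then each call's liveness function derives entry i-1 from
            entry i, so that entry i ends up describing how the calls following call i
            use the guest state by the time call i itself gets recompiled. */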
16023 {
16024 uint32_t idxCall = pTb->Thrd.cCalls;
16025 if (idxCall <= pReNative->cLivenessEntriesAlloc)
16026 { /* likely */ }
16027 else
16028 {
16029 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
16030 while (idxCall > cAlloc)
16031 cAlloc *= 2;
16032 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
16033 AssertReturn(pvNew, pTb);
16034 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
16035 pReNative->cLivenessEntriesAlloc = cAlloc;
16036 }
16037 AssertReturn(idxCall > 0, pTb);
16038 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
16039
16040 /* The initial (final) entry. */
16041 idxCall--;
16042 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
16043
16044 /* Loop backwards thru the calls and fill in the other entries. */
16045 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
16046 while (idxCall > 0)
16047 {
16048 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
16049 if (pfnLiveness)
16050 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
16051 else
16052 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
16053 pCallEntry--;
16054 idxCall--;
16055 }
16056
16057# ifdef VBOX_WITH_STATISTICS
16058        /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
16059           to 'clobbered' rather than 'input'. */
16060 /** @todo */
16061# endif
16062 }
16063#endif
16064
16065 /*
16066 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
16067 * for aborting if an error happens.
16068 */
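         /* Resulting control flow, assuming the IEMNATIVE_TRY_SETJMP / IEMNATIVE_CATCH_LONGJMP
            macros wrap either C++ exceptions or setjmp/longjmp as stated above: an emitter
            that fails throws/longjmps out of the guarded block, 'rc' receives the status
            code, and the catch block further down logs it and returns the unmodified
            threaded pTb. */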
16069 uint32_t cCallsLeft = pTb->Thrd.cCalls;
16070#ifdef LOG_ENABLED
16071 uint32_t const cCallsOrg = cCallsLeft;
16072#endif
16073 uint32_t off = 0;
16074 int rc = VINF_SUCCESS;
16075 IEMNATIVE_TRY_SETJMP(pReNative, rc)
16076 {
16077 /*
16078 * Emit prolog code (fixed).
16079 */
16080 off = iemNativeEmitProlog(pReNative, off);
16081
16082 /*
16083 * Convert the calls to native code.
16084 */
16085#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16086 int32_t iGstInstr = -1;
16087#endif
16088#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
16089 uint32_t cThreadedCalls = 0;
16090 uint32_t cRecompiledCalls = 0;
16091#endif
16092#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16093 uint32_t idxCurCall = 0;
16094#endif
16095 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
16096 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
16097 while (cCallsLeft-- > 0)
16098 {
16099 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
16100#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16101 pReNative->idxCurCall = idxCurCall;
16102#endif
16103
16104 /*
16105 * Debug info, assembly markup and statistics.
16106 */
16107#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
16108 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
16109 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
16110#endif
16111#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16112 iemNativeDbgInfoAddNativeOffset(pReNative, off);
16113 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
16114 {
16115 if (iGstInstr < (int32_t)pTb->cInstructions)
16116 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
16117 else
16118 Assert(iGstInstr == pTb->cInstructions);
16119 iGstInstr = pCallEntry->idxInstr;
16120 }
16121 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
16122#endif
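                 /* In strict builds, emit a marker whose 32-bit payload mirrors what the
                    disassembler above decodes: bits 0..14 = call index, bit 15 = set when
                    a native recompiler function exists, high word = threaded function index. */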
16123#if defined(VBOX_STRICT)
16124 off = iemNativeEmitMarker(pReNative, off,
16125 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
16126#endif
16127#if defined(VBOX_STRICT)
16128 iemNativeRegAssertSanity(pReNative);
16129#endif
16130#ifdef VBOX_WITH_STATISTICS
16131 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
16132#endif
16133
16134 /*
16135 * Actual work.
16136 */
16137 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
16138 pfnRecom ? "(recompiled)" : "(todo)"));
16139 if (pfnRecom) /** @todo stats on this. */
16140 {
16141 off = pfnRecom(pReNative, off, pCallEntry);
16142 STAM_REL_STATS({cRecompiledCalls++;});
16143 }
16144 else
16145 {
16146 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
16147 STAM_REL_STATS({cThreadedCalls++;});
16148 }
16149 Assert(off <= pReNative->cInstrBufAlloc);
16150 Assert(pReNative->cCondDepth == 0);
16151
16152#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
16153 if (LogIs2Enabled())
16154 {
16155 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
16156# ifndef IEMLIVENESS_EXTENDED_LAYOUT
16157 static const char s_achState[] = "CUXI";
16158# else
16159 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
16160# endif
16161
16162 char szGpr[17];
16163 for (unsigned i = 0; i < 16; i++)
16164 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
16165 szGpr[16] = '\0';
16166
16167 char szSegBase[X86_SREG_COUNT + 1];
16168 char szSegLimit[X86_SREG_COUNT + 1];
16169 char szSegAttrib[X86_SREG_COUNT + 1];
16170 char szSegSel[X86_SREG_COUNT + 1];
16171 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
16172 {
16173 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
16174 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
16175 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
16176 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
16177 }
16178 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
16179 = szSegSel[X86_SREG_COUNT] = '\0';
16180
16181 char szEFlags[8];
16182 for (unsigned i = 0; i < 7; i++)
16183 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
16184 szEFlags[7] = '\0';
16185
16186                Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
16187 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
16188 }
16189#endif
16190
16191 /*
16192 * Advance.
16193 */
16194 pCallEntry++;
16195#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16196 idxCurCall++;
16197#endif
16198 }
16199
16200 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
16201 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
16202 if (!cThreadedCalls)
16203 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
16204
16205 /*
16206 * Emit the epilog code.
16207 */
16208 uint32_t idxReturnLabel;
16209 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
16210
16211 /*
16212 * Generate special jump labels.
16213 */
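             /* Each of these shared tail sequences is emitted only if something actually
                requested the corresponding label, as recorded in the bmLabelTypes bitmap. */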
16214 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
16215 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
16216 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
16217 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
16218 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
16219 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
16220 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
16221 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
16222 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
16223 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
16224 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
16225 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
16226 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
16227 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
16228 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
16229 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
16230 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
16231 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
16232 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
16233 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
16234 }
16235 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
16236 {
16237 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
16238 return pTb;
16239 }
16240 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
16241 Assert(off <= pReNative->cInstrBufAlloc);
16242
16243 /*
16244     * Make sure all labels have been defined.
16245 */
16246 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
16247#ifdef VBOX_STRICT
16248 uint32_t const cLabels = pReNative->cLabels;
16249 for (uint32_t i = 0; i < cLabels; i++)
16250 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
16251#endif
16252
16253 /*
16254 * Allocate executable memory, copy over the code we've generated.
16255 */
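         /* Note that 'off' counts IEMNATIVEINSTR units rather than bytes; judging from
            the disassembly code above that is a byte on AMD64 and a 32-bit word on ARM64,
            hence the sizeof() scaling here and below. */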
16256 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
16257 if (pTbAllocator->pDelayedFreeHead)
16258 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
16259
16260 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
16261 AssertReturn(paFinalInstrBuf, pTb);
16262 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
16263
16264 /*
16265 * Apply fixups.
16266 */
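         /* Each fixup patches the already copied instruction at paFinalInstrBuf[off] so
            that it encodes the distance from the fixup site to its label (plus the
            per-fixup addend), using the encoding selected by enmType. */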
16267 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
16268 uint32_t const cFixups = pReNative->cFixups;
16269 for (uint32_t i = 0; i < cFixups; i++)
16270 {
16271 Assert(paFixups[i].off < off);
16272 Assert(paFixups[i].idxLabel < cLabels);
16273 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
16274 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
16275 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
16276 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
16277 switch (paFixups[i].enmType)
16278 {
16279#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
16280 case kIemNativeFixupType_Rel32:
16281 Assert(paFixups[i].off + 4 <= off);
16282 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16283 continue;
16284
16285#elif defined(RT_ARCH_ARM64)
16286 case kIemNativeFixupType_RelImm26At0:
16287 {
16288 Assert(paFixups[i].off < off);
16289 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16290 Assert(offDisp >= -262144 && offDisp < 262144);
16291 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
16292 continue;
16293 }
16294
16295 case kIemNativeFixupType_RelImm19At5:
16296 {
16297 Assert(paFixups[i].off < off);
16298 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16299 Assert(offDisp >= -262144 && offDisp < 262144);
16300 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
16301 continue;
16302 }
16303
16304 case kIemNativeFixupType_RelImm14At5:
16305 {
16306 Assert(paFixups[i].off < off);
16307 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16308 Assert(offDisp >= -8192 && offDisp < 8192);
16309 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
16310 continue;
16311 }
16312
16313#endif
16314 case kIemNativeFixupType_Invalid:
16315 case kIemNativeFixupType_End:
16316 break;
16317 }
16318 AssertFailed();
16319 }
16320
16321 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
16322 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
16323
16324 /*
16325 * Convert the translation block.
16326 */
16327 RTMemFree(pTb->Thrd.paCalls);
16328 pTb->Native.paInstructions = paFinalInstrBuf;
16329 pTb->Native.cInstructions = off;
16330 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
16331#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16332    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
16333 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
16334#endif
16335
16336 Assert(pTbAllocator->cThreadedTbs > 0);
16337 pTbAllocator->cThreadedTbs -= 1;
16338 pTbAllocator->cNativeTbs += 1;
16339 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
16340
16341#ifdef LOG_ENABLED
16342 /*
16343 * Disassemble to the log if enabled.
16344 */
16345 if (LogIs3Enabled())
16346 {
16347 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
16348 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
16349# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
16350 RTLogFlush(NULL);
16351# endif
16352 }
16353#endif
16354 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
16355
16356 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
16357 return pTb;
16358}
16359