VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103756

Last change on this file since 103756 was 103750, checked in by vboxsync, 10 months ago

VMM/IEM: Implement some of the amd64 emitters for SSE/AVX, bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 733.8 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103750 2024-03-10 20:12:55Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
136static uint32_t iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
137 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
138# endif
139static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
140#endif
141#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
142static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
143static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
144#endif
145DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
146DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
147 IEMNATIVEGSTREG enmGstReg, uint32_t off);
148DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
149
150
151/*********************************************************************************************************************************
152* Executable Memory Allocator *
153*********************************************************************************************************************************/
154/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155 * Use an alternative chunk sub-allocator that does not store internal data
156 * in the chunk.
157 *
158 * Using the RTHeapSimple is not practical on newer darwin systems where
159 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
160 * memory. We would have to change the protection of the whole chunk for
161 * every call to RTHeapSimple, which would be rather expensive.
162 *
163 * This alternative implementation lets us restrict page protection modifications
164 * to the pages backing the executable memory we just allocated.
165 */
166#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
167/** The chunk sub-allocation unit size in bytes. */
168#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
169/** The chunk sub-allocation unit size as a shift factor. */
170#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
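
/* A quick worked example of how a request maps onto these units (an
   illustrative sketch only; the helper name is made up and the block is not
   compiled): a 200 byte request rounds up to two 128 byte units, 256 bytes. */
#if 0
DECLINLINE(uint32_t) iemExecMemExampleReqUnits(uint32_t cbReq)
{
    /* e.g. (200 + 128 - 1) >> 7 = 2 units. */
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif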
171
172#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
173# ifdef IEMNATIVE_USE_GDB_JIT
174# define IEMNATIVE_USE_GDB_JIT_ET_DYN
175
176/** GDB JIT: Code entry. */
177typedef struct GDBJITCODEENTRY
178{
179 struct GDBJITCODEENTRY *pNext;
180 struct GDBJITCODEENTRY *pPrev;
181 uint8_t *pbSymFile;
182 uint64_t cbSymFile;
183} GDBJITCODEENTRY;
184
185/** GDB JIT: Actions. */
186typedef enum GDBJITACTIONS : uint32_t
187{
188 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
189} GDBJITACTIONS;
190
191/** GDB JIT: Descriptor. */
192typedef struct GDBJITDESCRIPTOR
193{
194 uint32_t uVersion;
195 GDBJITACTIONS enmAction;
196 GDBJITCODEENTRY *pRelevant;
197 GDBJITCODEENTRY *pHead;
198 /** Our addition: */
199 GDBJITCODEENTRY *pTail;
200} GDBJITDESCRIPTOR;
201
202/** GDB JIT: Our simple symbol file data. */
203typedef struct GDBJITSYMFILE
204{
205 Elf64_Ehdr EHdr;
206# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Shdr aShdrs[5];
208# else
209 Elf64_Shdr aShdrs[7];
210 Elf64_Phdr aPhdrs[2];
211# endif
212 /** The dwarf ehframe data for the chunk. */
213 uint8_t abEhFrame[512];
214 char szzStrTab[128];
215 Elf64_Sym aSymbols[3];
216# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
217 Elf64_Sym aDynSyms[2];
218 Elf64_Dyn aDyn[6];
219# endif
220} GDBJITSYMFILE;
221
222extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
223extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
224
225/** Init once for g_IemNativeGdbJitLock. */
226static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
227/** Init once for the critical section. */
228static RTCRITSECT g_IemNativeGdbJitLock;
229
230/** GDB reads the info here. */
231GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
232
233/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
234DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
235{
236 ASMNopPause();
237}
238
239/** @callback_method_impl{FNRTONCE} */
240static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
241{
242 RT_NOREF(pvUser);
243 return RTCritSectInit(&g_IemNativeGdbJitLock);
244}
245
246
247# endif /* IEMNATIVE_USE_GDB_JIT */
248
249/**
250 * Per-chunk unwind info for non-windows hosts.
251 */
252typedef struct IEMEXECMEMCHUNKEHFRAME
253{
254# ifdef IEMNATIVE_USE_LIBUNWIND
255 /** The offset of the FDA into abEhFrame. */
256 uintptr_t offFda;
257# else
258 /** 'struct object' storage area. */
259 uint8_t abObject[1024];
260# endif
261# ifdef IEMNATIVE_USE_GDB_JIT
262# if 0
263 /** The GDB JIT 'symbol file' data. */
264 GDBJITSYMFILE GdbJitSymFile;
265# endif
266 /** The GDB JIT list entry. */
267 GDBJITCODEENTRY GdbJitEntry;
268# endif
269 /** The dwarf ehframe data for the chunk. */
270 uint8_t abEhFrame[512];
271} IEMEXECMEMCHUNKEHFRAME;
272/** Pointer to per-chunk unwind info for non-windows hosts. */
273typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
274#endif
275
276
277/**
278 * A chunk of executable memory.
279 */
280typedef struct IEMEXECMEMCHUNK
281{
282#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
283 /** Number of free items in this chunk. */
284 uint32_t cFreeUnits;
285 /** Hint where to start searching for free space in the allocation bitmap. */
286 uint32_t idxFreeHint;
287#else
288 /** The heap handle. */
289 RTHEAPSIMPLE hHeap;
290#endif
291 /** Pointer to the chunk. */
292 void *pvChunk;
293#ifdef IN_RING3
294 /**
295 * Pointer to the unwind information.
296 *
297 * This is used during C++ throw and longjmp (windows and probably most other
298 * platforms). Some debuggers (windbg) make use of it as well.
299 *
300 * Windows: This is allocated from hHeap on windows because (at least for
301 * AMD64) the UNWIND_INFO structure address in the
302 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
303 *
304 * Others: Allocated from the regular heap to avoid unnecessary executable data
305 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
306 void *pvUnwindInfo;
307#elif defined(IN_RING0)
308 /** Allocation handle. */
309 RTR0MEMOBJ hMemObj;
310#endif
311} IEMEXECMEMCHUNK;
312/** Pointer to a memory chunk. */
313typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
314
315
316/**
317 * Executable memory allocator for the native recompiler.
318 */
319typedef struct IEMEXECMEMALLOCATOR
320{
321 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
322 uint32_t uMagic;
323
324 /** The chunk size. */
325 uint32_t cbChunk;
326 /** The maximum number of chunks. */
327 uint32_t cMaxChunks;
328 /** The current number of chunks. */
329 uint32_t cChunks;
330 /** Hint where to start looking for available memory. */
331 uint32_t idxChunkHint;
332 /** Statistics: Current number of allocations. */
333 uint32_t cAllocations;
334
335 /** The total amount of memory available. */
336 uint64_t cbTotal;
337 /** Total amount of free memory. */
338 uint64_t cbFree;
339 /** Total amount of memory allocated. */
340 uint64_t cbAllocated;
341
342#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
343 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
344 *
345 * Since the chunk size is a power of two and the minimum chunk size is a lot
346 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
347 * require a whole number of uint64_t elements in the allocation bitmap. So,
348 * for the sake of simplicity (and laziness), the bitmaps are allocated as one
349 * continuous block. */
350 uint64_t *pbmAlloc;
351 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
352 uint32_t cUnitsPerChunk;
353 /** Number of bitmap elements per chunk (for quickly locating the bitmap
354 * portion corresponding to a chunk). */
355 uint32_t cBitmapElementsPerChunk;
356#else
357 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
358 * @{ */
359 /** The size of the heap internal block header. This is used to adjust the
360 * request memory size to make sure there is exactly enough room for a header at
361 * the end of the blocks we allocate before the next 64 byte alignment line. */
362 uint32_t cbHeapBlockHdr;
363 /** The size of the initial heap allocation required to make sure the first
364 * allocation is correctly aligned. */
365 uint32_t cbHeapAlignTweak;
366 /** The alignment tweak allocation address. */
367 void *pvAlignTweak;
368 /** @} */
369#endif
370
371#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
372 /** Pointer to the array of unwind info running parallel to aChunks (same
373 * allocation as this structure, located after the bitmaps).
374 * (For Windows, the structures must reside in 32-bit RVA distance to the
375 * actual chunk, so they are allocated off the chunk.) */
376 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
377#endif
378
379 /** The allocation chunks. */
380 RT_FLEXIBLE_ARRAY_EXTENSION
381 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
382} IEMEXECMEMALLOCATOR;
383/** Pointer to an executable memory allocator. */
384typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
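
/* Sizing sketch for the alternative sub-allocator bookkeeping above (the
   numbers are illustrative, not a statement about the defaults): with a 64 MB
   chunk and 128 byte units, cUnitsPerChunk = 64M / 128 = 524288 and
   cBitmapElementsPerChunk = 524288 / 64 = 8192 uint64_t words, i.e. 64 KB of
   allocation bitmap per chunk. */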
385
386/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
387#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
388
389
390static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
391
392
393/**
394 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
395 * the heap statistics.
396 */
397static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
398 uint32_t cbReq, uint32_t idxChunk)
399{
400 pExecMemAllocator->cAllocations += 1;
401 pExecMemAllocator->cbAllocated += cbReq;
402#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
403 pExecMemAllocator->cbFree -= cbReq;
404#else
405 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
406#endif
407 pExecMemAllocator->idxChunkHint = idxChunk;
408
409#ifdef RT_OS_DARWIN
410 /*
411 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
412 * on darwin. So, we mark the pages returned as read+write after alloc and
413 * expect the caller to call iemExecMemAllocatorReadyForUse when done
414 * writing to the allocation.
415 *
416 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
417 * for details.
418 */
419 /** @todo detect if this is necessary... it wasn't required on 10.15 or
420 * whatever older version it was. */
421 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
422 AssertRC(rc);
423#endif
424
425 return pvRet;
426}
427
428
429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
430static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
431 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
432{
433 /*
434 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
435 */
436 Assert(!(cToScan & 63));
437 Assert(!(idxFirst & 63));
438 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
439 pbmAlloc += idxFirst / 64;
440
441 /*
442 * Scan the bitmap for cReqUnits consecutive clear bits
443 */
444 /** @todo This can probably be done more efficiently for non-x86 systems. */
445 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
446 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
447 {
448 uint32_t idxAddBit = 1;
449 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
450 idxAddBit++;
451 if (idxAddBit >= cReqUnits)
452 {
453 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
454
455 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
456 pChunk->cFreeUnits -= cReqUnits;
457 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
458
459 void * const pvRet = (uint8_t *)pChunk->pvChunk
460 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
461
462 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
463 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
464 }
465
466 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
467 }
468 return NULL;
469}
470#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
471
472
473static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
474{
475#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
476 /*
477 * Figure out how much to allocate.
478 */
479 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
480 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
481 {
482 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
483 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
484 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
485 {
486 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
487 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
488 if (pvRet)
489 return pvRet;
490 }
491 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
492 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
493 cReqUnits, idxChunk);
494 }
495#else
496 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
497 if (pvRet)
498 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
499#endif
500 return NULL;
501
502}
503
504
505/**
506 * Allocates @a cbReq bytes of executable memory.
507 *
508 * @returns Pointer to the memory, NULL if out of memory or other problem
509 * encountered.
510 * @param pVCpu The cross context virtual CPU structure of the calling
511 * thread.
512 * @param cbReq How many bytes are required.
513 */
514static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
515{
516 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
517 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
518 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
519
520
521 for (unsigned iIteration = 0;; iIteration++)
522 {
523 /*
524 * Adjust the request size so it'll fit the allocator alignment/whatnot.
525 *
526 * For the RTHeapSimple allocator this means to follow the logic described
527 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
528 * existing chunks if we think we've got sufficient free memory around.
529 *
530 * While for the alternative one we just align it up to a whole unit size.
531 */
532#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
533 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
534#else
535 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
536#endif
537 if (cbReq <= pExecMemAllocator->cbFree)
538 {
539 uint32_t const cChunks = pExecMemAllocator->cChunks;
540 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
541 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
548 {
549 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
550 if (pvRet)
551 return pvRet;
552 }
553 }
554
555 /*
556 * Can we grow it with another chunk?
557 */
558 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
559 {
560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
561 AssertLogRelRCReturn(rc, NULL);
562
563 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
564 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
565 if (pvRet)
566 return pvRet;
567 AssertFailed();
568 }
569
570 /*
571 * Try prune native TBs once.
572 */
573 if (iIteration == 0)
574 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
575 else
576 {
577 /** @todo stats... */
578 return NULL;
579 }
580 }
581
582}
583
584
585/** This is a hook that we may need later for changing memory protection back
586 * to readonly+exec */
587static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
588{
589#ifdef RT_OS_DARWIN
590 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
591 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
592 AssertRC(rc); RT_NOREF(pVCpu);
593
594 /*
595 * Flush the instruction cache:
596 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
597 */
598 /* sys_dcache_flush(pv, cb); - not necessary */
599 sys_icache_invalidate(pv, cb);
600#else
601 RT_NOREF(pVCpu, pv, cb);
602#endif
603}
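
/* Minimal usage sketch of the alloc + ready-for-use contract described above
   (illustrative only; the helper name and the pabCode/cbCode inputs are made
   up, and the allocator asserts 32 < cbReq < 512KB): */
#if 0
static void *iemExecMemExampleCopyIn(PVMCPUCC pVCpu, uint8_t const *pabCode, size_t cbCode)
{
    /* On darwin this comes back read+write rather than read+exec, see
       iemExecMemAllocatorAllocTailCode. */
    void *pvDst = iemExecMemAllocatorAlloc(pVCpu, (uint32_t)cbCode);
    if (pvDst)
    {
        memcpy(pvDst, pabCode, cbCode);
        /* Switch the pages back to read+exec and flush the instruction cache
           where needed before anyone executes the code. */
        iemExecMemAllocatorReadyForUse(pVCpu, pvDst, cbCode);
    }
    return pvDst;
}
#endif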
604
605
606/**
607 * Frees executable memory.
608 */
609void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
610{
611 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
612 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
613 Assert(pv);
614#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
615 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
616#else
617 Assert(!((uintptr_t)pv & 63));
618#endif
619
620 /* Align the size as we did when allocating the block. */
621#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
622 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
623#else
624 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
625#endif
626
627 /* Free it / assert sanity. */
628#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
629 uint32_t const cChunks = pExecMemAllocator->cChunks;
630 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
631 bool fFound = false;
632 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
633 {
634 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
635 fFound = offChunk < cbChunk;
636 if (fFound)
637 {
638#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
639 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
640 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
641
642 /* Check that it's valid and free it. */
643 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
644 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
645 for (uint32_t i = 1; i < cReqUnits; i++)
646 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
647 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
648
649 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
650 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
651
652 /* Update the stats. */
653 pExecMemAllocator->cbAllocated -= cb;
654 pExecMemAllocator->cbFree += cb;
655 pExecMemAllocator->cAllocations -= 1;
656 return;
657#else
658 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
659 break;
660#endif
661 }
662 }
663# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
664 AssertFailed();
665# else
666 Assert(fFound);
667# endif
668#endif
669
670#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
671 /* Update stats while cb is freshly calculated. */
672 pExecMemAllocator->cbAllocated -= cb;
673 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
674 pExecMemAllocator->cAllocations -= 1;
675
676 /* Free it. */
677 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
678#endif
679}
680
681
682
683#ifdef IN_RING3
684# ifdef RT_OS_WINDOWS
685
686/**
687 * Initializes the unwind info structures for windows hosts.
688 */
689static int
690iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
691 void *pvChunk, uint32_t idxChunk)
692{
693 RT_NOREF(pVCpu);
694
695 /*
696 * The AMD64 unwind opcodes.
697 *
698 * This is a program that starts with RSP after a RET instruction that
699 * ends up in recompiled code, and the operations we describe here will
700 * restore all non-volatile registers and bring RSP back to where our
701 * RET address is. This means it's reverse order from what happens in
702 * the prologue.
703 *
704 * Note! Using a frame register approach here both because we have one
705 * and mainly because the UWOP_ALLOC_LARGE argument values
706 * would be a pain to write initializers for. On the positive
707 * side, we're impervious to changes in the stack variable
708 * area and can deal with dynamic stack allocations if necessary.
709 */
710 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
711 {
712 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
713 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
714 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
715 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
716 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
717 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
718 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
719 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
720 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
721 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
722 };
723 union
724 {
725 IMAGE_UNWIND_INFO Info;
726 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
727 } s_UnwindInfo =
728 {
729 {
730 /* .Version = */ 1,
731 /* .Flags = */ 0,
732 /* .SizeOfProlog = */ 16, /* whatever */
733 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
734 /* .FrameRegister = */ X86_GREG_xBP,
735 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
736 }
737 };
738 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
739 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
740
741 /*
742 * Calc how much space we need and allocate it off the exec heap.
743 */
744 unsigned const cFunctionEntries = 1;
745 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
746 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
747# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
749 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
750 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
751# else
752 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
753 - pExecMemAllocator->cbHeapBlockHdr;
754 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
755 32 /*cbAlignment*/);
756# endif
757 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
758 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
759
760 /*
761 * Initialize the structures.
762 */
763 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
764
765 paFunctions[0].BeginAddress = 0;
766 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
767 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
768
769 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
770 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
771
772 /*
773 * Register it.
774 */
775 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
776 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
777
778 return VINF_SUCCESS;
779}
780
781
782# else /* !RT_OS_WINDOWS */
783
784/**
785 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
786 */
787DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
788{
789 if (iValue >= 64)
790 {
791 Assert(iValue < 0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
794 }
795 else if (iValue >= 0)
796 *Ptr.pb++ = (uint8_t)iValue;
797 else if (iValue > -64)
798 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
799 else
800 {
801 Assert(iValue > -0x2000);
802 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
803 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
804 }
805 return Ptr;
806}
807
808
809/**
810 * Emits an ULEB128 encoded value (up to 64-bit wide).
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
813{
814 while (uValue >= 0x80)
815 {
816 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
817 uValue >>= 7;
818 }
819 *Ptr.pb++ = (uint8_t)uValue;
820 return Ptr;
821}
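
/* Worked encoding examples for the two emitters above: ULEB128(300) encodes as
   the two bytes 0xac 0x02, while the signed LEB128 value -8 (the data alignment
   factor used in the CIE below) fits in the single byte 0x78. */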
822
823
824/**
825 * Emits a CFA rule as register @a uReg + offset @a off.
826 */
827DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
828{
829 *Ptr.pb++ = DW_CFA_def_cfa;
830 Ptr = iemDwarfPutUleb128(Ptr, uReg);
831 Ptr = iemDwarfPutUleb128(Ptr, off);
832 return Ptr;
833}
834
835
836/**
837 * Emits a register (@a uReg) save location:
838 * CFA + @a off * data_alignment_factor
839 */
840DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
841{
842 if (uReg < 0x40)
843 *Ptr.pb++ = DW_CFA_offset | uReg;
844 else
845 {
846 *Ptr.pb++ = DW_CFA_offset_extended;
847 Ptr = iemDwarfPutUleb128(Ptr, uReg);
848 }
849 Ptr = iemDwarfPutUleb128(Ptr, off);
850 return Ptr;
851}
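
/* Byte-level example of the CFI helpers above, assuming the usual SysV AMD64
   DWARF register numbering (DWREG_AMD64_RBP = 6, DWREG_AMD64_RA = 16):
   iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16) emits 0x0c 0x06 0x10
   (DW_CFA_def_cfa, reg 6, offset 16), and
   iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1) emits 0x90 0x01
   (DW_CFA_offset | 16, factored offset 1, i.e. saved at CFA + 1 * -8). */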
852
853
854# if 0 /* unused */
855/**
856 * Emits a register (@a uReg) save location, using signed offset:
857 * CFA + @a offSigned * data_alignment_factor
858 */
859DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
860{
861 *Ptr.pb++ = DW_CFA_offset_extended_sf;
862 Ptr = iemDwarfPutUleb128(Ptr, uReg);
863 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
864 return Ptr;
865}
866# endif
867
868
869/**
870 * Initializes the unwind info section for non-windows hosts.
871 */
872static int
873iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
874 void *pvChunk, uint32_t idxChunk)
875{
876 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
877 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
878
879 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
880
881 /*
882 * Generate the CIE first.
883 */
884# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
885 uint8_t const iDwarfVer = 3;
886# else
887 uint8_t const iDwarfVer = 4;
888# endif
889 RTPTRUNION const PtrCie = Ptr;
890 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
891 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
892 *Ptr.pb++ = iDwarfVer; /* DWARF version */
893 *Ptr.pb++ = 0; /* Augmentation. */
894 if (iDwarfVer >= 4)
895 {
896 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
897 *Ptr.pb++ = 0; /* Segment selector size. */
898 }
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
901# else
902 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
903# endif
904 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
905# ifdef RT_ARCH_AMD64
906 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
907# elif defined(RT_ARCH_ARM64)
908 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
909# else
910# error "port me"
911# endif
912 /* Initial instructions: */
913# ifdef RT_ARCH_AMD64
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
922# elif defined(RT_ARCH_ARM64)
923# if 1
924 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
925# else
926 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
927# endif
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
936 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
937 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
938 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
939 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
940 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
941 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
942# else
943# error "port me"
944# endif
945 while ((Ptr.u - PtrCie.u) & 3)
946 *Ptr.pb++ = DW_CFA_nop;
947 /* Finalize the CIE size. */
948 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
949
950 /*
951 * Generate an FDE for the whole chunk area.
952 */
953# ifdef IEMNATIVE_USE_LIBUNWIND
954 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
955# endif
956 RTPTRUNION const PtrFde = Ptr;
957 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
958 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
959 Ptr.pu32++;
960 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
961 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
962# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
963 *Ptr.pb++ = DW_CFA_nop;
964# endif
965 while ((Ptr.u - PtrFde.u) & 3)
966 *Ptr.pb++ = DW_CFA_nop;
967 /* Finalize the FDE size. */
968 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
969
970 /* Terminator entry. */
971 *Ptr.pu32++ = 0;
972 *Ptr.pu32++ = 0; /* just to be sure... */
973 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
974
975 /*
976 * Register it.
977 */
978# ifdef IEMNATIVE_USE_LIBUNWIND
979 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
980# else
981 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
982 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
983# endif
984
985# ifdef IEMNATIVE_USE_GDB_JIT
986 /*
987 * Now for telling GDB about this (experimental).
988 *
989 * This seems to work best with ET_DYN.
990 */
991 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
992# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
993 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
995# else
996 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
997 - pExecMemAllocator->cbHeapBlockHdr;
998 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
999# endif
1000 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1001 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1002
1003 RT_ZERO(*pSymFile);
1004
1005 /*
1006 * The ELF header:
1007 */
1008 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1009 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1010 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1011 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1012 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1013 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1014 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1015 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1016# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1017 pSymFile->EHdr.e_type = ET_DYN;
1018# else
1019 pSymFile->EHdr.e_type = ET_REL;
1020# endif
1021# ifdef RT_ARCH_AMD64
1022 pSymFile->EHdr.e_machine = EM_AMD64;
1023# elif defined(RT_ARCH_ARM64)
1024 pSymFile->EHdr.e_machine = EM_AARCH64;
1025# else
1026# error "port me"
1027# endif
1028 pSymFile->EHdr.e_version = 1; /*?*/
1029 pSymFile->EHdr.e_entry = 0;
1030# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1031 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1032# else
1033 pSymFile->EHdr.e_phoff = 0;
1034# endif
1035 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1036 pSymFile->EHdr.e_flags = 0;
1037 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1038# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1039 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1040 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1041# else
1042 pSymFile->EHdr.e_phentsize = 0;
1043 pSymFile->EHdr.e_phnum = 0;
1044# endif
1045 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1046 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1047 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1048
1049 uint32_t offStrTab = 0;
1050#define APPEND_STR(a_szStr) do { \
1051 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1052 offStrTab += sizeof(a_szStr); \
1053 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1054 } while (0)
1055#define APPEND_STR_FMT(a_szStr, ...) do { \
1056 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1057 offStrTab++; \
1058 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1059 } while (0)
1060
1061 /*
1062 * Section headers.
1063 */
1064 /* Section header #0: NULL */
1065 unsigned i = 0;
1066 APPEND_STR("");
1067 RT_ZERO(pSymFile->aShdrs[i]);
1068 i++;
1069
1070 /* Section header: .eh_frame */
1071 pSymFile->aShdrs[i].sh_name = offStrTab;
1072 APPEND_STR(".eh_frame");
1073 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1074 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1075# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1076 pSymFile->aShdrs[i].sh_offset
1077 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1078# else
1079 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1080 pSymFile->aShdrs[i].sh_offset = 0;
1081# endif
1082
1083 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1084 pSymFile->aShdrs[i].sh_link = 0;
1085 pSymFile->aShdrs[i].sh_info = 0;
1086 pSymFile->aShdrs[i].sh_addralign = 1;
1087 pSymFile->aShdrs[i].sh_entsize = 0;
1088 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1089 i++;
1090
1091 /* Section header: .shstrtab */
1092 unsigned const iShStrTab = i;
1093 pSymFile->EHdr.e_shstrndx = iShStrTab;
1094 pSymFile->aShdrs[i].sh_name = offStrTab;
1095 APPEND_STR(".shstrtab");
1096 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1097 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1098# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1099 pSymFile->aShdrs[i].sh_offset
1100 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1101# else
1102 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1103 pSymFile->aShdrs[i].sh_offset = 0;
1104# endif
1105 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1106 pSymFile->aShdrs[i].sh_link = 0;
1107 pSymFile->aShdrs[i].sh_info = 0;
1108 pSymFile->aShdrs[i].sh_addralign = 1;
1109 pSymFile->aShdrs[i].sh_entsize = 0;
1110 i++;
1111
1112 /* Section header: .symtab */
1113 pSymFile->aShdrs[i].sh_name = offStrTab;
1114 APPEND_STR(".symtab");
1115 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1116 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1119 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1120 pSymFile->aShdrs[i].sh_link = iShStrTab;
1121 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1122 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1123 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1124 i++;
1125
1126# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1127 /* Section header: .dynsym */
1128 pSymFile->aShdrs[i].sh_name = offStrTab;
1129 APPEND_STR(".dynsym");
1130 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1131 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1132 pSymFile->aShdrs[i].sh_offset
1133 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1134 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1135 pSymFile->aShdrs[i].sh_link = iShStrTab;
1136 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1137 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1138 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1139 i++;
1140# endif
1141
1142# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1143 /* Section header: .dynamic */
1144 pSymFile->aShdrs[i].sh_name = offStrTab;
1145 APPEND_STR(".dynamic");
1146 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1147 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1148 pSymFile->aShdrs[i].sh_offset
1149 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1150 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1151 pSymFile->aShdrs[i].sh_link = iShStrTab;
1152 pSymFile->aShdrs[i].sh_info = 0;
1153 pSymFile->aShdrs[i].sh_addralign = 1;
1154 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1155 i++;
1156# endif
1157
1158 /* Section header: .text */
1159 unsigned const iShText = i;
1160 pSymFile->aShdrs[i].sh_name = offStrTab;
1161 APPEND_STR(".text");
1162 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1163 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1164# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1165 pSymFile->aShdrs[i].sh_offset
1166 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1167# else
1168 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1169 pSymFile->aShdrs[i].sh_offset = 0;
1170# endif
1171 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1172 pSymFile->aShdrs[i].sh_link = 0;
1173 pSymFile->aShdrs[i].sh_info = 0;
1174 pSymFile->aShdrs[i].sh_addralign = 1;
1175 pSymFile->aShdrs[i].sh_entsize = 0;
1176 i++;
1177
1178 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1179
1180# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1181 /*
1182 * The program headers:
1183 */
1184 /* Everything in a single LOAD segment: */
1185 i = 0;
1186 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1187 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = 0;
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1193 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1194 i++;
1195 /* The .dynamic segment. */
1196 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1197 pSymFile->aPhdrs[i].p_flags = PF_R;
1198 pSymFile->aPhdrs[i].p_offset
1199 = pSymFile->aPhdrs[i].p_vaddr
1200 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1201 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1202 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1203 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1204 i++;
1205
1206 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1207
1208 /*
1209 * The dynamic section:
1210 */
1211 i = 0;
1212 pSymFile->aDyn[i].d_tag = DT_SONAME;
1213 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1214 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1223 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1224 i++;
1225 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1226 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1227 i++;
1228 pSymFile->aDyn[i].d_tag = DT_NULL;
1229 i++;
1230 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1231# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1232
1233 /*
1234 * Symbol tables:
1235 */
1236 /** @todo gdb doesn't seem to really like this ... */
1237 i = 0;
1238 pSymFile->aSymbols[i].st_name = 0;
1239 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1240 pSymFile->aSymbols[i].st_value = 0;
1241 pSymFile->aSymbols[i].st_size = 0;
1242 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1243 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1244# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1245 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1246# endif
1247 i++;
1248
1249 pSymFile->aSymbols[i].st_name = 0;
1250 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1251 pSymFile->aSymbols[i].st_value = 0;
1252 pSymFile->aSymbols[i].st_size = 0;
1253 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1254 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1255 i++;
1256
1257 pSymFile->aSymbols[i].st_name = offStrTab;
1258 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1259# if 0
1260 pSymFile->aSymbols[i].st_shndx = iShText;
1261 pSymFile->aSymbols[i].st_value = 0;
1262# else
1263 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1264 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1265# endif
1266 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1267 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1268 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1269# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1270 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1271 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1272# endif
1273 i++;
1274
1275 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1276 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1277
1278 /*
1279 * The GDB JIT entry and informing GDB.
1280 */
1281 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1282# if 1
1283 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1284# else
1285 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1286# endif
1287
1288 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1289 RTCritSectEnter(&g_IemNativeGdbJitLock);
1290 pEhFrame->GdbJitEntry.pNext = NULL;
1291 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1292 if (__jit_debug_descriptor.pTail)
1293 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1294 else
1295 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1296 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1297 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1298
1299 /* Notify GDB: */
1300 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1301 __jit_debug_register_code();
1302 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1303 RTCritSectLeave(&g_IemNativeGdbJitLock);
1304
1305# else /* !IEMNATIVE_USE_GDB_JIT */
1306 RT_NOREF(pVCpu);
1307# endif /* !IEMNATIVE_USE_GDB_JIT */
1308
1309 return VINF_SUCCESS;
1310}
1311
1312# endif /* !RT_OS_WINDOWS */
1313#endif /* IN_RING3 */
1314
1315
1316/**
1317 * Adds another chunk to the executable memory allocator.
1318 *
1319 * This is used by the init code for the initial allocation and later by the
1320 * regular allocator function when it's out of memory.
1321 */
1322static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1323{
1324 /* Check that we've room for growth. */
1325 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1326 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1327
1328 /* Allocate a chunk. */
1329#ifdef RT_OS_DARWIN
1330 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1331#else
1332 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1333#endif
1334 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1335
1336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1337 int rc = VINF_SUCCESS;
1338#else
1339 /* Initialize the heap for the chunk. */
1340 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1341 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1342 AssertRC(rc);
1343 if (RT_SUCCESS(rc))
1344 {
1345 /*
1346 * We want the memory to be aligned on a 64 byte boundary, so the first time thru
1347 * here we do some exploratory allocations to see how we can achieve this.
1348 * On subsequent runs we only make an initial adjustment allocation, if
1349 * necessary.
1350 *
1351 * Since we own the heap implementation, we know that the internal block
1352 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1353 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1354 * to the size, align up by 64 bytes, and subtract 32 bytes.
1355 *
1356 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1357 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1358 * allocation to force subsequent allocations to return 64 byte aligned
1359 * user areas.
1360 */
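/* Worked example of the adjustment (illustrative): a 256 byte request becomes
   RT_ALIGN_32(256 + 32, 64) - 32 = 288 bytes; 288 bytes of user area plus the
   32 byte header of the following block add up to 320 bytes, a multiple of 64,
   so the next user area starts 64 byte aligned again. */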
1361 if (!pExecMemAllocator->cbHeapBlockHdr)
1362 {
1363 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1364 pExecMemAllocator->cbHeapAlignTweak = 64;
1365 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1366 32 /*cbAlignment*/);
1367 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1368
1369 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1376 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1377 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1378 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1379 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1380
1381 RTHeapSimpleFree(hHeap, pvTest2);
1382 RTHeapSimpleFree(hHeap, pvTest1);
1383 }
1384 else
1385 {
1386 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1387 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1388 }
1389 if (RT_SUCCESS(rc))
1390#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1391 {
1392 /*
1393 * Add the chunk.
1394 *
1395 * This must be done before the unwind init so windows can allocate
1396 * memory from the chunk when using the alternative sub-allocator.
1397 */
1398 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1399#ifdef IN_RING3
1400 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1401#endif
1402#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1403 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1404#else
1405 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1406 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1407 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1408 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1409#endif
1410
1411 pExecMemAllocator->cChunks = idxChunk + 1;
1412 pExecMemAllocator->idxChunkHint = idxChunk;
1413
1414#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1415 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1416 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1417#else
1418 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1419 pExecMemAllocator->cbTotal += cbFree;
1420 pExecMemAllocator->cbFree += cbFree;
1421#endif
1422
1423#ifdef IN_RING3
1424 /*
1425 * Initialize the unwind information (this cannot really fail atm).
1426 * (This sets pvUnwindInfo.)
1427 */
1428 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1429 if (RT_SUCCESS(rc))
1430#endif
1431 {
1432 return VINF_SUCCESS;
1433 }
1434
1435#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1436 /* Just in case the impossible happens, undo the above: */
1437 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1438 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1439 pExecMemAllocator->cChunks = idxChunk;
1440 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1441 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1442 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1443 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1444#endif
1445 }
1446#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1447 }
1448#endif
1449 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1450 RT_NOREF(pVCpu);
1451 return rc;
1452}
1453
1454
1455/**
1456 * Initializes the executable memory allocator for native recompilation on the
1457 * calling EMT.
1458 *
1459 * @returns VBox status code.
1460 * @param pVCpu The cross context virtual CPU structure of the calling
1461 * thread.
1462 * @param cbMax The max size of the allocator.
1463 * @param cbInitial The initial allocator size.
1464 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1465 * dependent).
1466 */
1467int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1468{
1469 /*
1470 * Validate input.
1471 */
1472 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1473 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1474 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1475 || cbChunk == 0
1476 || ( RT_IS_POWER_OF_TWO(cbChunk)
1477 && cbChunk >= _1M
1478 && cbChunk <= _256M
1479 && cbChunk <= cbMax),
1480 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1481 VERR_OUT_OF_RANGE);
1482
1483 /*
1484 * Adjust/figure out the chunk size.
1485 */
1486 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1487 {
1488 if (cbMax >= _256M)
1489 cbChunk = _64M;
1490 else
1491 {
1492 if (cbMax < _16M)
1493 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1494 else
1495 cbChunk = (uint32_t)cbMax / 4;
1496 if (!RT_IS_POWER_OF_TWO(cbChunk))
1497 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1498 }
1499 }
1500
1501 if (cbChunk > cbMax)
1502 cbMax = cbChunk;
1503 else
1504 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1505 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1506 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1507
1508 /*
1509     * Allocate and initialize the allocator instance.
1510 */
1511 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1512#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1513 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1514 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1515 cbNeeded += cbBitmap * cMaxChunks;
1516 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1517 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1518#endif
1519#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1520 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1521 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1522#endif
1523 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1524 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1525 VERR_NO_MEMORY);
1526 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1527 pExecMemAllocator->cbChunk = cbChunk;
1528 pExecMemAllocator->cMaxChunks = cMaxChunks;
1529 pExecMemAllocator->cChunks = 0;
1530 pExecMemAllocator->idxChunkHint = 0;
1531 pExecMemAllocator->cAllocations = 0;
1532 pExecMemAllocator->cbTotal = 0;
1533 pExecMemAllocator->cbFree = 0;
1534 pExecMemAllocator->cbAllocated = 0;
1535#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1536 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1537 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1538 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1539 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1540#endif
1541#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1542 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1543#endif
1544 for (uint32_t i = 0; i < cMaxChunks; i++)
1545 {
1546#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1547 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1548 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1549#else
1550 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1551#endif
1552 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1553#ifdef IN_RING0
1554 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1555#else
1556 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1557#endif
1558 }
1559 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1560
1561 /*
1562 * Do the initial allocations.
1563 */
1564    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1565 {
1566 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1567 AssertLogRelRCReturn(rc, rc);
1568 }
1569
1570 pExecMemAllocator->idxChunkHint = 0;
1571
1572 return VINF_SUCCESS;
1573}
1574
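/*
 * Illustration only, not part of the build: a minimal sketch of how a caller
 * might initialize the allocator above, assuming (values made up for the
 * example) a 512 MiB cap, 64 MiB allocated up front, and the default chunk
 * size (passing 0 lets the code above pick one, _64M here since cbMax >= _256M):
 *
 *      int rc = iemExecMemAllocatorInit(pVCpu, _512M, _64M, 0);
 *      AssertLogRelRCReturn(rc, rc);
 */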
1575
1576/*********************************************************************************************************************************
1577* Native Recompilation *
1578*********************************************************************************************************************************/
1579
1580
1581/**
1582 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1585{
1586 pVCpu->iem.s.cInstructions += idxInstr;
1587 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1588}
1589
1590
1591/**
1592 * Used by TB code when it wants to raise a \#GP(0).
1593 */
1594IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1595{
1596 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1597#ifndef _MSC_VER
1598 return VINF_IEM_RAISED_XCPT; /* not reached */
1599#endif
1600}
1601
1602
1603/**
1604 * Used by TB code when it wants to raise a \#NM.
1605 */
1606IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1607{
1608 iemRaiseDeviceNotAvailableJmp(pVCpu);
1609#ifndef _MSC_VER
1610 return VINF_IEM_RAISED_XCPT; /* not reached */
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code when it wants to raise a \#UD.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1619{
1620 iemRaiseUndefinedOpcodeJmp(pVCpu);
1621#ifndef _MSC_VER
1622 return VINF_IEM_RAISED_XCPT; /* not reached */
1623#endif
1624}
1625
1626
1627/**
1628 * Used by TB code when it wants to raise a \#MF.
1629 */
1630IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1631{
1632 iemRaiseMathFaultJmp(pVCpu);
1633#ifndef _MSC_VER
1634 return VINF_IEM_RAISED_XCPT; /* not reached */
1635#endif
1636}
1637
1638
1639/**
1640 * Used by TB code when it wants to raise a \#XF.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1643{
1644 iemRaiseSimdFpExceptionJmp(pVCpu);
1645#ifndef _MSC_VER
1646 return VINF_IEM_RAISED_XCPT; /* not reached */
1647#endif
1648}
1649
1650
1651/**
1652 * Used by TB code when detecting opcode changes.
1653 * @see iemThreadeFuncWorkerObsoleteTb
1654 */
1655IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1656{
1657    /* We set fSafeToFree to false because we're being called in the context
1658       of a TB callback function, which for native TBs means we cannot release
1659       the executable memory until we've returned all the way back to iemTbExec,
1660       as that return path goes via the native code generated for the TB. */
1661 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1662 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1663 return VINF_IEM_REEXEC_BREAK;
1664}
1665
1666
1667/**
1668 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1669 */
1670IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1671{
1672 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1673 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1674 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1675 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1676 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1677 return VINF_IEM_REEXEC_BREAK;
1678}
1679
1680
1681/**
1682 * Used by TB code when we missed a PC check after a branch.
1683 */
1684IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1685{
1686 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1687 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1688 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1689 pVCpu->iem.s.pbInstrBuf));
1690 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1691 return VINF_IEM_REEXEC_BREAK;
1692}
1693
1694
1695
1696/*********************************************************************************************************************************
1697* Helpers: Segmented memory fetches and stores. *
1698*********************************************************************************************************************************/
1699
1700/**
1701 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1706 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1707#else
1708 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712
1713/**
1714 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1715 * to 16 bits.
1716 */
1717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1718{
1719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1720 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1721#else
1722 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1723#endif
1724}
1725
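/*
 * A worked example of the cast chain used by the _Sx_ fetch helpers (for
 * illustration only): fetching the byte 0x80 and sign extending it to 16 bits,
 * then zero extending the result to 64 bits for the return register:
 *
 *      (int8_t)0x80                    -> -128
 *      (int16_t)-128, as uint16_t      -> 0xff80
 *      (uint64_t)0xff80                -> 0x000000000000ff80
 *
 * The 32- and 64-bit variants below work the same way, only with wider
 * intermediate types.
 */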
1726
1727/**
1728 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1729 * to 32 bits.
1730 */
1731IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1732{
1733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1734 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1735#else
1736 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1737#endif
1738}
1739
1740/**
1741 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1742 * to 64 bits.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1745{
1746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1747 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1748#else
1749 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1750#endif
1751}
1752
1753
1754/**
1755 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1756 */
1757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1758{
1759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1760 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1761#else
1762 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1763#endif
1764}
1765
1766
1767/**
1768 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1769 * to 32 bits.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1772{
1773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1774 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1775#else
1776 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1777#endif
1778}
1779
1780
1781/**
1782 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1783 * to 64 bits.
1784 */
1785IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1786{
1787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1788 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1789#else
1790 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1791#endif
1792}
1793
1794
1795/**
1796 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1797 */
1798IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1799{
1800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1801 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1802#else
1803 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1804#endif
1805}
1806
1807
1808/**
1809 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1810 * to 64 bits.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1828 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1829#else
1830 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1831#endif
1832}
1833
1834
1835/**
1836 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1837 */
1838IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1839{
1840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1841 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1842#else
1843 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1844#endif
1845}
1846
1847
1848/**
1849 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1850 */
1851IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1852{
1853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1854 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1855#else
1856 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1857#endif
1858}
1859
1860
1861/**
1862 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1863 */
1864IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1865{
1866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1867 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1868#else
1869 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1870#endif
1871}
1872
1873
1874/**
1875 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1880 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1881#else
1882 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1883#endif
1884}
1885
1886
1887
1888/**
1889 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1890 */
1891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1892{
1893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1894 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1895#else
1896 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1897#endif
1898}
1899
1900
1901/**
1902 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1907 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1908#else
1909 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to store a 32-bit selector value onto a generic stack.
1916 *
1917 * Intel CPUs don't write the whole dword, hence the special function.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1922 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1923#else
1924 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1925#endif
1926}
1927
1928
1929/**
1930 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1931 */
1932IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1933{
1934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1935 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1936#else
1937 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1938#endif
1939}
1940
1941
1942/**
1943 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1944 */
1945IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1946{
1947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1948 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1949#else
1950 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1951#endif
1952}
1953
1954
1955/**
1956 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1957 */
1958IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1959{
1960#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1961 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1962#else
1963 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1964#endif
1965}
1966
1967
1968/**
1969 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1970 */
1971IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1972{
1973#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1974 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1975#else
1976 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1977#endif
1978}
1979
1980
1981
1982/*********************************************************************************************************************************
1983* Helpers: Flat memory fetches and stores. *
1984*********************************************************************************************************************************/
1985
1986/**
1987 * Used by TB code to load unsigned 8-bit data w/ flat address.
1988 * @note Zero extending the value to 64-bit to simplify assembly.
1989 */
1990IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1991{
1992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1993 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1994#else
1995 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1996#endif
1997}
1998
1999
2000/**
2001 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2002 * to 16 bits.
2003 * @note Zero extending the value to 64-bit to simplify assembly.
2004 */
2005IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2006{
2007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2009#else
2010 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2011#endif
2012}
2013
2014
2015/**
2016 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2017 * to 32 bits.
2018 * @note Zero extending the value to 64-bit to simplify assembly.
2019 */
2020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2021{
2022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2024#else
2025 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2026#endif
2027}
2028
2029
2030/**
2031 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2032 * to 64 bits.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2035{
2036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2037 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2038#else
2039 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2040#endif
2041}
2042
2043
2044/**
2045 * Used by TB code to load unsigned 16-bit data w/ flat address.
2046 * @note Zero extending the value to 64-bit to simplify assembly.
2047 */
2048IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2049{
2050#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2051 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2052#else
2053 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2054#endif
2055}
2056
2057
2058/**
2059 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2060 * to 32 bits.
2061 * @note Zero extending the value to 64-bit to simplify assembly.
2062 */
2063IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2064{
2065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2067#else
2068 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2069#endif
2070}
2071
2072
2073/**
2074 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2075 * to 64 bits.
2076 * @note Zero extending the value to 64-bit to simplify assembly.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2081 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2082#else
2083 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to load unsigned 32-bit data w/ flat address.
2090 * @note Zero extending the value to 64-bit to simplify assembly.
2091 */
2092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2093{
2094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2095 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2096#else
2097 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2098#endif
2099}
2100
2101
2102/**
2103 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2104 * to 64 bits.
2105 * @note Zero extending the value to 64-bit to simplify assembly.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2110 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2111#else
2112 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to load unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2123 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2124#else
2125 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2126#endif
2127}
2128
2129
2130/**
2131 * Used by TB code to store unsigned 8-bit data w/ flat address.
2132 */
2133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2134{
2135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2136 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2137#else
2138 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2139#endif
2140}
2141
2142
2143/**
2144 * Used by TB code to store unsigned 16-bit data w/ flat address.
2145 */
2146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2147{
2148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2149 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2150#else
2151 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2152#endif
2153}
2154
2155
2156/**
2157 * Used by TB code to store unsigned 32-bit data w/ flat address.
2158 */
2159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2160{
2161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2162 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2163#else
2164 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2165#endif
2166}
2167
2168
2169/**
2170 * Used by TB code to store unsigned 64-bit data w/ flat address.
2171 */
2172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2173{
2174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2175 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2176#else
2177 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2178#endif
2179}
2180
2181
2182
2183/**
2184 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2189 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2190#else
2191 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2198 */
2199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2200{
2201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2202 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2203#else
2204 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2205#endif
2206}
2207
2208
2209/**
2210 * Used by TB code to store a segment selector value onto a flat stack.
2211 *
2212 * Intel CPUs don't write the whole dword, hence the special function.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2217 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2218#else
2219 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2220#endif
2221}
2222
2223
2224/**
2225 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2226 */
2227IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2228{
2229#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2230 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2231#else
2232 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2233#endif
2234}
2235
2236
2237/**
2238 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2239 */
2240IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2241{
2242#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2243 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2244#else
2245 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2246#endif
2247}
2248
2249
2250/**
2251 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2252 */
2253IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2254{
2255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2256 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2257#else
2258 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2259#endif
2260}
2261
2262
2263/**
2264 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2269 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2270#else
2271 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2272#endif
2273}
2274
2275
2276
2277/*********************************************************************************************************************************
2278* Helpers: Segmented memory mapping. *
2279*********************************************************************************************************************************/
2280
2281/**
2282 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2283 * segmentation.
2284 */
2285IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2286 RTGCPTR GCPtrMem, uint8_t iSegReg))
2287{
2288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2289 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#else
2291 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2292#endif
2293}
2294
2295
2296/**
2297 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2298 */
2299IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2300 RTGCPTR GCPtrMem, uint8_t iSegReg))
2301{
2302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2303 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#else
2305 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2306#endif
2307}
2308
2309
2310/**
2311 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2312 */
2313IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2314 RTGCPTR GCPtrMem, uint8_t iSegReg))
2315{
2316#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2317 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2318#else
2319 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2320#endif
2321}
2322
2323
2324/**
2325 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2326 */
2327IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2328 RTGCPTR GCPtrMem, uint8_t iSegReg))
2329{
2330#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2331 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2332#else
2333 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2334#endif
2335}
2336
2337
2338/**
2339 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2340 * segmentation.
2341 */
2342IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2343 RTGCPTR GCPtrMem, uint8_t iSegReg))
2344{
2345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2346 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#else
2348 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#endif
2350}
2351
2352
2353/**
2354 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2355 */
2356IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2357 RTGCPTR GCPtrMem, uint8_t iSegReg))
2358{
2359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2360 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#else
2362 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#endif
2364}
2365
2366
2367/**
2368 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2369 */
2370IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2371 RTGCPTR GCPtrMem, uint8_t iSegReg))
2372{
2373#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2374 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2375#else
2376 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2377#endif
2378}
2379
2380
2381/**
2382 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2383 */
2384IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2385 RTGCPTR GCPtrMem, uint8_t iSegReg))
2386{
2387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2388 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2389#else
2390 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2391#endif
2392}
2393
2394
2395/**
2396 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2397 * segmentation.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2400 RTGCPTR GCPtrMem, uint8_t iSegReg))
2401{
2402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2403 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#else
2405 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2406#endif
2407}
2408
2409
2410/**
2411 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2412 */
2413IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2414 RTGCPTR GCPtrMem, uint8_t iSegReg))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2417 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#else
2419 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2426 */
2427IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2428 RTGCPTR GCPtrMem, uint8_t iSegReg))
2429{
2430#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2431 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2432#else
2433 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2434#endif
2435}
2436
2437
2438/**
2439 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2440 */
2441IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2442 RTGCPTR GCPtrMem, uint8_t iSegReg))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2445 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2446#else
2447 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2454 * segmentation.
2455 */
2456IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2457 RTGCPTR GCPtrMem, uint8_t iSegReg))
2458{
2459#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2460 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#else
2462 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#endif
2464}
2465
2466
2467/**
2468 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2469 */
2470IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2471 RTGCPTR GCPtrMem, uint8_t iSegReg))
2472{
2473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2474 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#else
2476 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2477#endif
2478}
2479
2480
2481/**
2482 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2483 */
2484IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2485 RTGCPTR GCPtrMem, uint8_t iSegReg))
2486{
2487#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2488 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#else
2490 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2491#endif
2492}
2493
2494
2495/**
2496 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2497 */
2498IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2499 RTGCPTR GCPtrMem, uint8_t iSegReg))
2500{
2501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2502 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#else
2504 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#endif
2506}
2507
2508
2509/**
2510 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2511 */
2512IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2513 RTGCPTR GCPtrMem, uint8_t iSegReg))
2514{
2515#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2516 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2517#else
2518 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2519#endif
2520}
2521
2522
2523/**
2524 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2525 */
2526IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2527 RTGCPTR GCPtrMem, uint8_t iSegReg))
2528{
2529#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2530 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2531#else
2532 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2533#endif
2534}
2535
2536
2537/**
2538 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2539 * segmentation.
2540 */
2541IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2542 RTGCPTR GCPtrMem, uint8_t iSegReg))
2543{
2544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2545 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#else
2547 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#endif
2549}
2550
2551
2552/**
2553 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2554 */
2555IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2556 RTGCPTR GCPtrMem, uint8_t iSegReg))
2557{
2558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2559 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#else
2561 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#endif
2563}
2564
2565
2566/**
2567 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2568 */
2569IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2570 RTGCPTR GCPtrMem, uint8_t iSegReg))
2571{
2572#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2573 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2574#else
2575 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2576#endif
2577}
2578
2579
2580/**
2581 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2582 */
2583IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2584 RTGCPTR GCPtrMem, uint8_t iSegReg))
2585{
2586#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2587 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2588#else
2589 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2590#endif
2591}
2592
2593
2594/*********************************************************************************************************************************
2595* Helpers: Flat memory mapping. *
2596*********************************************************************************************************************************/
2597
2598/**
2599 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2600 * address.
2601 */
2602IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2603{
2604#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2605 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2606#else
2607 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2608#endif
2609}
2610
2611
2612/**
2613 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2614 */
2615IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2616{
2617#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2618 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2619#else
2620 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2621#endif
2622}
2623
2624
2625/**
2626 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2627 */
2628IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2629{
2630#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2631 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2632#else
2633 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2634#endif
2635}
2636
2637
2638/**
2639 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2640 */
2641IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2642{
2643#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2644 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2645#else
2646 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2647#endif
2648}
2649
2650
2651/**
2652 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2653 * address.
2654 */
2655IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2656{
2657#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2658 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2659#else
2660 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2661#endif
2662}
2663
2664
2665/**
2666 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2667 */
2668IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2672#else
2673 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2682{
2683#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2684 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2685#else
2686 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2687#endif
2688}
2689
2690
2691/**
2692 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2693 */
2694IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2695{
2696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2697 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2698#else
2699 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2700#endif
2701}
2702
2703
2704/**
2705 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2706 * address.
2707 */
2708IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2709{
2710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2711 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2712#else
2713 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2714#endif
2715}
2716
2717
2718/**
2719 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2720 */
2721IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2722{
2723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2724 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2725#else
2726 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2727#endif
2728}
2729
2730
2731/**
2732 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2733 */
2734IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2735{
2736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2737 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2738#else
2739 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2740#endif
2741}
2742
2743
2744/**
2745 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2746 */
2747IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2748{
2749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2750 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2751#else
2752 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2753#endif
2754}
2755
2756
2757/**
2758 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2759 * address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2773 */
2774IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2775{
2776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2777 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2778#else
2779 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2780#endif
2781}
2782
2783
2784/**
2785 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2786 */
2787IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2788{
2789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2790 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2791#else
2792 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2793#endif
2794}
2795
2796
2797/**
2798 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2799 */
2800IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2801{
2802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2803 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2804#else
2805 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2806#endif
2807}
2808
2809
2810/**
2811 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2812 */
2813IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2814{
2815#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2816 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2817#else
2818 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2819#endif
2820}
2821
2822
2823/**
2824 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2825 */
2826IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2827{
2828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2829 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2830#else
2831 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2832#endif
2833}
2834
2835
2836/**
2837 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2838 * address.
2839 */
2840IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2841{
2842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2843 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2844#else
2845 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2846#endif
2847}
2848
2849
2850/**
2851 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2852 */
2853IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2857#else
2858 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2867{
2868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2869 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2870#else
2871 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2872#endif
2873}
2874
2875
2876/**
2877 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2878 */
2879IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2880{
2881#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2882 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2883#else
2884 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2885#endif
2886}
2887
2888
2889/*********************************************************************************************************************************
2890* Helpers: Commit, rollback & unmap *
2891*********************************************************************************************************************************/
2892
2893/**
2894 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2895 */
2896IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2897{
2898 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2899}
2900
2901
2902/**
2903 * Used by TB code to commit and unmap a read-write memory mapping.
2904 */
2905IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2906{
2907 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2908}
2909
2910
2911/**
2912 * Used by TB code to commit and unmap a write-only memory mapping.
2913 */
2914IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2915{
2916 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2917}
2918
2919
2920/**
2921 * Used by TB code to commit and unmap a read-only memory mapping.
2922 */
2923IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2924{
2925 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2926}
2927
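/*
 * Illustration only: the mapping helpers above are paired with one of these
 * commit-and-unmap helpers by the generated code.  A read-modify-write access
 * conceptually expands to something like the following, where GCPtrMem and
 * fBit stand in for whatever the recompiled instruction computed:
 *
 *      uint8_t   bUnmapInfo;
 *      uint32_t *pu32 = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
 *      *pu32 |= fBit;
 *      iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);
 */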
2928
2929/**
2930 * Reinitializes the native recompiler state.
2931 *
2932 * Called before starting a new recompile job.
2933 */
2934static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2935{
2936 pReNative->cLabels = 0;
2937 pReNative->bmLabelTypes = 0;
2938 pReNative->cFixups = 0;
2939#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2940 pReNative->pDbgInfo->cEntries = 0;
2941#endif
2942 pReNative->pTbOrg = pTb;
2943 pReNative->cCondDepth = 0;
2944 pReNative->uCondSeqNo = 0;
2945 pReNative->uCheckIrqSeqNo = 0;
2946 pReNative->uTlbSeqNo = 0;
2947
2948#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2949 pReNative->Core.offPc = 0;
2950 pReNative->Core.cInstrPcUpdateSkipped = 0;
2951#endif
2952 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2953#if IEMNATIVE_HST_GREG_COUNT < 32
2954 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2955#endif
2956 ;
2957 pReNative->Core.bmHstRegsWithGstShadow = 0;
2958 pReNative->Core.bmGstRegShadows = 0;
2959 pReNative->Core.bmVars = 0;
2960 pReNative->Core.bmStack = 0;
2961 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2962 pReNative->Core.u64ArgVars = UINT64_MAX;
2963
2964 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
2965 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2968 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2969 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2970 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2971 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2972 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2973 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2974 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2975 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2976 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2977 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2978
2979 /* Full host register reinit: */
2980 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2981 {
2982 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2983 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2984 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2985 }
2986
2987 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2988 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2989#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2990 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2991#endif
2992#ifdef IEMNATIVE_REG_FIXED_TMP0
2993 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2994#endif
2995#ifdef IEMNATIVE_REG_FIXED_TMP1
2996 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2997#endif
2998#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2999 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3000#endif
3001 );
3002 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3003 {
3004 fRegs &= ~RT_BIT_32(idxReg);
3005        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3006 }
3007
3008 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3009#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3010 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3011#endif
3012#ifdef IEMNATIVE_REG_FIXED_TMP0
3013 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3014#endif
3015#ifdef IEMNATIVE_REG_FIXED_TMP1
3016 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3017#endif
3018#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3019 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3020#endif
3021
3022#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3023# ifdef RT_ARCH_ARM64
3024    /*
3025     * Arm64 only has 32 128-bit registers, so to support emulating 256-bit registers we statically pair
3026     * two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3027     * We always pair v0 with v1, v2 with v3, etc., marking the higher register of each pair as fixed here
3028     * during init so the allocator can assume it is always free when the lower one is picked (mask illustrated below).
3029     */
3030 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3031# else
3032 uint32_t const fFixedAdditional = 0;
3033# endif
3034
3035 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3036 | fFixedAdditional
3037# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3038 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3039# endif
3040 ;
3041 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3042 pReNative->Core.bmGstSimdRegShadows = 0;
3043 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3044 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3045
3046 /* Full host register reinit: */
3047 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3048 {
3049 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3050 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3051 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3052 }
3053
3054 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3055 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3056 {
3057 fRegs &= ~RT_BIT_32(idxReg);
3058 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3059 }
3060
3061#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3062 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3063#endif
3064
3065#endif
3066
3067 return pReNative;
3068}
3069
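/*
 * Note on the arm64 SIMD register pairing in iemNativeReInit above (for
 * illustration only): the fFixedAdditional mask UINT32_C(0xaaaaaaaa) has every
 * odd bit set:
 *
 *      0xaaaaaaaa = 1010 1010 1010 1010 1010 1010 1010 1010b
 *
 * so v1, v3, ..., v31 are marked fixed/reserved and only the even registers
 * v0, v2, ..., v30 are handed out, each implicitly owning its odd neighbour
 * for the upper 128 bits of a 256-bit value.
 */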
3070
3071/**
3072 * Allocates and initializes the native recompiler state.
3073 *
3074 * This is called the first time an EMT wants to recompile something.
3075 *
3076 * @returns Pointer to the new recompiler state.
3077 * @param pVCpu The cross context virtual CPU structure of the calling
3078 * thread.
3079 * @param pTb The TB that's about to be recompiled.
3080 * @thread EMT(pVCpu)
3081 */
3082static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3083{
3084 VMCPU_ASSERT_EMT(pVCpu);
3085
3086 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3087 AssertReturn(pReNative, NULL);
3088
3089 /*
3090     * Try to allocate all the buffers and stuff we need.
3091 */
3092 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3093 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3094 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3095#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3096 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3097#endif
3098 if (RT_LIKELY( pReNative->pInstrBuf
3099 && pReNative->paLabels
3100 && pReNative->paFixups)
3101#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3102 && pReNative->pDbgInfo
3103#endif
3104 )
3105 {
3106 /*
3107 * Set the buffer & array sizes on success.
3108 */
3109 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3110 pReNative->cLabelsAlloc = _8K;
3111 pReNative->cFixupsAlloc = _16K;
3112#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3113 pReNative->cDbgInfoAlloc = _16K;
3114#endif
3115
3116 /* Other constant stuff: */
3117 pReNative->pVCpu = pVCpu;
3118
3119 /*
3120 * Done, just need to save it and reinit it.
3121 */
3122 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3123 return iemNativeReInit(pReNative, pTb);
3124 }
3125
3126 /*
3127 * Failed. Cleanup and return.
3128 */
3129 AssertFailed();
3130 RTMemFree(pReNative->pInstrBuf);
3131 RTMemFree(pReNative->paLabels);
3132 RTMemFree(pReNative->paFixups);
3133#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3134 RTMemFree(pReNative->pDbgInfo);
3135#endif
3136 RTMemFree(pReNative);
3137 return NULL;
3138}
3139
3140
3141/**
3142 * Creates a label
3143 *
3144 * If the label does not yet have a defined position,
3145 * call iemNativeLabelDefine() later to set it.
3146 *
3147 * @returns Label ID. Throws VBox status code on failure, so no need to check
3148 * the return value.
3149 * @param pReNative The native recompile state.
3150 * @param enmType The label type.
3151 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3152 * label is not yet defined (default).
3153 * @param   uData       Data associated with the label. Only applicable to
3154 *                      certain types of labels. Default is zero.
3155 */
3156DECL_HIDDEN_THROW(uint32_t)
3157iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3158 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3159{
3160 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3161
3162 /*
3163 * Locate existing label definition.
3164 *
3165 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3166 * and uData is zero.
3167 */
3168 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3169 uint32_t const cLabels = pReNative->cLabels;
3170 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3171#ifndef VBOX_STRICT
3172 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3173 && offWhere == UINT32_MAX
3174 && uData == 0
3175#endif
3176 )
3177 {
3178#ifndef VBOX_STRICT
3179 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3180 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3181 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3182 if (idxLabel < pReNative->cLabels)
3183 return idxLabel;
3184#else
3185 for (uint32_t i = 0; i < cLabels; i++)
3186 if ( paLabels[i].enmType == enmType
3187 && paLabels[i].uData == uData)
3188 {
3189 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3190 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3191 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3192 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3193 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3194 return i;
3195 }
3196 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3197 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3198#endif
3199 }
3200
3201 /*
3202 * Make sure we've got room for another label.
3203 */
3204 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3205 { /* likely */ }
3206 else
3207 {
3208 uint32_t cNew = pReNative->cLabelsAlloc;
3209 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3210 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3211 cNew *= 2;
3212        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3213 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3214 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3215 pReNative->paLabels = paLabels;
3216 pReNative->cLabelsAlloc = cNew;
3217 }
3218
3219 /*
3220 * Define a new label.
3221 */
3222 paLabels[cLabels].off = offWhere;
3223 paLabels[cLabels].enmType = enmType;
3224 paLabels[cLabels].uData = uData;
3225 pReNative->cLabels = cLabels + 1;
3226
3227 Assert((unsigned)enmType < 64);
3228 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3229
3230 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3231 {
3232 Assert(uData == 0);
3233 pReNative->aidxUniqueLabels[enmType] = cLabels;
3234 }
3235
3236 if (offWhere != UINT32_MAX)
3237 {
3238#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3239 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3240 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3241#endif
3242 }
3243 return cLabels;
3244}
3245
3246
3247/**
3248 * Defines the location of an existing label.
3249 *
3250 * @param pReNative The native recompile state.
3251 * @param idxLabel The label to define.
3252 * @param offWhere The position.
3253 */
3254DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3255{
3256 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3257 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3258 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3259 pLabel->off = offWhere;
3260#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3261 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3262 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3263#endif
3264}
3265
3266
3267/**
3268 * Looks up a label.
3269 *
3270 * @returns Label ID if found, UINT32_MAX if not.
3271 */
3272static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3273 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3274{
3275 Assert((unsigned)enmType < 64);
3276 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3277 {
3278 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3279 return pReNative->aidxUniqueLabels[enmType];
3280
3281 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3282 uint32_t const cLabels = pReNative->cLabels;
3283 for (uint32_t i = 0; i < cLabels; i++)
3284 if ( paLabels[i].enmType == enmType
3285 && paLabels[i].uData == uData
3286 && ( paLabels[i].off == offWhere
3287 || offWhere == UINT32_MAX
3288 || paLabels[i].off == UINT32_MAX))
3289 return i;
3290 }
3291 return UINT32_MAX;
3292}
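

/*
 * A minimal sketch of the find-or-create pattern the two label helpers above
 * are meant for; illustrative only (the label type and uData values are just
 * placeholders), not literal code from this file:
 *
 *      uint32_t idxLabel = iemNativeLabelFind(pReNative, enmLabelType, UINT32_MAX, uData);
 *      if (idxLabel == UINT32_MAX)
 *          idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, uData);
 *      // idxLabel can now be used as a branch/fixup target, see iemNativeAddFixup() below.
 */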
3293
3294
3295/**
3296 * Adds a fixup.
3297 *
3298 * @throws VBox status code (int) on failure.
3299 * @param pReNative The native recompile state.
3300 * @param offWhere The instruction offset of the fixup location.
3301 * @param idxLabel The target label ID for the fixup.
3302 * @param enmType The fixup type.
3303 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3304 */
3305DECL_HIDDEN_THROW(void)
3306iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3307 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3308{
3309 Assert(idxLabel <= UINT16_MAX);
3310 Assert((unsigned)enmType <= UINT8_MAX);
3311
3312 /*
3313 * Make sure we've room.
3314 */
3315 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3316 uint32_t const cFixups = pReNative->cFixups;
3317 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3318 { /* likely */ }
3319 else
3320 {
3321 uint32_t cNew = pReNative->cFixupsAlloc;
3322 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3323 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3324 cNew *= 2;
3325 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3326 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3327 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3328 pReNative->paFixups = paFixups;
3329 pReNative->cFixupsAlloc = cNew;
3330 }
3331
3332 /*
3333 * Add the fixup.
3334 */
3335 paFixups[cFixups].off = offWhere;
3336 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3337 paFixups[cFixups].enmType = enmType;
3338 paFixups[cFixups].offAddend = offAddend;
3339 pReNative->cFixups = cFixups + 1;
3340}
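

/*
 * Illustrative sketch of how the label and fixup helpers fit together for a
 * forward branch; the conditional-jump emitter named here is hypothetical,
 * but the real emitters in this file follow the same create / fixup / define
 * pattern:
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);  // offWhere=UINT32_MAX: forward declaration
 *      off = iemNativeEmitSomeJumpToLabel(pReNative, off, idxLabel);             // hypothetical; records the branch location
 *                                                                                // via iemNativeAddFixup()
 *      ...                                                                       // code the branch skips
 *      iemNativeLabelDefine(pReNative, idxLabel, off);                           // resolve the forward reference; the fixups
 *                                                                                // recorded against idxLabel are patched when
 *                                                                                // the code is finalized
 */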
3341
3342
3343/**
3344 * Slow code path for iemNativeInstrBufEnsure.
3345 */
3346DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3347{
3348 /* Double the buffer size till we meet the request. */
3349 uint32_t cNew = pReNative->cInstrBufAlloc;
3350 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3351 do
3352 cNew *= 2;
3353 while (cNew < off + cInstrReq);
3354
3355 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3356#ifdef RT_ARCH_ARM64
3357 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3358#else
3359 uint32_t const cbMaxInstrBuf = _2M;
3360#endif
3361 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3362
3363 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3364 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3365
3366#ifdef VBOX_STRICT
3367 pReNative->offInstrBufChecked = off + cInstrReq;
3368#endif
3369 pReNative->cInstrBufAlloc = cNew;
3370 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3371}
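
/*
 * The fast path lives in the inline iemNativeInstrBufEnsure wrapper (declared
 * elsewhere) which falls back onto the slow path above; a simplified sketch of
 * the usual emitter pattern, with the actual instruction encoding elided:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);  // room for 4 IEMNATIVEINSTR units
 *      pCodeBuf[off++] = ...;    // typically one unit per x86 opcode byte or ARM64 instruction word
 *      ...
 */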
3372
3373#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3374
3375/**
3376 * Grows the static debug info array used during recompilation.
3377 *
3378 * @returns Pointer to the new debug info block; throws VBox status code on
3379 * failure, so no need to check the return value.
3380 */
3381DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3382{
3383 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3384 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3385 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3386 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3387 pReNative->pDbgInfo = pDbgInfo;
3388 pReNative->cDbgInfoAlloc = cNew;
3389 return pDbgInfo;
3390}
3391
3392
3393/**
3394 * Adds a new, uninitialized debug info entry, returning the pointer to it.
3395 */
3396DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3397{
3398 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3399 { /* likely */ }
3400 else
3401 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3402 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3403}
3404
3405
3406/**
3407 * Debug Info: Adds a native offset record, if necessary.
3408 */
3409static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3410{
3411 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3412
3413 /*
3414 * Search backwards to see if we've got a similar record already.
3415 */
3416 uint32_t idx = pDbgInfo->cEntries;
3417 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3418 while (idx-- > idxStop)
3419 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3420 {
3421 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3422 return;
3423 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3424 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3425 break;
3426 }
3427
3428 /*
3429 * Add it.
3430 */
3431 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3432 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3433 pEntry->NativeOffset.offNative = off;
3434}
3435
3436
3437/**
3438 * Debug Info: Record info about a label.
3439 */
3440static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3441{
3442 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3443 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3444 pEntry->Label.uUnused = 0;
3445 pEntry->Label.enmLabel = (uint8_t)enmType;
3446 pEntry->Label.uData = uData;
3447}
3448
3449
3450/**
3451 * Debug Info: Record info about a threaded call.
3452 */
3453static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3454{
3455 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3456 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3457 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3458 pEntry->ThreadedCall.uUnused = 0;
3459 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3460}
3461
3462
3463/**
3464 * Debug Info: Record info about a new guest instruction.
3465 */
3466static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3467{
3468 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3469 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3470 pEntry->GuestInstruction.uUnused = 0;
3471 pEntry->GuestInstruction.fExec = fExec;
3472}
3473
3474
3475/**
3476 * Debug Info: Record info about guest register shadowing.
3477 */
3478static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3479 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3480{
3481 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3482 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3483 pEntry->GuestRegShadowing.uUnused = 0;
3484 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3485 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3486 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3487}
3488
3489
3490# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3491/**
3492 * Debug Info: Record info about guest register shadowing.
3493 */
3494static void iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3495 uint8_t idxHstSimdReg = UINT8_MAX, uint8_t idxHstSimdRegPrev = UINT8_MAX)
3496{
3497 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3498 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3499 pEntry->GuestSimdRegShadowing.uUnused = 0;
3500 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3501 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3502 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3503}
3504# endif
3505
3506
3507# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3508/**
3509 * Debug Info: Record info about delayed RIP updates.
3510 */
3511static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3512{
3513 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3514 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3515 pEntry->DelayedPcUpdate.offPc = offPc;
3516 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3517}
3518# endif
3519
3520#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3521
3522
3523/*********************************************************************************************************************************
3524* Register Allocator *
3525*********************************************************************************************************************************/
3526
3527/**
3528 * Register parameter indexes (indexed by argument number).
3529 */
3530DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3531{
3532 IEMNATIVE_CALL_ARG0_GREG,
3533 IEMNATIVE_CALL_ARG1_GREG,
3534 IEMNATIVE_CALL_ARG2_GREG,
3535 IEMNATIVE_CALL_ARG3_GREG,
3536#if defined(IEMNATIVE_CALL_ARG4_GREG)
3537 IEMNATIVE_CALL_ARG4_GREG,
3538# if defined(IEMNATIVE_CALL_ARG5_GREG)
3539 IEMNATIVE_CALL_ARG5_GREG,
3540# if defined(IEMNATIVE_CALL_ARG6_GREG)
3541 IEMNATIVE_CALL_ARG6_GREG,
3542# if defined(IEMNATIVE_CALL_ARG7_GREG)
3543 IEMNATIVE_CALL_ARG7_GREG,
3544# endif
3545# endif
3546# endif
3547#endif
3548};
3549
3550/**
3551 * Call register masks indexed by argument count.
3552 */
3553DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3554{
3555 0,
3556 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3557 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3558 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3559 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3560 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3561#if defined(IEMNATIVE_CALL_ARG4_GREG)
3562 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3563 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3564# if defined(IEMNATIVE_CALL_ARG5_GREG)
3565 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3566 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3567# if defined(IEMNATIVE_CALL_ARG6_GREG)
3568 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3569 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3570 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3571# if defined(IEMNATIVE_CALL_ARG7_GREG)
3572 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3573 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3574 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3575# endif
3576# endif
3577# endif
3578#endif
3579};
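
/*
 * Rough sketch of how these two tables are consumed when marshalling helper
 * call arguments; the emitter helper named here is an assumption, not code
 * from this file:
 *
 *      uint8_t const cRegArgs = RT_MIN(cArgs, (uint8_t)RT_ELEMENTS(g_aidxIemNativeCallRegs));
 *      for (uint8_t iArg = 0; iArg < cRegArgs; iArg++)
 *          off = iemNativeEmitLoadArgIntoGpr(pReNative, off, g_aidxIemNativeCallRegs[iArg], iArg); // hypothetical
 *      // g_afIemNativeCallRegs[cRegArgs] is the mask of host registers occupied by the
 *      // arguments, i.e. what must be flushed / kept clear of variables before the call.
 */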
3580
3581#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3582/**
3583 * BP offset of the stack argument slots.
3584 *
3585 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3586 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3587 */
3588DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3589{
3590 IEMNATIVE_FP_OFF_STACK_ARG0,
3591# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3592 IEMNATIVE_FP_OFF_STACK_ARG1,
3593# endif
3594# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3595 IEMNATIVE_FP_OFF_STACK_ARG2,
3596# endif
3597# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3598 IEMNATIVE_FP_OFF_STACK_ARG3,
3599# endif
3600};
3601AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3602#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3603
3604/**
3605 * Info about shadowed guest register values.
3606 * @see IEMNATIVEGSTREG
3607 */
3608static struct
3609{
3610 /** Offset in VMCPU. */
3611 uint32_t off;
3612 /** The field size. */
3613 uint8_t cb;
3614 /** Name (for logging). */
3615 const char *pszName;
3616} const g_aGstShadowInfo[] =
3617{
3618#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3619 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3620 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3621 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3622 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3623 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3624 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3625 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3626 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3627 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3628 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3629 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3630 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3631 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3632 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3633 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3634 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3635 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3636 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3637 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3638 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3639 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3640 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3641 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3642 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3643 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3644 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3645 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3646 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3647 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3648 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3649 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3650 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3651 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3652 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3653 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3654 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3655 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3656 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3657 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3658 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3659 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3660 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3661 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3662 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3663 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3664 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3665 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3666 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3667#undef CPUMCTX_OFF_AND_SIZE
3668};
3669AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
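

/*
 * Sketch of how the table above is meant to drive generic guest register
 * loads; the emitter helper names are assumptions (hypothetical), only the
 * table fields are real:
 *
 *      uint32_t const offVCpu = g_aGstShadowInfo[enmGstReg].off;      // byte offset into VMCPU
 *      switch (g_aGstShadowInfo[enmGstReg].cb)                        // field size in bytes
 *      {
 *          case 8: off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, offVCpu); break;  // hypothetical
 *          case 4: off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, offVCpu); break;  // hypothetical
 *          case 2: off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, offVCpu); break;  // hypothetical
 *          default: AssertFailed();
 *      }
 *      Log12(("loading guest %s\n", g_aGstShadowInfo[enmGstReg].pszName));
 */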
3670
3671
3672/** Host CPU general purpose register names. */
3673DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3674{
3675#ifdef RT_ARCH_AMD64
3676 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3677#elif defined(RT_ARCH_ARM64)
3678 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3679 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3680#else
3681# error "port me"
3682#endif
3683};
3684
3685
3686DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3687 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3688{
3689 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3690
3691 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3692 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3693 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3694 return (uint8_t)idxReg;
3695}
3696
3697
3698#if 0 /* unused */
3699/**
3700 * Tries to locate a suitable register in the given register mask.
3701 *
3702 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3703 * failed.
3704 *
3705 * @returns Host register number on success, returns UINT8_MAX on failure.
3706 */
3707static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3708{
3709 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3710 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3711 if (fRegs)
3712 {
3713 /** @todo pick better here: */
3714 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3715
3716 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3717 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3718 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3719 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3720
3721 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3722 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3723 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3724 return idxReg;
3725 }
3726 return UINT8_MAX;
3727}
3728#endif /* unused */
3729
3730
3731/**
3732 * Locate a register, possibly freeing one up.
3733 *
3734 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3735 * failed.
3736 *
3737 * @returns Host register number on success. Returns UINT8_MAX if no registers
3738 *          are found; the caller is supposed to deal with this and raise an
3739 *          allocation type specific status code (if desired).
3740 *
3741 * @throws  VBox status code if we run into trouble spilling a variable or
3742 *          recording debug info.  Does NOT throw anything if we're out of
3743 *          registers, though.
3744 */
3745static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3746 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3747{
3748 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3749 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3750 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3751
3752 /*
3753 * Try a freed register that's shadowing a guest register.
3754 */
3755 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3756 if (fRegs)
3757 {
3758 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3759
3760#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3761 /*
3762         * When we have liveness information, we use it to kick out all shadowed
3763         * guest registers that will not be needed any more in this TB.  If we're
3764 * lucky, this may prevent us from ending up here again.
3765 *
3766 * Note! We must consider the previous entry here so we don't free
3767 * anything that the current threaded function requires (current
3768 * entry is produced by the next threaded function).
3769 */
3770 uint32_t const idxCurCall = pReNative->idxCurCall;
3771 if (idxCurCall > 0)
3772 {
3773 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3774
3775# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3776 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3777 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3778            uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3779# else
3780 /* Construct a mask of the registers not in the read or write state.
3781               Note! We could skip writes, if they aren't from us, as this is just
3782 a hack to prevent trashing registers that have just been written
3783 or will be written when we retire the current instruction. */
3784 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3785 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3786 & IEMLIVENESSBIT_MASK;
3787# endif
3788 /* Merge EFLAGS. */
3789 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3790 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3791 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3792 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3793 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
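            /* Net effect: the EFLAGS shadow only becomes freeable when all seven
               sub-flag liveness states (OF/SF/ZF/AF/PF/CF/other) say it isn't needed. */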
3794
3795 /* If it matches any shadowed registers. */
3796 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3797 {
3798 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3799 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3800 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3801
3802 /* See if we've got any unshadowed registers we can return now. */
3803 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3804 if (fUnshadowedRegs)
3805 {
3806 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3807 return (fPreferVolatile
3808 ? ASMBitFirstSetU32(fUnshadowedRegs)
3809 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3810 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3811 - 1;
3812 }
3813 }
3814 }
3815#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3816
3817 unsigned const idxReg = (fPreferVolatile
3818 ? ASMBitFirstSetU32(fRegs)
3819 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3820 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3821 - 1;
3822
3823 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3824 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3825 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3826 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3827
3828 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3829 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3830 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3831 return idxReg;
3832 }
3833
3834 /*
3835 * Try free up a variable that's in a register.
3836 *
3837 * We do two rounds here, first evacuating variables we don't need to be
3838     * We do two rounds here: first we evacuate variables that don't need to be
3839     * saved on the stack, then in the second round we move things to the stack.
3840 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3841 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3842 {
3843 uint32_t fVars = pReNative->Core.bmVars;
3844 while (fVars)
3845 {
3846 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3847 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3848 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3849 && (RT_BIT_32(idxReg) & fRegMask)
3850 && ( iLoop == 0
3851 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3852 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3853 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3854 {
3855 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3856 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3857 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3858 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3859 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3860 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3861
3862 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3863 {
3864 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3865 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3866 }
3867
3868 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3869 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3870
3871 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3872 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3873 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3874 return idxReg;
3875 }
3876 fVars &= ~RT_BIT_32(idxVar);
3877 }
3878 }
3879
3880 return UINT8_MAX;
3881}
3882
3883
3884/**
3885 * Reassigns a variable to a different register specified by the caller.
3886 *
3887 * @returns The new code buffer position.
3888 * @param pReNative The native recompile state.
3889 * @param off The current code buffer position.
3890 * @param idxVar The variable index.
3891 * @param idxRegOld The old host register number.
3892 * @param idxRegNew The new host register number.
3893 * @param pszCaller The caller for logging.
3894 */
3895static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3896 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3897{
3898 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3899 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3900 RT_NOREF(pszCaller);
3901
3902 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3903
3904 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3905 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3906 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3908
3909 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3910 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3911 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3912 if (fGstRegShadows)
3913 {
3914 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3915 | RT_BIT_32(idxRegNew);
3916 while (fGstRegShadows)
3917 {
3918 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3919 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3920
3921 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3922 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3923 }
3924 }
3925
3926 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3927 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3928 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3929 return off;
3930}
3931
3932
3933/**
3934 * Moves a variable to a different register or spills it onto the stack.
3935 *
3936 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3937 * kinds can easily be recreated if needed later.
3938 *
3939 * @returns The new code buffer position.
3940 * @param pReNative The native recompile state.
3941 * @param off The current code buffer position.
3942 * @param idxVar The variable index.
3943 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3944 * call-volatile registers.
3945 */
3946static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3947 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3948{
3949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3950 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3951 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3952 Assert(!pVar->fRegAcquired);
3953
3954 uint8_t const idxRegOld = pVar->idxReg;
3955 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3956 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3957 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3958 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3959 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3960 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3961 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3962 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3963
3964
3965 /** @todo Add statistics on this.*/
3966 /** @todo Implement basic variable liveness analysis (python) so variables
3967     * can be freed immediately once no longer used.  Otherwise we risk trashing
3968     * registers and stack for dead variables.
3969 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3970
3971 /*
3972 * First try move it to a different register, as that's cheaper.
3973 */
3974 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3975 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3976 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3977 if (fRegs)
3978 {
3979 /* Avoid using shadow registers, if possible. */
3980 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3981 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3982 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3983 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3984 }
3985
3986 /*
3987 * Otherwise we must spill the register onto the stack.
3988 */
3989 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3990 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3991 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3992 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3993
3994 pVar->idxReg = UINT8_MAX;
3995 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3996 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3997 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3998 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3999 return off;
4000}
4001
4002
4003/**
4004 * Allocates a temporary host general purpose register.
4005 *
4006 * This may emit code to save register content onto the stack in order to free
4007 * up a register.
4008 *
4009 * @returns The host register number; throws VBox status code on failure,
4010 * so no need to check the return value.
4011 * @param pReNative The native recompile state.
4012 * @param poff Pointer to the variable with the code buffer position.
4013 * This will be update if we need to move a variable from
4014 *                      This will be updated if we need to move a variable from
4015 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4016 * registers (@c true, default) or the other way around
4017 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4018 */
4019DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4020{
4021 /*
4022 * Try find a completely unused register, preferably a call-volatile one.
4023 */
4024 uint8_t idxReg;
4025 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4026 & ~pReNative->Core.bmHstRegsWithGstShadow
4027 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4028 if (fRegs)
4029 {
4030 if (fPreferVolatile)
4031 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4032 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4033 else
4034 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4035 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4036 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4037 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4038 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4039 }
4040 else
4041 {
4042 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4043 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4044 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4045 }
4046 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4047}
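

/*
 * Illustrative allocate / use / release pattern for temporary registers; the
 * emitter and the free helper named below are assumptions about the
 * surrounding API rather than literal code from this function:
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitSomethingWithGpr(pReNative, off, idxTmpReg);   // hypothetical emitter
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);                        // assumed free counterpart
 */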
4048
4049
4050/**
4051 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4052 * registers.
4053 *
4054 * @returns The host register number; throws VBox status code on failure,
4055 * so no need to check the return value.
4056 * @param pReNative The native recompile state.
4057 * @param poff Pointer to the variable with the code buffer position.
4058 *                      This will be updated if we need to move a variable from
4059 * register to stack in order to satisfy the request.
4060 * @param fRegMask Mask of acceptable registers.
4061 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4062 * registers (@c true, default) or the other way around
4063 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4064 */
4065DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4066 bool fPreferVolatile /*= true*/)
4067{
4068 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4069 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4070
4071 /*
4072 * Try find a completely unused register, preferably a call-volatile one.
4073 */
4074 uint8_t idxReg;
4075 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4076 & ~pReNative->Core.bmHstRegsWithGstShadow
4077 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4078 & fRegMask;
4079 if (fRegs)
4080 {
4081 if (fPreferVolatile)
4082 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4083 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4084 else
4085 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4086 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4087 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4088 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4089 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4090 }
4091 else
4092 {
4093 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4094 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4095 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4096 }
4097 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4098}
4099
4100
4101/**
4102 * Allocates a temporary register for loading an immediate value into.
4103 *
4104 * This will emit code to load the immediate, unless there happens to be an
4105 * unused register with the value already loaded.
4106 *
4107 * The caller will not modify the returned register, it must be considered
4108 * read-only. Free using iemNativeRegFreeTmpImm.
4109 *
4110 * @returns The host register number; throws VBox status code on failure, so no
4111 * need to check the return value.
4112 * @param pReNative The native recompile state.
4113 * @param poff Pointer to the variable with the code buffer position.
4114 * @param uImm The immediate value that the register must hold upon
4115 * return.
4116 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4117 * registers (@c true, default) or the other way around
4118 * (@c false).
4119 *
4120 * @note Reusing immediate values has not been implemented yet.
4121 */
4122DECL_HIDDEN_THROW(uint8_t)
4123iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4124{
4125 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4126 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4127 return idxReg;
4128}
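
/*
 * Sketch matching the contract documented above (read-only use of the
 * immediate register, released with iemNativeRegFreeTmpImm); the emitter
 * helper is hypothetical:
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffffffffffff0000));
 *      off = iemNativeEmitAndGprByGpr(pReNative, off, idxDstReg, idxRegImm);   // hypothetical emitter
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */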
4129
4130#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4131
4132# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4133/**
4134 * Helper for iemNativeLivenessGetStateByGstReg.
4135 *
4136 * @returns IEMLIVENESS_STATE_XXX
4137 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4138 * ORed together.
4139 */
4140DECL_FORCE_INLINE(uint32_t)
4141iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4142{
4143 /* INPUT trumps anything else. */
4144 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4145 return IEMLIVENESS_STATE_INPUT;
4146
4147 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4148 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4149 {
4150 /* If not all sub-fields are clobbered they must be considered INPUT. */
4151 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4152 return IEMLIVENESS_STATE_INPUT;
4153 return IEMLIVENESS_STATE_CLOBBERED;
4154 }
4155
4156 /* XCPT_OR_CALL trumps UNUSED. */
4157 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4158 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4159
4160 return IEMLIVENESS_STATE_UNUSED;
4161}
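
/*
 * Worked example of the merge priority above: if the six EFLAGS sub-states
 * expand to { UNUSED, CLOBBERED, INPUT } the merged result is INPUT; a mix of
 * { UNUSED, CLOBBERED } also yields INPUT, since a partial clobber must be
 * treated as input; only when every sub-flag is CLOBBERED do we return
 * CLOBBERED, and { UNUSED, XCPT_OR_CALL } yields XCPT_OR_CALL.
 */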
4162# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4163
4164
4165DECL_FORCE_INLINE(uint32_t)
4166iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4167{
4168# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4169 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4170 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4171# else
4172 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4173 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4174 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4175         | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 3) & 8);
4176# endif
4177}
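

/*
 * Bit packing illustration for the non-extended layout: each guest register
 * has one bit in Bit0.bm64 and one in Bit1.bm64, and the pair is folded into
 * a 2-bit IEMLIVENESS_STATE_XXX value.  E.g. for enmGstRegEx = 5:
 *
 *      Bit0.bm64 bit 5 = 1  ->  contributes 1
 *      Bit1.bm64 bit 5 = 0  ->  contributes 0
 *      result = 1 = IEMLIVENESS_STATE_UNUSED (see the AssertCompile in
 *               iemNativeRegAllocFindFree above)
 *
 * The extended layout does the same with four bitmaps and a 4-bit state.
 */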
4178
4179
4180DECL_FORCE_INLINE(uint32_t)
4181iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4182{
4183 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4184 if (enmGstReg == kIemNativeGstReg_EFlags)
4185 {
4186 /* Merge the eflags states to one. */
4187# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4188 uRet = RT_BIT_32(uRet);
4189 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4190 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4191 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4192 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4193 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4194 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4195 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4196# else
4197 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4198 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4199 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4200 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4201 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4202 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4203 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4204# endif
4205 }
4206 return uRet;
4207}
4208
4209
4210# ifdef VBOX_STRICT
4211/** For assertions only; the caller checks that idxCurCall isn't zero. */
4212DECL_FORCE_INLINE(uint32_t)
4213iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4214{
4215 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4216}
4217# endif /* VBOX_STRICT */
4218
4219#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4220
4221/**
4222 * Marks host register @a idxHstReg as containing a shadow copy of guest
4223 * register @a enmGstReg.
4224 *
4225 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4226 * host register before calling.
4227 */
4228DECL_FORCE_INLINE(void)
4229iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4230{
4231 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4232 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4233 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4234
4235 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4236 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4237 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4238 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4239#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4240 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4241 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4242#else
4243 RT_NOREF(off);
4244#endif
4245}
4246
4247
4248/**
4249 * Clear any guest register shadow claims from @a idxHstReg.
4250 *
4251 * The register does not need to be shadowing any guest registers.
4252 */
4253DECL_FORCE_INLINE(void)
4254iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4255{
4256 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4257 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4258 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4259 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4260 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4261
4262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4263 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4264 if (fGstRegs)
4265 {
4266 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4267 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4268 while (fGstRegs)
4269 {
4270 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4271 fGstRegs &= ~RT_BIT_64(iGstReg);
4272 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4273 }
4274 }
4275#else
4276 RT_NOREF(off);
4277#endif
4278
4279 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4280 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4281 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4282}
4283
4284
4285/**
4286 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4287 * and global overview flags.
4288 */
4289DECL_FORCE_INLINE(void)
4290iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4291{
4292 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4293 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4294 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4295 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4296 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4297 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4298 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4299
4300#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4301 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4302 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4303#else
4304 RT_NOREF(off);
4305#endif
4306
4307 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4308 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4309 if (!fGstRegShadowsNew)
4310 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4311 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4312}
4313
4314
4315#if 0 /* unused */
4316/**
4317 * Clear any guest register shadow claim for @a enmGstReg.
4318 */
4319DECL_FORCE_INLINE(void)
4320iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4321{
4322 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4323 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4324 {
4325 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4326 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4327 }
4328}
4329#endif
4330
4331
4332/**
4333 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4334 * as the new shadow of it.
4335 *
4336 * Unlike the other guest reg shadow helpers, this does the logging for you.
4337 * However, the liveness state is not asserted here; the caller must do
4338 * that.
4339 */
4340DECL_FORCE_INLINE(void)
4341iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4342 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4343{
4344 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4345 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4346 {
4347 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4348 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4349 if (idxHstRegOld == idxHstRegNew)
4350 return;
4351 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4352 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4353 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4354 }
4355 else
4356 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4357 g_aGstShadowInfo[enmGstReg].pszName));
4358 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4359}
4360
4361
4362/**
4363 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4364 * to @a idxRegTo.
4365 */
4366DECL_FORCE_INLINE(void)
4367iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4368 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4369{
4370 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4371 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4372 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4373 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4374 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4375 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4376 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4377 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4378 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4379
4380 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4381 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4382 if (!fGstRegShadowsFrom)
4383 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4384 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4385 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4386 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4387#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4388 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4389 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4390#else
4391 RT_NOREF(off);
4392#endif
4393}
4394
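
/*
 * Minimal usage sketch for the transfer helper above (illustrative only, never
 * built): copy a shadowed guest value into a freshly allocated host register
 * and hand the shadow claim over to it.  Assumes idxRegOld currently shadows
 * enmGstReg; the function name and idxRegOld are example names, not part of
 * the recompiler API.
 */
#if 0
static uint32_t iemNativeExampleMoveShadowedValue(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                  uint8_t idxRegOld, IEMNATIVEGSTREG enmGstReg)
{
    /* Grab any allocatable, non-fixed host register. */
    uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, &off, IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK);
    /* Copy the value over and re-point the guest shadow bookkeeping at the new register. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
    iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxRegNew, enmGstReg, off);
    return off;
}
#endif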
4395
4396/**
4397 * Allocates a temporary host general purpose register for keeping a guest
4398 * register value.
4399 *
4400 * Since we may already have a register holding the guest register value,
4401 * code will be emitted to do the loading if that's not the case. Code may also
4402 * be emitted if we have to free up a register to satisfy the request.
4403 *
4404 * @returns The host register number; throws VBox status code on failure, so no
4405 * need to check the return value.
4406 * @param pReNative The native recompile state.
4407 * @param poff Pointer to the variable with the code buffer
4408 *                      position. This will be updated if we need to move a
4409 * variable from register to stack in order to satisfy
4410 * the request.
4411 * @param   enmGstReg   The guest register that is to be updated.
4412 * @param enmIntendedUse How the caller will be using the host register.
4413 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4414 * register is okay (default). The ASSUMPTION here is
4415 * that the caller has already flushed all volatile
4416 * registers, so this is only applied if we allocate a
4417 * new register.
4418 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4419 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4420 */
4421DECL_HIDDEN_THROW(uint8_t)
4422iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4423 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4424 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4425{
4426 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4427#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4428 AssertMsg( fSkipLivenessAssert
4429 || pReNative->idxCurCall == 0
4430 || enmGstReg == kIemNativeGstReg_Pc
4431 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4432 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4433 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4434 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4435 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4436 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4437#endif
4438 RT_NOREF(fSkipLivenessAssert);
4439#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4440 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4441#endif
4442 uint32_t const fRegMask = !fNoVolatileRegs
4443 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4444 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4445
4446 /*
4447 * First check if the guest register value is already in a host register.
4448 */
4449 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4450 {
4451 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4452 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4453 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4454 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4455
4456 /* It's not supposed to be allocated... */
4457 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4458 {
4459 /*
4460             * If the register will trash the guest shadow copy, try to find a
4461 * completely unused register we can use instead. If that fails,
4462 * we need to disassociate the host reg from the guest reg.
4463 */
4464 /** @todo would be nice to know if preserving the register is in any way helpful. */
4465            /* If the purpose is calculations, try to duplicate the register value as
4466 we'll be clobbering the shadow. */
4467 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4468 && ( ~pReNative->Core.bmHstRegs
4469 & ~pReNative->Core.bmHstRegsWithGstShadow
4470 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4471 {
4472 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4473
4474 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4475
4476 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4477 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4478 g_apszIemNativeHstRegNames[idxRegNew]));
4479 idxReg = idxRegNew;
4480 }
4481 /* If the current register matches the restrictions, go ahead and allocate
4482 it for the caller. */
4483 else if (fRegMask & RT_BIT_32(idxReg))
4484 {
4485 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4486 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4487 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4488 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4489 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4490 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4491 else
4492 {
4493 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4494 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4495 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4496 }
4497 }
4498 /* Otherwise, allocate a register that satisfies the caller and transfer
4499 the shadowing if compatible with the intended use. (This basically
4500               means the caller wants a non-volatile register (RSP push/pop scenario).) */
4501 else
4502 {
4503 Assert(fNoVolatileRegs);
4504 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4505 !fNoVolatileRegs
4506 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4507 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4508 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4509 {
4510 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4511                     Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4512 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4513 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4514 }
4515 else
4516 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4517 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4518 g_apszIemNativeHstRegNames[idxRegNew]));
4519 idxReg = idxRegNew;
4520 }
4521 }
4522 else
4523 {
4524 /*
4525 * Oops. Shadowed guest register already allocated!
4526 *
4527 * Allocate a new register, copy the value and, if updating, the
4528 * guest shadow copy assignment to the new register.
4529 */
4530 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4531 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4532 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4533 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4534
4535 /** @todo share register for readonly access. */
4536 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4537 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4538
4539 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4540 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4541
4542 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4543 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4544 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4545 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4546 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4547 else
4548 {
4549 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4550 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4551 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4552 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4553 }
4554 idxReg = idxRegNew;
4555 }
4556 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4557
4558#ifdef VBOX_STRICT
4559 /* Strict builds: Check that the value is correct. */
4560 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4561#endif
4562
4563 return idxReg;
4564 }
4565
4566 /*
4567     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4568 */
4569 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4570
4571 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4572 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4573
4574 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4575 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4576 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4577 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4578
4579 return idxRegNew;
4580}
4581
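
/*
 * Minimal usage sketch for iemNativeRegAllocTmpForGuestReg (illustrative only,
 * never built): fetch RIP for updating, advance it and store it back, then
 * release the temporary.  This mirrors the delayed PC writeback code further
 * down; the function name and cbInstr are example names.
 */
#if 0
static uint32_t iemNativeExampleAdvancePc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
{
    /* Get a host register shadowing guest RIP; a load is only emitted if no shadow exists yet. */
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    /* Advance it and write the result back into the CPU context. */
    off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, cbInstr);
    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    /* Free the temporary but keep the (now up to date) shadow association. */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif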
4582
4583/**
4584 * Allocates a temporary host general purpose register that already holds the
4585 * given guest register value.
4586 *
4587 * The use case for this function is places where the shadowing state cannot be
4588 * modified due to branching and such. This will fail if we don't have a
4589 * current shadow copy handy or if it's incompatible. The only code that will
4590 * be emitted here is value checking code in strict builds.
4591 *
4592 * The intended use can only be readonly!
4593 *
4594 * @returns The host register number, UINT8_MAX if not present.
4595 * @param pReNative The native recompile state.
4596 * @param poff Pointer to the instruction buffer offset.
4597 * Will be updated in strict builds if a register is
4598 * found.
4599 * @param   enmGstReg   The guest register whose shadow copy is wanted.
4600 * @note In strict builds, this may throw instruction buffer growth failures.
4601 * Non-strict builds will not throw anything.
4602 * @sa iemNativeRegAllocTmpForGuestReg
4603 */
4604DECL_HIDDEN_THROW(uint8_t)
4605iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4606{
4607 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4608#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4609 AssertMsg( pReNative->idxCurCall == 0
4610 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4611 || enmGstReg == kIemNativeGstReg_Pc,
4612 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4613#endif
4614
4615 /*
4616 * First check if the guest register value is already in a host register.
4617 */
4618 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4619 {
4620 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4621 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4622 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4623 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4624
4625 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4626 {
4627 /*
4628 * We only do readonly use here, so easy compared to the other
4629 * variant of this code.
4630 */
4631 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4632 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4633 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4634 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4635 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4636
4637#ifdef VBOX_STRICT
4638 /* Strict builds: Check that the value is correct. */
4639 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4640#else
4641 RT_NOREF(poff);
4642#endif
4643 return idxReg;
4644 }
4645 }
4646
4647 return UINT8_MAX;
4648}
4649
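
/*
 * Minimal usage sketch for the "if already present" variant (illustrative
 * only, never built): it never emits a load, so the caller must handle
 * UINT8_MAX and fall back to a path that may emit code.  idxEflReg is an
 * example name.
 */
#if 0
uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
if (idxEflReg != UINT8_MAX)
{
    /* ... read-only use of idxEflReg ... */
    iemNativeRegFreeTmp(pReNative, idxEflReg);
}
else
{
    /* No usable shadow copy; take a slower path (e.g. iemNativeRegAllocTmpForGuestReg). */
}
#endif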
4650
4651/**
4652 * Allocates argument registers for a function call.
4653 *
4654 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4655 * need to check the return value.
4656 * @param pReNative The native recompile state.
4657 * @param off The current code buffer offset.
4658 * @param cArgs The number of arguments the function call takes.
4659 */
4660DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4661{
4662 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4663 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4664 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4665 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4666
4667 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4668 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4669 else if (cArgs == 0)
4670         return off;
4671
4672 /*
4673     * Do we get lucky and all registers are free and not shadowing anything?
4674 */
4675 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4676 for (uint32_t i = 0; i < cArgs; i++)
4677 {
4678 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4679 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4680 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4681 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4682 }
4683 /*
4684 * Okay, not lucky so we have to free up the registers.
4685 */
4686 else
4687 for (uint32_t i = 0; i < cArgs; i++)
4688 {
4689 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4690 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4691 {
4692 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4693 {
4694 case kIemNativeWhat_Var:
4695 {
4696 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4698 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4699 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4700 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4701
4702 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4703 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4704 else
4705 {
4706 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4707 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4708 }
4709 break;
4710 }
4711
4712 case kIemNativeWhat_Tmp:
4713 case kIemNativeWhat_Arg:
4714 case kIemNativeWhat_rc:
4715 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4716 default:
4717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4718 }
4719
4720 }
4721 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4722 {
4723 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4724 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4725 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4726 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4727 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4728 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4729 }
4730 else
4731 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4732 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4733 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4734 }
4735 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4736     return off;
4737}
4738
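
/*
 * Minimal usage sketch for iemNativeRegAllocArgs (illustrative only, never
 * built): reserve the first two call argument registers before materializing
 * the argument values; any variables currently occupying them are spilled or
 * unassigned by the helper.
 */
#if 0
off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
uint8_t const idxArgReg0 = g_aidxIemNativeCallRegs[0];
uint8_t const idxArgReg1 = g_aidxIemNativeCallRegs[1];
/* ... emit code loading the argument values into idxArgReg0/idxArgReg1, then emit the call ... */
#endif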
4739
4740DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4741
4742
4743#if 0
4744/**
4745 * Frees a register assignment of any type.
4746 *
4747 * @param pReNative The native recompile state.
4748 * @param idxHstReg The register to free.
4749 *
4750 * @note Does not update variables.
4751 */
4752DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4753{
4754 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4755 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4756 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4757 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4758 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4759 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4760 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4761 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4762 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4763 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4764 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4765 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4766 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4767 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4768
4769 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4770 /* no flushing, right:
4771 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4772 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4773 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4774 */
4775}
4776#endif
4777
4778
4779/**
4780 * Frees a temporary register.
4781 *
4782 * Any shadow copies of guest registers assigned to the host register will not
4783 * be flushed by this operation.
4784 */
4785DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4786{
4787 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4788 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4789 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4790 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4791 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4792}
4793
4794
4795/**
4796 * Frees a temporary immediate register.
4797 *
4798 * It is assumed that the caller has not modified the register, so it still holds
4799 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4800 */
4801DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4802{
4803 iemNativeRegFreeTmp(pReNative, idxHstReg);
4804}
4805
4806
4807/**
4808 * Frees a register assigned to a variable.
4809 *
4810 * The register will be disassociated from the variable.
4811 */
4812DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4813{
4814 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4815 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4816 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4817 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4818     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4819
4820 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4821 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4822 if (!fFlushShadows)
4823 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4824 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4825 else
4826 {
4827 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4828 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4829 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4830 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4831 uint64_t fGstRegShadows = fGstRegShadowsOld;
4832 while (fGstRegShadows)
4833 {
4834 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4835 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4836
4837 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4838 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4839 }
4840 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4841 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4842 }
4843}
4844
4845
4846/**
4847 * Called right before emitting a call instruction to move anything important
4848 * out of call-volatile registers, free and flush the call-volatile registers,
4849 * optionally freeing argument variables.
4850 *
4851 * @returns New code buffer offset; throws VBox status code on failure.
4852 * @param pReNative The native recompile state.
4853 * @param off The code buffer offset.
4854 * @param cArgs The number of arguments the function call takes.
4855 *                  It is presumed that the host register parts of these have
4856 * been allocated as such already and won't need moving,
4857 * just freeing.
4858 * @param fKeepVars Mask of variables that should keep their register
4859 * assignments. Caller must take care to handle these.
4860 */
4861DECL_HIDDEN_THROW(uint32_t)
4862iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4863{
4864 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4865
4866 /* fKeepVars will reduce this mask. */
4867 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4868
4869 /*
4870 * Move anything important out of volatile registers.
4871 */
4872 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4873 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4874 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4875#ifdef IEMNATIVE_REG_FIXED_TMP0
4876 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4877#endif
4878#ifdef IEMNATIVE_REG_FIXED_TMP1
4879 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4880#endif
4881#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4882 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4883#endif
4884 & ~g_afIemNativeCallRegs[cArgs];
4885
4886 fRegsToMove &= pReNative->Core.bmHstRegs;
4887 if (!fRegsToMove)
4888 { /* likely */ }
4889 else
4890 {
4891 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4892 while (fRegsToMove != 0)
4893 {
4894 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4895 fRegsToMove &= ~RT_BIT_32(idxReg);
4896
4897 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4898 {
4899 case kIemNativeWhat_Var:
4900 {
4901 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4902 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4903 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4904 Assert(pVar->idxReg == idxReg);
4905 if (!(RT_BIT_32(idxVar) & fKeepVars))
4906 {
4907 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4908 idxVar, pVar->enmKind, pVar->idxReg));
4909 if (pVar->enmKind != kIemNativeVarKind_Stack)
4910 pVar->idxReg = UINT8_MAX;
4911 else
4912 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4913 }
4914 else
4915 fRegsToFree &= ~RT_BIT_32(idxReg);
4916 continue;
4917 }
4918
4919 case kIemNativeWhat_Arg:
4920 AssertMsgFailed(("What?!?: %u\n", idxReg));
4921 continue;
4922
4923 case kIemNativeWhat_rc:
4924 case kIemNativeWhat_Tmp:
4925 AssertMsgFailed(("Missing free: %u\n", idxReg));
4926 continue;
4927
4928 case kIemNativeWhat_FixedTmp:
4929 case kIemNativeWhat_pVCpuFixed:
4930 case kIemNativeWhat_pCtxFixed:
4931 case kIemNativeWhat_PcShadow:
4932 case kIemNativeWhat_FixedReserved:
4933 case kIemNativeWhat_Invalid:
4934 case kIemNativeWhat_End:
4935 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4936 }
4937 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4938 }
4939 }
4940
4941 /*
4942 * Do the actual freeing.
4943 */
4944 if (pReNative->Core.bmHstRegs & fRegsToFree)
4945 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4946 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4947 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4948
4949 /* If there are guest register shadows in any call-volatile register, we
4950       have to clear the corresponding guest register masks for each register. */
4951 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4952 if (fHstRegsWithGstShadow)
4953 {
4954 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4955 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4956 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4957 do
4958 {
4959 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4960 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4961
4962 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4963 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4964 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4965 } while (fHstRegsWithGstShadow != 0);
4966 }
4967
4968 return off;
4969}
4970
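
/*
 * Minimal usage sketch (illustrative only, never built): typical use right
 * before emitting a call to a threaded function with two arguments.  Variables
 * living in call-volatile registers are spilled or unassigned, the volatile
 * registers are freed and any guest shadows they held are dropped.
 */
#if 0
off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);
/* ... load the argument registers and emit the actual call here ... */
#endif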
4971
4972/**
4973 * Flushes a set of guest register shadow copies.
4974 *
4975 * This is usually done after calling a threaded function or a C-implementation
4976 * of an instruction.
4977 *
4978 * @param pReNative The native recompile state.
4979 * @param fGstRegs Set of guest registers to flush.
4980 */
4981DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4982{
4983 /*
4984 * Reduce the mask by what's currently shadowed
4985 */
4986 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4987 fGstRegs &= bmGstRegShadowsOld;
4988 if (fGstRegs)
4989 {
4990 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4991 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4992 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4993 if (bmGstRegShadowsNew)
4994 {
4995 /*
4996 * Partial.
4997 */
4998 do
4999 {
5000 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5001 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5002 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5003 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5004 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5005
5006 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5007 fGstRegs &= ~fInThisHstReg;
5008 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5009 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5010 if (!fGstRegShadowsNew)
5011 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5012 } while (fGstRegs != 0);
5013 }
5014 else
5015 {
5016 /*
5017 * Clear all.
5018 */
5019 do
5020 {
5021 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5022 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5023 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5024 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5025 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5026
5027 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5028 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5029 } while (fGstRegs != 0);
5030 pReNative->Core.bmHstRegsWithGstShadow = 0;
5031 }
5032 }
5033}
5034
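
/*
 * Minimal usage sketch (illustrative only, never built): after calling a
 * C-implementation that may have modified RIP and EFLAGS in the context, drop
 * any shadow copies of those guest registers so that later code reloads them.
 */
#if 0
iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
#endif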
5035
5036/**
5037 * Flushes guest register shadow copies held by a set of host registers.
5038 *
5039 * This is used with the TLB lookup code for ensuring that we don't carry on
5040 * with any guest shadows in volatile registers, as these will get corrupted by
5041 * a TLB miss.
5042 *
5043 * @param pReNative The native recompile state.
5044 * @param fHstRegs Set of host registers to flush guest shadows for.
5045 */
5046DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5047{
5048 /*
5049 * Reduce the mask by what's currently shadowed.
5050 */
5051 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5052 fHstRegs &= bmHstRegsWithGstShadowOld;
5053 if (fHstRegs)
5054 {
5055 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5056 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5057 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5058 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5059 if (bmHstRegsWithGstShadowNew)
5060 {
5061 /*
5062 * Partial (likely).
5063 */
5064 uint64_t fGstShadows = 0;
5065 do
5066 {
5067 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5068 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5069 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5070 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5071
5072 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5073 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5074 fHstRegs &= ~RT_BIT_32(idxHstReg);
5075 } while (fHstRegs != 0);
5076 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5077 }
5078 else
5079 {
5080 /*
5081 * Clear all.
5082 */
5083 do
5084 {
5085 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5086 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5087 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5088 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5089
5090 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5091 fHstRegs &= ~RT_BIT_32(idxHstReg);
5092 } while (fHstRegs != 0);
5093 pReNative->Core.bmGstRegShadows = 0;
5094 }
5095 }
5096}
5097
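
/*
 * Minimal usage sketch (illustrative only, never built): before emitting a TLB
 * lookup whose miss path will clobber the call-volatile registers, drop any
 * guest shadows currently held in them.
 */
#if 0
iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif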
5098
5099/**
5100 * Restores guest shadow copies in volatile registers.
5101 *
5102 * This is used after calling a helper function (think TLB miss) to restore the
5103 * register state of volatile registers.
5104 *
5105 * @param pReNative The native recompile state.
5106 * @param off The code buffer offset.
5107 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5108 * be active (allocated) w/o asserting. Hack.
5109 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5110 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5111 */
5112DECL_HIDDEN_THROW(uint32_t)
5113iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5114{
5115 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5116 if (fHstRegs)
5117 {
5118 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5119 do
5120 {
5121 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5122
5123             /* It's not fatal if a register is active holding a variable that is
5124                shadowing a guest register, ASSUMING all pending guest register
5125                writes were flushed prior to the helper call. However, we'll be
5126                emitting duplicate restores, so it wastes code space. */
5127 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5128 RT_NOREF(fHstRegsActiveShadows);
5129
5130 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5131 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5132 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5133 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5134
5135 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5136 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5137
5138 fHstRegs &= ~RT_BIT_32(idxHstReg);
5139 } while (fHstRegs != 0);
5140 }
5141 return off;
5142}
5143
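
/*
 * Minimal usage sketch (illustrative only, never built): after emitting a
 * helper call that merely clobbered the call-volatile register values (shadow
 * bookkeeping left intact, all pending guest writes flushed beforehand),
 * reload the shadowed guest values from the context.
 */
#if 0
/* ... emit the helper call ... */
off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif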
5144
5145#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5146# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
5147static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5148{
5149 /* Compare the shadow with the context value, they should match. */
5150 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
5151 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
5152 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
5153 return off;
5154}
5155# endif
5156
5157/**
5158 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5159 */
5160static uint32_t
5161iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5162{
5163 if (pReNative->Core.offPc)
5164 {
5165# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5166 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5167 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5168# endif
5169
5170# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5171 /* Allocate a temporary PC register. */
5172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5173
5174 /* Perform the addition and store the result. */
5175 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5176 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5177
5178 /* Free but don't flush the PC register. */
5179 iemNativeRegFreeTmp(pReNative, idxPcReg);
5180# else
5181 /* Compare the shadow with the context value, they should match. */
5182 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5183 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5184# endif
5185
5186 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5187 pReNative->Core.offPc = 0;
5188 pReNative->Core.cInstrPcUpdateSkipped = 0;
5189 }
5190# if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
5191 else
5192 {
5193 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5194 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
5195 }
5196# endif
5197
5198 return off;
5199}
5200#endif
5201
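
/*
 * Minimal sketch of how the delayed PC updating is assumed to be driven by the
 * surrounding emitters (illustrative only, never built): the per-instruction
 * advance is accumulated in offPc/cInstrPcUpdateSkipped instead of storing RIP
 * every time, and iemNativeEmitPcWriteback() is invoked before anything that
 * needs an up-to-date guest RIP.
 */
#if 0
# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
/* While recompiling an instruction of cbInstr bytes: */
pReNative->Core.offPc                += cbInstr;
pReNative->Core.cInstrPcUpdateSkipped++;
/* ... and before emitting a call or exiting the TB: */
off = iemNativeEmitPcWriteback(pReNative, off);
# endif
#endif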
5202
5203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5204
5205
5206/*********************************************************************************************************************************
5207* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5208*********************************************************************************************************************************/
5209
5210/**
5211 * Info about shadowed guest SIMD register values.
5212 * @see IEMNATIVEGSTSIMDREG
5213 */
5214static struct
5215{
5216 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5217 uint32_t offXmm;
5218 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5219 uint32_t offYmm;
5220 /** Name (for logging). */
5221 const char *pszName;
5222} const g_aGstSimdShadowInfo[] =
5223{
5224#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5225 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5226 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5227 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5228 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5229 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5230 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5231 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5232 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5233 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5234 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5235 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5236 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5237 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5238 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5239 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5240 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5241 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5242#undef CPUMCTX_OFF_AND_SIZE
5243};
5244AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5245
5246
5247#ifdef LOG_ENABLED
5248/** Host CPU SIMD register names. */
5249DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5250{
5251#ifdef RT_ARCH_AMD64
5252 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5253#elif defined(RT_ARCH_ARM64)
5254 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5255 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5256#else
5257# error "port me"
5258#endif
5259};
5260#endif
5261
5262
5263DECL_FORCE_INLINE(uint8_t) iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg,
5264 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
5265{
5266 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5267
5268 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = enmWhat;
5269 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
5270 RT_NOREF(idxVar);
5271 return idxSimdReg;
5272}
5273
5274
5275/**
5276 * Frees a temporary SIMD register.
5277 *
5278 * Any shadow copies of guest registers assigned to the host register will not
5279 * be flushed by this operation.
5280 */
5281DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5282{
5283 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5284 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5285 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5286 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5287 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5288}
5289
5290
5291/**
5292 * Locate a register, possibly freeing one up.
5293 *
5294 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5295 * failed.
5296 *
5297 * @returns Host register number on success. Returns UINT8_MAX if no registers
5298 *          found, the caller is supposed to deal with this and raise an
5299 *          allocation type specific status code (if desired).
5300 *
5301 * @throws  VBox status code if we run into trouble spilling a variable or
5302 *          recording debug info.  Does NOT throw anything if we're out of
5303 * registers, though.
5304 */
5305static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5306 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5307{
5308 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5309 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5310 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5311
5312 AssertFailed();
5313
5314 /*
5315 * Try a freed register that's shadowing a guest register.
5316 */
5317 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5318 if (fRegs)
5319 {
5320 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5321
5322#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5323 /*
5324         * When we have liveness information, we use it to kick out all shadowed
5325         * guest registers that will not be needed any more in this TB.  If we're
5326 * lucky, this may prevent us from ending up here again.
5327 *
5328 * Note! We must consider the previous entry here so we don't free
5329 * anything that the current threaded function requires (current
5330 * entry is produced by the next threaded function).
5331 */
5332 uint32_t const idxCurCall = pReNative->idxCurCall;
5333 if (idxCurCall > 0)
5334 {
5335 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5336
5337# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5338 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5339 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5340             uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
5341#else
5342 /* Construct a mask of the registers not in the read or write state.
5343                Note! We could skip writes, if they aren't from us, as this is just
5344 a hack to prevent trashing registers that have just been written
5345 or will be written when we retire the current instruction. */
5346 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5347 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5348 & IEMLIVENESSBIT_MASK;
5349#endif
5350 /* Merge EFLAGS. */
5351 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
5352 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
5353 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
5354 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
5355 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
5356
5357 /* If it matches any shadowed registers. */
5358 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5359 {
5360 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5361 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5362 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5363
5364 /* See if we've got any unshadowed registers we can return now. */
5365 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5366 if (fUnshadowedRegs)
5367 {
5368 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5369 return (fPreferVolatile
5370 ? ASMBitFirstSetU32(fUnshadowedRegs)
5371 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5372 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5373 - 1;
5374 }
5375 }
5376 }
5377#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5378
5379 unsigned const idxReg = (fPreferVolatile
5380 ? ASMBitFirstSetU32(fRegs)
5381 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5382 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5383 - 1;
5384
5385 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5386 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5387 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5388 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5389 Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5390
5391 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5392 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5393 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5394 return idxReg;
5395 }
5396
5397 /*
5398 * Try free up a variable that's in a register.
5399 *
5400     * We do two rounds here, first evacuating variables that don't need to be
5401     * saved on the stack, then in the second round moving things to the stack.
5402 */
5403 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5404 AssertReleaseFailed(); /** @todo */
5405#if 0
5406 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5407 {
5408 uint32_t fVars = pReNative->Core.bmSimdVars;
5409 while (fVars)
5410 {
5411 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5412 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5413 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5414 && (RT_BIT_32(idxReg) & fRegMask)
5415 && ( iLoop == 0
5416 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5417 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5418 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5419 {
5420 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5421 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5422 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5423 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5424 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5425 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5426
5427 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5428 {
5429 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5430 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5431 }
5432
5433 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5434 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5435
5436 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5437 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5438 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5439 return idxReg;
5440 }
5441 fVars &= ~RT_BIT_32(idxVar);
5442 }
5443 }
5444#else
5445 RT_NOREF(poff);
5446#endif
5447
5448 return UINT8_MAX;
5449}
5450
5451
5452/**
5453 * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest
5454 * SIMD register @a enmGstSimdReg.
5455 *
5456 * ASSUMES that caller has made sure @a enmGstSimdReg is not associated with any
5457 * host register before calling.
5458 */
5459DECL_FORCE_INLINE(void)
5460iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5461{
5462 Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)));
5463 Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5464 Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End);
5465
5466 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxHstSimdReg;
5467 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5468 pReNative->Core.bmGstSimdRegShadows |= RT_BIT_64(enmGstSimdReg);
5469 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxHstSimdReg);
5470#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5471 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5472 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg);
5473#else
5474 RT_NOREF(off);
5475#endif
5476}
5477
5478
5479/**
5480 * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom
5481 * to @a idxSimdRegTo.
5482 */
5483DECL_FORCE_INLINE(void)
5484iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo,
5485 IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5486{
5487 Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5488 Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom);
5489 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)
5490 == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows
5491 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5492 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows)
5493 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows);
5494 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom))
5495 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows));
5496 Assert( pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded
5497 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded);
5498
5499
5500 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg);
5501 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows = fGstRegShadowsFrom;
5502 if (!fGstRegShadowsFrom)
5503 {
5504 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdRegFrom);
5505 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5506 }
5507 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxSimdRegTo);
5508 pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5509 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxSimdRegTo;
5510#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5511 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5512 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom);
5513#else
5514 RT_NOREF(off);
5515#endif
5516}
5517
5518
5519/**
5520 * Clear any guest register shadow claims from @a idxHstSimdReg.
5521 *
5522 * The register does not need to be shadowing any guest registers.
5523 */
5524DECL_FORCE_INLINE(void)
5525iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off)
5526{
5527 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5528 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows
5529 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5530 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg))
5531 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5532 Assert( !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128)
5533 && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128));
5534
5535#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5536 uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5537 if (fGstRegs)
5538 {
5539 Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End));
5540 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5541 while (fGstRegs)
5542 {
5543 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5544 fGstRegs &= ~RT_BIT_64(iGstReg);
5545 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg);
5546 }
5547 }
5548#else
5549 RT_NOREF(off);
5550#endif
5551
5552 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstSimdReg);
5553 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5554 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5555 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5556}
5557
5558
5559/**
5560 * Flushes a set of guest register shadow copies.
5561 *
5562 * This is usually done after calling a threaded function or a C-implementation
5563 * of an instruction.
5564 *
5565 * @param pReNative The native recompile state.
5566 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5567 */
5568DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5569{
5570 /*
5571 * Reduce the mask by what's currently shadowed
5572 */
5573 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5574 fGstSimdRegs &= bmGstSimdRegShadows;
5575 if (fGstSimdRegs)
5576 {
5577 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5578 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5579 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5580 if (bmGstSimdRegShadowsNew)
5581 {
5582 /*
5583 * Partial.
5584 */
5585 do
5586 {
5587 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5588 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5589 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5590 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5591 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5592 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5593
5594 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5595 fGstSimdRegs &= ~fInThisHstReg;
5596 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5597 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5598 if (!fGstRegShadowsNew)
5599 {
5600 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5601 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5602 }
5603 } while (fGstSimdRegs != 0);
5604 }
5605 else
5606 {
5607 /*
5608 * Clear all.
5609 */
5610 do
5611 {
5612 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5613 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5614 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5615 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5616 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5617 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5618
5619 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5620 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5621 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5622 } while (fGstSimdRegs != 0);
5623 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5624 }
5625 }
5626}
5627
5628
5629/**
5630 * Allocates a temporary host SIMD register.
5631 *
5632 * This may emit code to save register content onto the stack in order to free
5633 * up a register.
5634 *
5635 * @returns The host register number; throws VBox status code on failure,
5636 * so no need to check the return value.
5637 * @param pReNative The native recompile state.
5638 * @param poff Pointer to the variable with the code buffer position.
5639 * This will be update if we need to move a variable from
5640 *                      This will be updated if we need to move a variable from
5641 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5642 * registers (@c true, default) or the other way around
5643 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5644 */
5645DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5646{
5647 /*
5648     * Try to find a completely unused register, preferably a call-volatile one.
5649 */
5650 uint8_t idxSimdReg;
5651 uint32_t fRegs = ~pReNative->Core.bmHstRegs
5652 & ~pReNative->Core.bmHstRegsWithGstShadow
5653 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5654 if (fRegs)
5655 {
5656 if (fPreferVolatile)
5657 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5658 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5659 else
5660 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5661 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5662 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5663 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5664 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5665 }
5666 else
5667 {
5668 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5669 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5670 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5671 }
5672
5673 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5674 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5675}
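/* Illustrative caller sketch (hypothetical, not taken from this file):
 *     uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *     ... emit SIMD instructions using idxSimdTmp ...
 * On failure the function longjmps with VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP, so the caller
 * never needs to check the returned index. */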
5676
5677
5678/**
5679 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
5680 * registers.
5681 *
5682 * @returns The host register number; throws VBox status code on failure,
5683 * so no need to check the return value.
5684 * @param pReNative The native recompile state.
5685 * @param poff Pointer to the variable with the code buffer position.
5686 * This will be update if we need to move a variable from
5687 * register to stack in order to satisfy the request.
5688 * @param fRegMask Mask of acceptable registers.
5689 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5690 * registers (@c true, default) or the other way around
5691 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5692 */
5693DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5694 bool fPreferVolatile /*= true*/)
5695{
5696 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5697 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5698
5699 /*
5700 * Try find a completely unused register, preferably a call-volatile one.
5701 */
5702 uint8_t idxSimdReg;
5703 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5704 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5705 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5706 & fRegMask;
5707 if (fRegs)
5708 {
5709 if (fPreferVolatile)
5710 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5711 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5712 else
5713 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5714 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5715 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5716 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5717 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5718 }
5719 else
5720 {
5721 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5722 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5723 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5724 }
5725
5726 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5727 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5728}
5729
5730
5731static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5732 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5733{
5734 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5735 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == enmLoadSzDst
5736 || pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5737 {
5738# ifdef RT_ARCH_ARM64
5739 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5740 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5741# endif
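 /* Example of the pairing assumption above (arm64): a 256-bit value shadowed in the host
    register pair (2n, 2n+1) keeps its low 128 bits in register 2n and its high 128 bits in
    register 2n+1, which is why the High128 case below simply targets the +1 sibling. */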
5742
5743 switch (enmLoadSzDst)
5744 {
5745 case kIemNativeGstSimdRegLdStSz_256:
5746 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5747 break;
5748 case kIemNativeGstSimdRegLdStSz_Low128:
5749 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5750 break;
5751 case kIemNativeGstSimdRegLdStSz_High128:
5752 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5753 break;
5754 default:
5755 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5756 }
5757
5758 pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded = enmLoadSzDst;
5759 return off;
5760 }
5761 else
5762 {
5763 /* Complicated stuff where the source is currently missing something, later. */
5764 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5765 }
5766
5767 return off;
5768}
5769
5770
5771/**
5772 * Allocates a temporary host SIMD register for keeping a guest
5773 * SIMD register value.
5774 *
5775 * Since we may already have a register holding the guest register value,
5776 * code will be emitted to do the loading if that's not the case. Code may also
5777 * be emitted if we have to free up a register to satisfy the request.
5778 *
5779 * @returns The host register number; throws VBox status code on failure, so no
5780 * need to check the return value.
5781 * @param pReNative The native recompile state.
5782 * @param poff Pointer to the variable with the code buffer
5783 * position. This will be updated if we need to move a
5784 * variable from register to stack in order to satisfy
5785 * the request.
5786 * @param enmGstSimdReg The guest SIMD register that is to be updated.
 * @param enmLoadSz The load size of the register.
5787 * @param enmIntendedUse How the caller will be using the host register.
5788 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5789 * register is okay (default). The ASSUMPTION here is
5790 * that the caller has already flushed all volatile
5791 * registers, so this is only applied if we allocate a
5792 * new register.
5793 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5794 */
5795DECL_HIDDEN_THROW(uint8_t)
5796iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5797 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5798 bool fNoVolatileRegs /*= false*/)
5799{
5800 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5801#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5802 AssertMsg( pReNative->idxCurCall == 0
5803 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5804 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5805 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5806 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5807 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5808 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5809#endif
5810#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5811 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5812#endif
5813 uint32_t const fRegMask = !fNoVolatileRegs
5814 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5815 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5816
5817 /*
5818 * First check if the guest register value is already in a host register.
5819 */
5820 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5821 {
5822 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5823 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5824 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5825 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5826
5827 /* It's not supposed to be allocated... */
5828 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5829 {
5830 /*
5831 * If the register will trash the guest shadow copy, try find a
5832 * completely unused register we can use instead. If that fails,
5833 * we need to disassociate the host reg from the guest reg.
5834 */
5835 /** @todo would be nice to know if preserving the register is in any way helpful. */
5836 /* If the purpose is calculations, try duplicate the register value as
5837 we'll be clobbering the shadow. */
5838 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5839 && ( ~pReNative->Core.bmHstSimdRegs
5840 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5841 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5842 {
5843 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5844
5845 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5846
5847 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5848 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5849 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5850 idxSimdReg = idxRegNew;
5851 }
5852 /* If the current register matches the restrictions, go ahead and allocate
5853 it for the caller. */
5854 else if (fRegMask & RT_BIT_32(idxSimdReg))
5855 {
5856 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5857 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5858 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5859 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5860 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5861 else
5862 {
5863 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5864 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5865 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5866 }
5867 }
5868 /* Otherwise, allocate a register that satisfies the caller and transfer
5869 the shadowing if compatible with the intended use. (This basically
5870 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5871 else
5872 {
5873 Assert(fNoVolatileRegs);
5874 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5875 !fNoVolatileRegs
5876 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5877 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5878 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5879 {
5880 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5881 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5882 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5883 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5884 }
5885 else
5886 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5887 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5888 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5889 idxSimdReg = idxRegNew;
5890 }
5891 }
5892 else
5893 {
5894 /*
5895 * Oops. Shadowed guest register already allocated!
5896 *
5897 * Allocate a new register, copy the value and, if updating, the
5898 * guest shadow copy assignment to the new register.
5899 */
5900 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5901 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5902 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5903 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5904
5905 /** @todo share register for readonly access. */
5906 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5907 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5908
5909 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5910 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5911 else
5912 {
5913 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
5914 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5915 }
5916
5917 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5918 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5919 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5920 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5921 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5922 else
5923 {
5924 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5925 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5926 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5927 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5928 }
5929 idxSimdReg = idxRegNew;
5930 }
5931 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5932
5933#ifdef VBOX_STRICT
5934 /* Strict builds: Check that the value is correct. */
5935 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5936#endif
5937
5938 return idxSimdReg;
5939 }
5940
5941 /*
5942 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5943 */
5944 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5945
5946 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5947 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5948 else
5949 {
5950 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
5951 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5952 }
5953
5954 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5955 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5956
5957 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5958 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5959
5960 return idxRegNew;
5961}
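/* Informal summary of the allocation strategy above (derived from the code, not normative):
 *   - guest register already shadowed and the host register free:  reuse it (or duplicate it for destructive calcs),
 *   - guest register already shadowed but the host register taken: allocate a new host register and copy/transfer,
 *   - guest register not shadowed at all:                          allocate a new host register and load it,
 * with the shadow bookkeeping only updated for non-calculation uses. */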
5962
5963
5964/**
5965 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5966 *
5967 * @returns New code buffer offset.
5968 * @param pReNative The native recompile state.
5969 * @param off Current code buffer position.
5970 * @param idxGstSimdReg The guest SIMD register to flush.
5971 */
5972static uint32_t iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGstSimdReg)
5973{
5974 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5975
5976 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5977 g_aGstSimdShadowInfo[idxGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5978 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg),
5979 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg)));
5980
5981#ifdef RT_ARCH_AMD64
5982 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg))
5983 {
5984 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5985 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5986 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm);
5987 }
5988
5989 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg))
5990 {
5991 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5992 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5993 AssertReleaseFailed();
5994 //off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
5995 }
5996#elif defined(RT_ARCH_ARM64)
5997 /* ASSUMING there are two consecutive host registers to store the potential 256-bit guest register. */
5998 Assert(!(idxHstSimdReg & 0x1));
5999 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg))
6000 {
6001 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
6002 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
6003 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm);
6004 }
6005
6006 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg))
6007 {
6008 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
6009 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
6010 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg + 1, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
6011 }
6012#endif
6013
6014 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, idxGstSimdReg);
6015 return off;
6016}
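/* Note (informal): the dirty low half is written back to CPUMCTX at offXmm and the dirty high
 * half at offYmm; the amd64 high-128 writeback path is not implemented yet (see the
 * AssertReleaseFailed above), while arm64 stores from the odd register of the assumed pair. */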
6017
6018#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6019
6020
6021
6022/*********************************************************************************************************************************
6023* Code emitters for flushing pending guest register writes and sanity checks *
6024*********************************************************************************************************************************/
6025
6026/**
6027 * Flushes delayed write of a specific guest register.
6028 *
6029 * This must be called prior to calling CImpl functions and any helpers that use
6030 * the guest state (like raising exceptions) and such.
6031 *
6032 * This optimization has not yet been implemented. The first target would be
6033 * RIP updates, since these are the most common ones.
6034 */
6035DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6036 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
6037{
6038#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6039 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
6040#endif
6041
6042#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6043 if ( enmClass == kIemNativeGstRegRef_XReg
6044 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
6045 {
6046 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxReg);
6047 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now, that the referenced register doesn't change). */
6048 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
6049
6050 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6051 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
6052 }
6053#endif
6054 RT_NOREF(pReNative, enmClass, idxReg);
6055 return off;
6056}
6057
6058
6059/**
6060 * Flushes any delayed guest register writes.
6061 *
6062 * This must be called prior to calling CImpl functions and any helpers that use
6063 * the guest state (like raising exceptions) and such.
6064 *
6065 * This optimization has not yet been implemented. The first target would be
6066 * RIP updates, since these are the most common ones.
6067 */
6068DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/,
6069 bool fFlushShadows /*= true*/)
6070{
6071#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6072 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
6073 off = iemNativeEmitPcWriteback(pReNative, off);
6074#else
6075 RT_NOREF(pReNative, fGstShwExcept);
6076#endif
6077
6078#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6079 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6080 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6081 {
6082 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6083 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6084
6085 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6086 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxGstSimdReg);
6087
6088 if ( fFlushShadows
6089 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6090 {
6091 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6092
6093 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6094 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6095 }
6096 }
6097#else
6098 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6099#endif
6100
6101 return off;
6102}
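/* Typical call-site sketch (hypothetical): pending writes are flushed before emitting a call to
 * code that inspects CPUMCTX directly, e.g.:
 *     off = iemNativeRegFlushPendingWrites(pReNative, off);
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);
 * where pfnSomeHelper stands in for any CImpl/helper function. */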
6103
6104
6105#ifdef VBOX_STRICT
6106/**
6107 * Does internal register allocator sanity checks.
6108 */
6109static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6110{
6111 /*
6112 * Iterate host registers building a guest shadowing set.
6113 */
6114 uint64_t bmGstRegShadows = 0;
6115 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6116 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6117 while (bmHstRegsWithGstShadow)
6118 {
6119 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6120 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6121 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6122
6123 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6124 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6125 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6126 bmGstRegShadows |= fThisGstRegShadows;
6127 while (fThisGstRegShadows)
6128 {
6129 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6130 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6131 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6132 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6133 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6134 }
6135 }
6136 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6137 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6138 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6139
6140 /*
6141 * Now the other way around, checking the guest to host index array.
6142 */
6143 bmHstRegsWithGstShadow = 0;
6144 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6145 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6146 while (bmGstRegShadows)
6147 {
6148 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6149 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6150 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6151
6152 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6153 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6154 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6155 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6156 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6157 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6158 }
6159 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6160 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6161 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6162}
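/* Informal invariant checked above: the host-side view (bmHstRegsWithGstShadow plus each
 * register's fGstRegShadows) and the guest-side view (bmGstRegShadows plus aidxGstRegShadows)
 * must describe exactly the same shadowing relation in both directions. */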
6163#endif
6164
6165
6166/*********************************************************************************************************************************
6167* Code Emitters (larger snippets) *
6168*********************************************************************************************************************************/
6169
6170/**
6171 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6172 * extending to 64-bit width.
6173 *
6174 * @returns New code buffer offset on success, UINT32_MAX on failure.
6175 * @param pReNative The native recompile state.
6176 * @param off The current code buffer position.
6177 * @param idxHstReg The host register to load the guest register value into.
6178 * @param enmGstReg The guest register to load.
6179 *
6180 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6181 * that is something the caller needs to do if applicable.
6182 */
6183DECL_HIDDEN_THROW(uint32_t)
6184iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6185{
6186 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
6187 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6188
6189 switch (g_aGstShadowInfo[enmGstReg].cb)
6190 {
6191 case sizeof(uint64_t):
6192 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6193 case sizeof(uint32_t):
6194 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6195 case sizeof(uint16_t):
6196 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6197#if 0 /* not present in the table. */
6198 case sizeof(uint8_t):
6199 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6200#endif
6201 default:
6202 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6203 }
6204}
6205
6206
6207#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6208/**
6209 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6210 *
6211 * @returns New code buffer offset on success, UINT32_MAX on failure.
6212 * @param pReNative The recompiler state.
6213 * @param off The current code buffer position.
6214 * @param idxHstSimdReg The host register to load the guest register value into.
6215 * @param enmGstSimdReg The guest register to load.
6216 * @param enmLoadSz The load size of the register.
6217 *
6218 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6219 * that is something the caller needs to do if applicable.
6220 */
6221DECL_HIDDEN_THROW(uint32_t)
6222iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6223 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6224{
6225 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6226
6227 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
6228 switch (enmLoadSz)
6229 {
6230 case kIemNativeGstSimdRegLdStSz_256:
6231 return iemNativeEmitSimdLoadVecRegFromVCpuU256(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm,
6232 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6233 case kIemNativeGstSimdRegLdStSz_Low128:
6234 return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6235 case kIemNativeGstSimdRegLdStSz_High128:
6236 return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6237 default:
6238 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6239 }
6240}
6241#endif
6242
6243#ifdef VBOX_STRICT
6244/**
6245 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6246 *
6247 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6248 * Trashes EFLAGS on AMD64.
6249 */
6250static uint32_t
6251iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6252{
6253# ifdef RT_ARCH_AMD64
6254 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6255
6256 /* rol reg64, 32 */
6257 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6258 pbCodeBuf[off++] = 0xc1;
6259 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6260 pbCodeBuf[off++] = 32;
6261
6262 /* test reg32, ffffffffh */
6263 if (idxReg >= 8)
6264 pbCodeBuf[off++] = X86_OP_REX_B;
6265 pbCodeBuf[off++] = 0xf7;
6266 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6267 pbCodeBuf[off++] = 0xff;
6268 pbCodeBuf[off++] = 0xff;
6269 pbCodeBuf[off++] = 0xff;
6270 pbCodeBuf[off++] = 0xff;
6271
6272 /* je/jz +1 */
6273 pbCodeBuf[off++] = 0x74;
6274 pbCodeBuf[off++] = 0x01;
6275
6276 /* int3 */
6277 pbCodeBuf[off++] = 0xcc;
6278
6279 /* rol reg64, 32 */
6280 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6281 pbCodeBuf[off++] = 0xc1;
6282 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6283 pbCodeBuf[off++] = 32;
6284
6285# elif defined(RT_ARCH_ARM64)
6286 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6287 /* lsr tmp0, reg64, #32 */
6288 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6289 /* cbz tmp0, +1 */
6290 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6291 /* brk #0x1100 */
6292 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6293
6294# else
6295# error "Port me!"
6296# endif
6297 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6298 return off;
6299}
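/* For reference, the amd64 sequence emitted above is roughly:
 *     rol  reg, 32           ; swap the halves
 *     test reg32, 0ffffffffh ; old upper half now in the low 32 bits
 *     jz   +1
 *     int3                   ; trap when the upper half wasn't clear
 *     rol  reg, 32           ; restore the original value
 * (the arm64 variant achieves the same with lsr/cbz/brk). */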
6300#endif /* VBOX_STRICT */
6301
6302
6303#ifdef VBOX_STRICT
6304/**
6305 * Emitting code that checks that the content of register @a idxReg is the same
6306 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6307 * instruction if that's not the case.
6308 *
6309 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6310 * Trashes EFLAGS on AMD64.
6311 */
6312static uint32_t
6313iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6314{
6315# ifdef RT_ARCH_AMD64
6316 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6317
6318 /* cmp reg, [mem] */
6319 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6320 {
6321 if (idxReg >= 8)
6322 pbCodeBuf[off++] = X86_OP_REX_R;
6323 pbCodeBuf[off++] = 0x38;
6324 }
6325 else
6326 {
6327 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6328 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6329 else
6330 {
6331 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6332 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6333 else
6334 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6335 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6336 if (idxReg >= 8)
6337 pbCodeBuf[off++] = X86_OP_REX_R;
6338 }
6339 pbCodeBuf[off++] = 0x39;
6340 }
6341 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6342
6343 /* je/jz +1 */
6344 pbCodeBuf[off++] = 0x74;
6345 pbCodeBuf[off++] = 0x01;
6346
6347 /* int3 */
6348 pbCodeBuf[off++] = 0xcc;
6349
6350 /* For values smaller than the register size, we must check that the rest
6351 of the register is all zeros. */
6352 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6353 {
6354 /* test reg64, imm32 */
6355 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6356 pbCodeBuf[off++] = 0xf7;
6357 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6358 pbCodeBuf[off++] = 0;
6359 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6360 pbCodeBuf[off++] = 0xff;
6361 pbCodeBuf[off++] = 0xff;
6362
6363 /* je/jz +1 */
6364 pbCodeBuf[off++] = 0x74;
6365 pbCodeBuf[off++] = 0x01;
6366
6367 /* int3 */
6368 pbCodeBuf[off++] = 0xcc;
6369 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6370 }
6371 else
6372 {
6373 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6374 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6375 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6376 }
6377
6378# elif defined(RT_ARCH_ARM64)
6379 /* mov TMP0, [gstreg] */
6380 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6381
6382 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6383 /* sub tmp0, tmp0, idxReg */
6384 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6385 /* cbz tmp0, +1 */
6386 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6387 /* brk #0x1000+enmGstReg */
6388 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6389 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6390
6391# else
6392# error "Port me!"
6393# endif
6394 return off;
6395}
6396
6397
6398# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6399/**
6400 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6401 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6402 * instruction if that's not the case.
6403 *
6404 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6405 * Trashes EFLAGS on AMD64.
6406 */
6407static uint32_t
6408iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
6409 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6410{
6411# ifdef RT_ARCH_AMD64
6412 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
6413
6414 /* movdqa vectmp0, idxSimdReg */
6415 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6416
6417 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6418
6419 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6420 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6421 if (idxSimdReg >= 8)
6422 pbCodeBuf[off++] = X86_OP_REX_R;
6423 pbCodeBuf[off++] = 0x0f;
6424 pbCodeBuf[off++] = 0x38;
6425 pbCodeBuf[off++] = 0x29;
6426 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6427
6428 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6429 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6430 pbCodeBuf[off++] = X86_OP_REX_W
6431 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6432 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6433 pbCodeBuf[off++] = 0x0f;
6434 pbCodeBuf[off++] = 0x3a;
6435 pbCodeBuf[off++] = 0x16;
6436 pbCodeBuf[off++] = 0xeb;
6437 pbCodeBuf[off++] = 0x00;
6438
6439 /* test tmp0, 0xffffffff. */
6440 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6441 pbCodeBuf[off++] = 0xf7;
6442 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6443 pbCodeBuf[off++] = 0xff;
6444 pbCodeBuf[off++] = 0xff;
6445 pbCodeBuf[off++] = 0xff;
6446 pbCodeBuf[off++] = 0xff;
6447
6448 /* je/jz +1 */
6449 pbCodeBuf[off++] = 0x74;
6450 pbCodeBuf[off++] = 0x01;
6451
6452 /* int3 */
6453 pbCodeBuf[off++] = 0xcc;
6454
6455 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6456 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6457 pbCodeBuf[off++] = X86_OP_REX_W
6458 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6459 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6460 pbCodeBuf[off++] = 0x0f;
6461 pbCodeBuf[off++] = 0x3a;
6462 pbCodeBuf[off++] = 0x16;
6463 pbCodeBuf[off++] = 0xeb;
6464 pbCodeBuf[off++] = 0x01;
6465
6466 /* test tmp0, 0xffffffff. */
6467 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6468 pbCodeBuf[off++] = 0xf7;
6469 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6470 pbCodeBuf[off++] = 0xff;
6471 pbCodeBuf[off++] = 0xff;
6472 pbCodeBuf[off++] = 0xff;
6473 pbCodeBuf[off++] = 0xff;
6474
6475 /* je/jz +1 */
6476 pbCodeBuf[off++] = 0x74;
6477 pbCodeBuf[off++] = 0x01;
6478
6479 /* int3 */
6480 pbCodeBuf[off++] = 0xcc;
6481
6482# elif defined(RT_ARCH_ARM64)
6483 /* mov vectmp0, [gstreg] */
6484 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6485
6486 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6487 {
6488 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6489 /* eor vectmp0, vectmp0, idxSimdReg */
6490 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6491 /* cnt vectmp0, vectmp0, #0*/
6492 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6493 /* umov tmp0, vectmp0.D[0] */
6494 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6495 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6496 /* cbz tmp0, +1 */
6497 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6498 /* brk #0x1000+enmGstReg */
6499 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6500 }
6501
6502 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6503 {
6504 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6505 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6506 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6507 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6508 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6509 /* umov tmp0, (vectmp0 + 1).D[0] */
6510 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6511 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6512 /* cbz tmp0, +1 */
6513 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6514 /* brk #0x1000+enmGstReg */
6515 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6516 }
6517
6518# else
6519# error "Port me!"
6520# endif
6521
6522 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6523 return off;
6524}
6525# endif
6526#endif /* VBOX_STRICT */
6527
6528
6529#ifdef VBOX_STRICT
6530/**
6531 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6532 * important bits.
6533 *
6534 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6535 * Trashes EFLAGS on AMD64.
6536 */
6537static uint32_t
6538iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6539{
6540 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6541 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6542 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6543 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6544
6545#ifdef RT_ARCH_AMD64
6546 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6547
6548 /* je/jz +1 */
6549 pbCodeBuf[off++] = 0x74;
6550 pbCodeBuf[off++] = 0x01;
6551
6552 /* int3 */
6553 pbCodeBuf[off++] = 0xcc;
6554
6555# elif defined(RT_ARCH_ARM64)
6556 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6557
6558 /* b.eq +1 */
6559 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6560 /* brk #0x2000 */
6561 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6562
6563# else
6564# error "Port me!"
6565# endif
6566 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6567
6568 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6569 return off;
6570}
6571#endif /* VBOX_STRICT */
6572
6573
6574/**
6575 * Emits code for checking the return code of a call and rcPassUp, returning
6576 * from the code if either is non-zero.
6577 */
6578DECL_HIDDEN_THROW(uint32_t)
6579iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6580{
6581#ifdef RT_ARCH_AMD64
6582 /*
6583 * AMD64: eax = call status code.
6584 */
6585
6586 /* edx = rcPassUp */
6587 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6588# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6589 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6590# endif
6591
6592 /* edx = eax | rcPassUp */
6593 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6594 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6595 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6596 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6597
6598 /* Jump to non-zero status return path. */
6599 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6600
6601 /* done. */
6602
6603#elif RT_ARCH_ARM64
6604 /*
6605 * ARM64: w0 = call status code.
6606 */
6607# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6608 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6609# endif
6610 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6611
6612 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6613
6614 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6615
6616 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6617 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6618 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6619
6620#else
6621# error "port me"
6622#endif
6623 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6624 RT_NOREF_PV(idxInstr);
6625 return off;
6626}
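/* Informal summary: on both hosts the emitted code ORs the 32-bit call status with
 * VMCPUCC::iem.s.rcPassUp and branches to the NonZeroRetOrPassUp label when the result is
 * non-zero, i.e. the fast path falls through only when both are VINF_SUCCESS (zero). */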
6627
6628
6629/**
6630 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6631 * raising a \#GP(0) if it isn't.
6632 *
6633 * @returns New code buffer offset, UINT32_MAX on failure.
6634 * @param pReNative The native recompile state.
6635 * @param off The code buffer offset.
6636 * @param idxAddrReg The host register with the address to check.
6637 * @param idxInstr The current instruction.
6638 */
6639DECL_HIDDEN_THROW(uint32_t)
6640iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6641{
6642 /*
6643 * Make sure we don't have any outstanding guest register writes as we may
6644 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6645 */
6646 off = iemNativeRegFlushPendingWrites(pReNative, off);
6647
6648#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6649 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6650#else
6651 RT_NOREF(idxInstr);
6652#endif
6653
6654#ifdef RT_ARCH_AMD64
6655 /*
6656 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6657 * return raisexcpt();
6658 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6659 */
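 /*
  * Worked example (informal): for 0x00007fffffffffff the high dword is 0x00007fff,
  * + 0x8000 gives 0x0000ffff and >> 16 gives 0, so no exception is raised; for
  * 0x0000800000000000 the high dword is 0x00008000, + 0x8000 gives 0x00010000 and
  * >> 16 gives 1, so the #GP(0) path is taken.
  */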
6660 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6661
6662 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6663 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6664 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6665 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6666 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6667
6668 iemNativeRegFreeTmp(pReNative, iTmpReg);
6669
6670#elif defined(RT_ARCH_ARM64)
6671 /*
6672 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6673 * return raisexcpt();
6674 * ----
6675 * mov x1, 0x800000000000
6676 * add x1, x0, x1
6677 * cmp xzr, x1, lsr 48
6678 * b.ne .Lraisexcpt
6679 */
6680 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6681
6682 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6683 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6684 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6685 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6686
6687 iemNativeRegFreeTmp(pReNative, iTmpReg);
6688
6689#else
6690# error "Port me"
6691#endif
6692 return off;
6693}
6694
6695
6696/**
6697 * Emits code to check that the content of @a idxAddrReg is within the limit
6698 * of CS, raising a \#GP(0) if it isn't.
6699 *
6700 * @returns New code buffer offset; throws VBox status code on error.
6701 * @param pReNative The native recompile state.
6702 * @param off The code buffer offset.
6703 * @param idxAddrReg The host register (32-bit) with the address to
6704 * check.
6705 * @param idxInstr The current instruction.
6706 */
6707DECL_HIDDEN_THROW(uint32_t)
6708iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6709 uint8_t idxAddrReg, uint8_t idxInstr)
6710{
6711 /*
6712 * Make sure we don't have any outstanding guest register writes as we may
6713 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6714 */
6715 off = iemNativeRegFlushPendingWrites(pReNative, off);
6716
6717#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6718 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6719#else
6720 RT_NOREF(idxInstr);
6721#endif
6722
6723 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6724 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6725 kIemNativeGstRegUse_ReadOnly);
6726
6727 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6728 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6729
6730 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6731 return off;
6732}
6733
6734
6735/**
6736 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
6737 *
6738 * @returns The flush mask.
6739 * @param fCImpl The IEM_CIMPL_F_XXX flags.
6740 * @param fGstShwFlush The starting flush mask.
6741 */
6742DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
6743{
6744 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
6745 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
6746 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
6747 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
6748 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
6749 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
6750 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
6751 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
6752 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
6753 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
6754 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
6755 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
6756 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
6757 return fGstShwFlush;
6758}
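/* Example (derived from the code above): a far branch (IEM_CIMPL_F_BRANCH_FAR) adds the CS
 * selector/base/limit shadows to the flush mask, a far stack branch additionally adds xSP and
 * the SS selector/base/limit, a plain stack branch only adds xSP, and any of the RFLAGS /
 * status-flags / inhibit-shadow flags add EFlags. */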
6759
6760
6761/**
6762 * Emits a call to a CImpl function or something similar.
6763 */
6764DECL_HIDDEN_THROW(uint32_t)
6765iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6766 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6767{
6768 /* Writeback everything. */
6769 off = iemNativeRegFlushPendingWrites(pReNative, off);
6770
6771 /*
6772 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6773 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6774 */
6775 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6776 fGstShwFlush
6777 | RT_BIT_64(kIemNativeGstReg_Pc)
6778 | RT_BIT_64(kIemNativeGstReg_EFlags));
6779 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6780
6781 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6782
6783 /*
6784 * Load the parameters.
6785 */
6786#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6787 /* Special case the hidden VBOXSTRICTRC pointer. */
6788 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6789 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6790 if (cAddParams > 0)
6791 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6792 if (cAddParams > 1)
6793 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6794 if (cAddParams > 2)
6795 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6796 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6797
6798#else
6799 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6800 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6801 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6802 if (cAddParams > 0)
6803 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6804 if (cAddParams > 1)
6805 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6806 if (cAddParams > 2)
6807# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6808 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6809# else
6810 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6811# endif
6812#endif
6813
6814 /*
6815 * Make the call.
6816 */
6817 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6818
6819#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6820 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6821#endif
6822
6823 /*
6824 * Check the status code.
6825 */
6826 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6827}
6828
6829
6830/**
6831 * Emits a call to a threaded worker function.
6832 */
6833DECL_HIDDEN_THROW(uint32_t)
6834iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6835{
6836 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6837 off = iemNativeRegFlushPendingWrites(pReNative, off);
6838
6839 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6840 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6841
6842#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6843 /* The threaded function may throw / long jmp, so set current instruction
6844 number if we're counting. */
6845 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6846#endif
6847
6848 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6849
6850#ifdef RT_ARCH_AMD64
6851 /* Load the parameters and emit the call. */
6852# ifdef RT_OS_WINDOWS
6853# ifndef VBOXSTRICTRC_STRICT_ENABLED
6854 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6855 if (cParams > 0)
6856 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6857 if (cParams > 1)
6858 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6859 if (cParams > 2)
6860 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6861# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6862 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6863 if (cParams > 0)
6864 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6865 if (cParams > 1)
6866 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6867 if (cParams > 2)
6868 {
6869 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6870 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6871 }
6872 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6873# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6874# else
6875 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6876 if (cParams > 0)
6877 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6878 if (cParams > 1)
6879 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6880 if (cParams > 2)
6881 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6882# endif
6883
6884 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6885
6886# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6887 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6888# endif
6889
6890#elif RT_ARCH_ARM64
6891 /*
6892 * ARM64:
6893 */
6894 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6895 if (cParams > 0)
6896 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6897 if (cParams > 1)
6898 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6899 if (cParams > 2)
6900 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6901
6902 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6903
6904#else
6905# error "port me"
6906#endif
6907
6908 /*
6909 * Check the status code.
6910 */
6911 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6912
6913 return off;
6914}
6915
6916#ifdef VBOX_WITH_STATISTICS
6917/**
6918 * Emits code to update the threaded call statistics.
6919 */
6920DECL_INLINE_THROW(uint32_t)
6921iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6922{
6923 /*
6924 * Update threaded function stats.
6925 */
6926 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6927 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6928# if defined(RT_ARCH_ARM64)
6929 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6930 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6931 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6932 iemNativeRegFreeTmp(pReNative, idxTmp1);
6933 iemNativeRegFreeTmp(pReNative, idxTmp2);
6934# else
6935 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6936# endif
6937 return off;
6938}
6939#endif /* VBOX_WITH_STATISTICS */
6940
6941
6942/**
6943 * Emits the code at the CheckBranchMiss label.
6944 */
6945static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6946{
6947 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6948 if (idxLabel != UINT32_MAX)
6949 {
6950 iemNativeLabelDefine(pReNative, idxLabel, off);
6951
6952 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6953 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6954 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6955
6956 /* jump back to the return sequence. */
6957 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6958 }
6959 return off;
6960}
6961
6962
6963/**
6964 * Emits the code at the NeedCsLimChecking label.
6965 */
6966static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6967{
6968 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6969 if (idxLabel != UINT32_MAX)
6970 {
6971 iemNativeLabelDefine(pReNative, idxLabel, off);
6972
6973 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6974 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6975 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6976
6977 /* jump back to the return sequence. */
6978 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6979 }
6980 return off;
6981}
6982
6983
6984/**
6985 * Emits the code at the ObsoleteTb label.
6986 */
6987static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6988{
6989 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
6990 if (idxLabel != UINT32_MAX)
6991 {
6992 iemNativeLabelDefine(pReNative, idxLabel, off);
6993
6994 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
6995 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6996 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
6997
6998 /* jump back to the return sequence. */
6999 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7000 }
7001 return off;
7002}
7003
7004
7005/**
7006 * Emits the code at the RaiseGP0 label.
7007 */
7008static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7009{
7010 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
7011 if (idxLabel != UINT32_MAX)
7012 {
7013 iemNativeLabelDefine(pReNative, idxLabel, off);
7014
7015 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
7016 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7017 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
7018
7019 /* jump back to the return sequence. */
7020 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7021 }
7022 return off;
7023}
7024
7025
7026/**
7027 * Emits the code at the RaiseNm label.
7028 */
7029static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7030{
7031 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
7032 if (idxLabel != UINT32_MAX)
7033 {
7034 iemNativeLabelDefine(pReNative, idxLabel, off);
7035
7036 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
7037 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7038 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
7039
7040 /* jump back to the return sequence. */
7041 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7042 }
7043 return off;
7044}
7045
7046
7047/**
7048 * Emits the code at the RaiseUd label.
7049 */
7050static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7051{
7052 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
7053 if (idxLabel != UINT32_MAX)
7054 {
7055 iemNativeLabelDefine(pReNative, idxLabel, off);
7056
7057 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
7058 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7059 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
7060
7061 /* jump back to the return sequence. */
7062 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7063 }
7064 return off;
7065}
7066
7067
7068/**
7069 * Emits the code at the RaiseMf label.
7070 */
7071static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7072{
7073 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
7074 if (idxLabel != UINT32_MAX)
7075 {
7076 iemNativeLabelDefine(pReNative, idxLabel, off);
7077
7078 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
7079 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7080 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
7081
7082 /* jump back to the return sequence. */
7083 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7084 }
7085 return off;
7086}
7087
7088
7089/**
7090 * Emits the code at the RaiseXf label.
7091 */
7092static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7093{
7094 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
7095 if (idxLabel != UINT32_MAX)
7096 {
7097 iemNativeLabelDefine(pReNative, idxLabel, off);
7098
7099 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
7100 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7101 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
7102
7103 /* jump back to the return sequence. */
7104 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7105 }
7106 return off;
7107}
7108
7109
7110/**
7111 * Emits the code at the ReturnWithFlags label (returns
7112 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7113 */
7114static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7115{
7116 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7117 if (idxLabel != UINT32_MAX)
7118 {
7119 iemNativeLabelDefine(pReNative, idxLabel, off);
7120
7121 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7122
7123 /* jump back to the return sequence. */
7124 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7125 }
7126 return off;
7127}
7128
7129
7130/**
7131 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7132 */
7133static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7134{
7135 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7136 if (idxLabel != UINT32_MAX)
7137 {
7138 iemNativeLabelDefine(pReNative, idxLabel, off);
7139
7140 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7141
7142 /* jump back to the return sequence. */
7143 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7144 }
7145 return off;
7146}
7147
7148
7149/**
7150 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
7151 */
7152static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7153{
7154 /*
7155 * Generate the rc + rcPassUp fiddling code if needed.
7156 */
7157 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7158 if (idxLabel != UINT32_MAX)
7159 {
7160 iemNativeLabelDefine(pReNative, idxLabel, off);
7161
7162 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
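        /* Argument mapping emitted below (a sketch; rc arrives in the host return register and,
           when IEMNATIVE_WITH_INSTRUCTION_COUNTING is enabled, the instruction number in cl):
                Win64:  rcx = pVCpu, rdx = rc (from rax), r8  = idxInstr (from rcx)
                SysV:   rdi = pVCpu, rsi = rc (from rax), rdx = idxInstr (from rcx)
                other:  ARG0 = pVCpu, ARG1 = rc (return reg), ARG2 already holds idxInstr */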
7163#ifdef RT_ARCH_AMD64
7164# ifdef RT_OS_WINDOWS
7165# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7166 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7167# endif
7168 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7169 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7170# else
7171 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7172 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7173# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7174 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7175# endif
7176# endif
7177# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7178 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7179# endif
7180
7181#else
7182 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7183 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7184 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7185#endif
7186
7187 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7188 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7189 }
7190 return off;
7191}
7192
7193
7194/**
7195 * Emits a standard epilog.
7196 */
7197static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7198{
7199 *pidxReturnLabel = UINT32_MAX;
7200
7201 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7202 off = iemNativeRegFlushPendingWrites(pReNative, off);
7203
7204 /*
7205 * Successful return, so clear the return register (eax, w0).
7206 */
7207    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
7208
7209 /*
7210 * Define label for common return point.
7211 */
7212 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7213 *pidxReturnLabel = idxReturn;
7214
7215 /*
7216 * Restore registers and return.
7217 */
7218#ifdef RT_ARCH_AMD64
7219 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7220
7221    /* Reposition rsp at the r15 restore point. */
7222 pbCodeBuf[off++] = X86_OP_REX_W;
7223 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7224 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7225 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7226
7227 /* Pop non-volatile registers and return */
7228 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7229 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7230 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7231 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7232 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7233 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7234 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7235 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7236# ifdef RT_OS_WINDOWS
7237 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7238 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7239# endif
7240 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7241 pbCodeBuf[off++] = 0xc9; /* leave */
7242 pbCodeBuf[off++] = 0xc3; /* ret */
7243 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7244
7245#elif RT_ARCH_ARM64
7246 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7247
7248    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7249 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7250 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7251 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7252 IEMNATIVE_FRAME_VAR_SIZE / 8);
7253 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7254 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7255 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7256 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7257 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7258 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7259 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7260 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7261 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7262 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7263 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7264 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7265
7266 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7267 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7268 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7269 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7270
7271 /* retab / ret */
7272# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7273 if (1)
7274 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7275 else
7276# endif
7277 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7278
7279#else
7280# error "port me"
7281#endif
7282 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7283
7284 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7285}
7286
7287
7288/**
7289 * Emits a standard prolog.
7290 */
7291static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7292{
7293#ifdef RT_ARCH_AMD64
7294 /*
7295 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7296 * reserving 64 bytes for stack variables plus 4 non-register argument
7297     * slots. Fixed register assignment: xBX = pVCpu;
7298 *
7299 * Since we always do the same register spilling, we can use the same
7300 * unwind description for all the code.
7301 */
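     /* Resulting frame, a sketch from higher to lower addresses (exact offsets depend on the
      * IEMNATIVE_FRAME_XXX constants):
      *      [rbp+08h]  return address
      *      [rbp+00h]  saved rbp
      *      [rbp-xxh]  saved rbx, (Windows: rsi, rdi,) r12, r13, r14, r15
      *      [rsp+xxh]  variable area and stack argument slots, Windows shadow space at the bottom (rsp). */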
7302 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7303 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7304 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7305 pbCodeBuf[off++] = 0x8b;
7306 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7307 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7308 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7309# ifdef RT_OS_WINDOWS
7310 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7311 pbCodeBuf[off++] = 0x8b;
7312 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7313 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7314 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7315# else
7316 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7317 pbCodeBuf[off++] = 0x8b;
7318 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7319# endif
7320 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7321 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7322 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7323 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7324 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7325 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7326 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7327 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7328
7329# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7330 /* Save the frame pointer. */
7331 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7332# endif
7333
7334 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7335 X86_GREG_xSP,
7336 IEMNATIVE_FRAME_ALIGN_SIZE
7337 + IEMNATIVE_FRAME_VAR_SIZE
7338 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7339 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7340 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7341 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7342 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7343
7344#elif RT_ARCH_ARM64
7345 /*
7346 * We set up a stack frame exactly like on x86, only we have to push the
7347     * return address ourselves here. We save all non-volatile registers.
7348 */
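    /* Resulting frame, a sketch from lower to higher addresses (VAR = IEMNATIVE_FRAME_VAR_SIZE,
       SAVE = IEMNATIVE_FRAME_SAVE_REG_SIZE):
            [sp]                    variable area (VAR bytes)
            [sp + VAR]              saved x19, x20, x21..x28
            [sp + VAR + SAVE - 16]  saved bp, lr  (bp is set to point here below) */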
7349 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7350
7351# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
7352                           * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7353                           * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether
7354                           * it's in any way conditional, so we just emit this instruction now and hope for the best... */
7355 /* pacibsp */
7356 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7357# endif
7358
7359 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7360 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7361 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7362 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7363 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7364 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7365 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7366 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7367 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7368 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7369 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7370 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7371 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7372 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7373 /* Save the BP and LR (ret address) registers at the top of the frame. */
7374 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7375 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7376 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7377 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7378 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7379 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7380
7381 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7382 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7383
7384 /* mov r28, r0 */
7385 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7386 /* mov r27, r1 */
7387 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7388
7389# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7390 /* Save the frame pointer. */
7391 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7392 ARMV8_A64_REG_X2);
7393# endif
7394
7395#else
7396# error "port me"
7397#endif
7398 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7399 return off;
7400}
7401
7402
7403
7404
7405/*********************************************************************************************************************************
7406* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
7407*********************************************************************************************************************************/
7408
7409#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
7410 { \
7411 Assert(pReNative->Core.bmVars == 0); \
7412 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
7413 Assert(pReNative->Core.bmStack == 0); \
7414 pReNative->fMc = (a_fMcFlags); \
7415 pReNative->fCImpl = (a_fCImplFlags); \
7416 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
7417
7418/** We have to get to the end in recompilation mode, as otherwise we won't
7419 * generate code for all the IEM_MC_IF_XXX branches. */
7420#define IEM_MC_END() \
7421 iemNativeVarFreeAll(pReNative); \
7422 } return off
7423
7424
7425
7426/*********************************************************************************************************************************
7427* Native Emitter Support. *
7428*********************************************************************************************************************************/
7429
7430
7431#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
7432
7433#define IEM_MC_NATIVE_ELSE() } else {
7434
7435#define IEM_MC_NATIVE_ENDIF() } ((void)0)
7436
7437
7438#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
7439 off = a_fnEmitter(pReNative, off)
7440
7441#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
7442 off = a_fnEmitter(pReNative, off, (a0))
7443
7444#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
7445 off = a_fnEmitter(pReNative, off, (a0), (a1))
7446
7447#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
7448 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
7449
7450#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
7451 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
7452
7453#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
7454 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
7455
7456#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
7457 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
7458
7459#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
7460 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
7461
7462#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
7463 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
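/* Hypothetical usage sketch (the emitter name and variable indexes are invented for illustration,
 * and the RT_ARCH_VAL_XXX constants are assumed to come from iprt/cdefs.h; real uses live in the
 * generated instruction bodies):
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_r_r, idxVarDst, idxVarSrc);
 *      IEM_MC_NATIVE_ELSE()
 *          ... the generic IEM_MC_XXX fallback statements go here ...
 *      IEM_MC_NATIVE_ENDIF();
 * IEM_MC_NATIVE_IF expands to a plain C 'if' on RT_ARCH_VAL, so only the branch matching the
 * host architecture contributes emitter calls at recompile time. */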
7464
7465
7466
7467/*********************************************************************************************************************************
7468* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                            *
7469*********************************************************************************************************************************/
7470
7471#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
7472 pReNative->fMc = 0; \
7473 pReNative->fCImpl = (a_fFlags); \
7474 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
7475
7476
7477#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7478 pReNative->fMc = 0; \
7479 pReNative->fCImpl = (a_fFlags); \
7480 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
7481
7482DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7483 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7484 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
7485{
7486 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
7487}
7488
7489
7490#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7491 pReNative->fMc = 0; \
7492 pReNative->fCImpl = (a_fFlags); \
7493 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7494 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
7495
7496DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7497 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7498 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
7499{
7500 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
7501}
7502
7503
7504#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7505 pReNative->fMc = 0; \
7506 pReNative->fCImpl = (a_fFlags); \
7507 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7508 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
7509
7510DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7511 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7512 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
7513 uint64_t uArg2)
7514{
7515 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
7516}
7517
7518
7519
7520/*********************************************************************************************************************************
7521* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
7522*********************************************************************************************************************************/
7523
7524/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
7525 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
7526DECL_INLINE_THROW(uint32_t)
7527iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7528{
7529 /*
7530     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
7531     * return with a special status code and make the execution loop deal with
7532     * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
7533     * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
7534     * could continue w/o interruption, it will probably drop into the
7535     * debugger anyway, so it's not worth the effort of trying to service it here;
7536     * we just lump it in with the handling of the others.
7537     *
7538     * To simplify the code and the register state management even more (wrt the
7539     * immediate in the AND operation), we always update the flags and skip the
7540     * conditional jump associated with the extra check.
7541 */
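     /* The sequence emitted below is roughly equivalent to (a sketch):
            if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
                return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;
            fEfl &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
            store fEfl back into cpum.GstCtx.eflags. */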
7542 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
7543 <= UINT32_MAX);
7544#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7545 AssertMsg( pReNative->idxCurCall == 0
7546 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
7547 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
7548#endif
7549
7550 /*
7551 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
7552 * any pending register writes must be flushed.
7553 */
7554 off = iemNativeRegFlushPendingWrites(pReNative, off);
7555
7556 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7557 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
7558 true /*fSkipLivenessAssert*/);
7559 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
7560 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
7561 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
7562 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
7563 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
7564
7565 /* Free but don't flush the EFLAGS register. */
7566 iemNativeRegFreeTmp(pReNative, idxEflReg);
7567
7568 return off;
7569}
7570
7571
7572/** Handles the a_rcNormal template argument: a dummy for VINF_SUCCESS, otherwise (VINF_IEM_REEXEC_BREAK) it flushes pending writes and jumps to the ReturnBreak label. */
7573template<int const a_rcNormal>
7574DECL_FORCE_INLINE(uint32_t)
7575iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7576{
7577 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
7578 if (a_rcNormal != VINF_SUCCESS)
7579 {
7580#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7581 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7582#else
7583 RT_NOREF_PV(idxInstr);
7584#endif
7585
7586 /* As this code returns from the TB any pending register writes must be flushed. */
7587 off = iemNativeRegFlushPendingWrites(pReNative, off);
7588
7589 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
7590 }
7591 return off;
7592}
7593
7594
7595#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
7596 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7597 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7598
7599#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7600 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7601 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7602 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7603
7604/** Same as iemRegAddToRip64AndFinishingNoFlags. */
7605DECL_INLINE_THROW(uint32_t)
7606iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7607{
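    /* Net effect (a sketch): pVCpu->cpum.GstCtx.rip += cbInstr; the actual store may be deferred
       via Core.offPc when IEMNATIVE_WITH_DELAYED_PC_UPDATING is enabled. */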
7608#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7609# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7610 if (!pReNative->Core.offPc)
7611 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7612# endif
7613
7614 /* Allocate a temporary PC register. */
7615 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7616
7617 /* Perform the addition and store the result. */
7618 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
7619 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7620
7621 /* Free but don't flush the PC register. */
7622 iemNativeRegFreeTmp(pReNative, idxPcReg);
7623#endif
7624
7625#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7626 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7627
7628 pReNative->Core.offPc += cbInstr;
7629# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7630 off = iemNativePcAdjustCheck(pReNative, off);
7631# endif
7632 if (pReNative->cCondDepth)
7633 off = iemNativeEmitPcWriteback(pReNative, off);
7634 else
7635 pReNative->Core.cInstrPcUpdateSkipped++;
7636#endif
7637
7638 return off;
7639}
7640
7641
7642#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
7643 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7644 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7645
7646#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7647 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7648 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7649 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7650
7651/** Same as iemRegAddToEip32AndFinishingNoFlags. */
7652DECL_INLINE_THROW(uint32_t)
7653iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7654{
7655#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7656# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7657 if (!pReNative->Core.offPc)
7658 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7659# endif
7660
7661 /* Allocate a temporary PC register. */
7662 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7663
7664 /* Perform the addition and store the result. */
7665 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7666 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7667
7668 /* Free but don't flush the PC register. */
7669 iemNativeRegFreeTmp(pReNative, idxPcReg);
7670#endif
7671
7672#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7673 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7674
7675 pReNative->Core.offPc += cbInstr;
7676# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7677 off = iemNativePcAdjustCheck(pReNative, off);
7678# endif
7679 if (pReNative->cCondDepth)
7680 off = iemNativeEmitPcWriteback(pReNative, off);
7681 else
7682 pReNative->Core.cInstrPcUpdateSkipped++;
7683#endif
7684
7685 return off;
7686}
7687
7688
7689#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
7690 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7691 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7692
7693#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7694 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7695 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7696 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7697
7698/** Same as iemRegAddToIp16AndFinishingNoFlags. */
7699DECL_INLINE_THROW(uint32_t)
7700iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7701{
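    /* Net effect (a sketch): pVCpu->cpum.GstCtx.rip = (uint16_t)(pVCpu->cpum.GstCtx.rip + cbInstr);
       as above, the store may be deferred when IEMNATIVE_WITH_DELAYED_PC_UPDATING is enabled. */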
7702#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7703# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7704 if (!pReNative->Core.offPc)
7705 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7706# endif
7707
7708 /* Allocate a temporary PC register. */
7709 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7710
7711 /* Perform the addition and store the result. */
7712 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7713 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7714 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7715
7716 /* Free but don't flush the PC register. */
7717 iemNativeRegFreeTmp(pReNative, idxPcReg);
7718#endif
7719
7720#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7721 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7722
7723 pReNative->Core.offPc += cbInstr;
7724# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7725 off = iemNativePcAdjustCheck(pReNative, off);
7726# endif
7727 if (pReNative->cCondDepth)
7728 off = iemNativeEmitPcWriteback(pReNative, off);
7729 else
7730 pReNative->Core.cInstrPcUpdateSkipped++;
7731#endif
7732
7733 return off;
7734}
7735
7736
7737
7738/*********************************************************************************************************************************
7739* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
7740*********************************************************************************************************************************/
7741
7742#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7743 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7744 (a_enmEffOpSize), pCallEntry->idxInstr); \
7745 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7746
7747#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7748 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7749 (a_enmEffOpSize), pCallEntry->idxInstr); \
7750 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7751 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7752
7753#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
7754 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7755 IEMMODE_16BIT, pCallEntry->idxInstr); \
7756 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7757
7758#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7759 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7760 IEMMODE_16BIT, pCallEntry->idxInstr); \
7761 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7762 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7763
7764#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
7765 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7766 IEMMODE_64BIT, pCallEntry->idxInstr); \
7767 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7768
7769#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7770 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7771 IEMMODE_64BIT, pCallEntry->idxInstr); \
7772 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7773 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7774
7775/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
7776 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
7777 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
7778DECL_INLINE_THROW(uint32_t)
7779iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7780 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7781{
7782 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
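    /* Net effect (a sketch): uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + offDisp;
       64-bit opsize: raise #GP(0) and exit the TB unless uNewRip is canonical;
       16-bit opsize: truncate uNewRip to 16 bits; then store it to cpum.GstCtx.rip. */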
7783
7784 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7785 off = iemNativeRegFlushPendingWrites(pReNative, off);
7786
7787#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7788 Assert(pReNative->Core.offPc == 0);
7789
7790 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7791#endif
7792
7793 /* Allocate a temporary PC register. */
7794 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7795
7796 /* Perform the addition. */
7797 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
7798
7799 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
7800 {
7801 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
7802 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7803 }
7804 else
7805 {
7806 /* Just truncate the result to 16-bit IP. */
7807 Assert(enmEffOpSize == IEMMODE_16BIT);
7808 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7809 }
7810 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7811
7812 /* Free but don't flush the PC register. */
7813 iemNativeRegFreeTmp(pReNative, idxPcReg);
7814
7815 return off;
7816}
7817
7818
7819#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7820 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7821 (a_enmEffOpSize), pCallEntry->idxInstr); \
7822 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7823
7824#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7825 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7826 (a_enmEffOpSize), pCallEntry->idxInstr); \
7827 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7828 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7829
7830#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
7831 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7832 IEMMODE_16BIT, pCallEntry->idxInstr); \
7833 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7834
7835#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7836 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7837 IEMMODE_16BIT, pCallEntry->idxInstr); \
7838 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7839 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7840
7841#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
7842 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7843 IEMMODE_32BIT, pCallEntry->idxInstr); \
7844 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7845
7846#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7847 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7848 IEMMODE_32BIT, pCallEntry->idxInstr); \
7849 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7850 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7851
7852/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
7853 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
7854 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
7855DECL_INLINE_THROW(uint32_t)
7856iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7857 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7858{
7859 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
7860
7861 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7862 off = iemNativeRegFlushPendingWrites(pReNative, off);
7863
7864#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7865 Assert(pReNative->Core.offPc == 0);
7866
7867 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7868#endif
7869
7870 /* Allocate a temporary PC register. */
7871 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7872
7873 /* Perform the addition. */
7874 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7875
7876 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
7877 if (enmEffOpSize == IEMMODE_16BIT)
7878 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7879
7880 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
7881/** @todo we can skip this in 32-bit FLAT mode. */
7882 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7883
7884 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7885
7886 /* Free but don't flush the PC register. */
7887 iemNativeRegFreeTmp(pReNative, idxPcReg);
7888
7889 return off;
7890}
7891
7892
7893#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
7894 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7895 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7896
7897#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
7898 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7899 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7900 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7901
7902#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
7903 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7904 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7905
7906#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7907 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7908 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7909 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7910
7911#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
7912 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7913 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7914
7915#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7916 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7917 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7918 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7919
7920/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
7921DECL_INLINE_THROW(uint32_t)
7922iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7923 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
7924{
7925 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7926 off = iemNativeRegFlushPendingWrites(pReNative, off);
7927
7928#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7929 Assert(pReNative->Core.offPc == 0);
7930
7931 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7932#endif
7933
7934 /* Allocate a temporary PC register. */
7935 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7936
7937 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
7938 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7939 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7940 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7941 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7942
7943 /* Free but don't flush the PC register. */
7944 iemNativeRegFreeTmp(pReNative, idxPcReg);
7945
7946 return off;
7947}
7948
7949
7950
7951/*********************************************************************************************************************************
7952* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                 *
7953*********************************************************************************************************************************/
7954
7955/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
7956#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
7957 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7958
7959/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
7960#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
7961 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7962
7963/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
7964#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
7965 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7966
7967/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
7968 * clears flags. */
7969#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
7970 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
7971 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7972
7973/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
7974 * clears flags. */
7975#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
7976 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
7977 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7978
7979/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
7980 * clears flags. */
7981#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
7982 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
7983 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7984
7985#undef IEM_MC_SET_RIP_U16_AND_FINISH
7986
7987
7988/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
7989#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
7990 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7991
7992/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
7993#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
7994 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7995
7996/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
7997 * clears flags. */
7998#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
7999 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
8000 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
8001
8002/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
8003 * and clears flags. */
8004#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
8005 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
8006 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
8007
8008#undef IEM_MC_SET_RIP_U32_AND_FINISH
8009
8010
8011/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
8012#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
8013 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
8014
8015/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
8016 * and clears flags. */
8017#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
8018 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
8019 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
8020
8021#undef IEM_MC_SET_RIP_U64_AND_FINISH
8022
8023
8024/** Same as iemRegRipJumpU16AndFinishNoFlags,
8025 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
8026DECL_INLINE_THROW(uint32_t)
8027iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
8028 uint8_t idxInstr, uint8_t cbVar)
8029{
8030 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
8031 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
8032
8033 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
8034 off = iemNativeRegFlushPendingWrites(pReNative, off);
8035
8036#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8037 Assert(pReNative->Core.offPc == 0);
8038
8039 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
8040#endif
8041
8042 /* Get a register with the new PC loaded from idxVarPc.
8043       Note! This ASSUMES that the high bits of the GPR are zeroed. */
8044 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
8045
8046 /* Check limit (may #GP(0) + exit TB). */
8047 if (!f64Bit)
8048/** @todo we can skip this test in FLAT 32-bit mode. */
8049 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
8050 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
8051 else if (cbVar > sizeof(uint32_t))
8052 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
8053
8054 /* Store the result. */
8055 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
8056
8057 iemNativeVarRegisterRelease(pReNative, idxVarPc);
8058    /** @todo implicitly free the variable? */
8059
8060 return off;
8061}
8062
8063
8064
8065/*********************************************************************************************************************************
8066* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
8067*********************************************************************************************************************************/
8068
8069#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
8070 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
8071
8072/**
8073 * Emits code to check if a \#NM exception should be raised.
8074 *
8075 * @returns New code buffer offset, UINT32_MAX on failure.
8076 * @param pReNative The native recompile state.
8077 * @param off The code buffer offset.
8078 * @param idxInstr The current instruction.
8079 */
8080DECL_INLINE_THROW(uint32_t)
8081iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8082{
8083 /*
8084 * Make sure we don't have any outstanding guest register writes as we may
8085     * raise an #NM and all guest registers must be up to date in CPUMCTX.
8086 *
8087 * @todo r=aeichner Can we postpone this to the RaiseNm path?
8088 */
8089 off = iemNativeRegFlushPendingWrites(pReNative, off);
8090
8091#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8092 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8093#else
8094 RT_NOREF(idxInstr);
8095#endif
8096
8097 /* Allocate a temporary CR0 register. */
8098 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8099 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8100
8101 /*
8102     * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
8103 * return raisexcpt();
8104 */
8105 /* Test and jump. */
8106 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
8107
8108 /* Free but don't flush the CR0 register. */
8109 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8110
8111 return off;
8112}
8113
8114
8115#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
8116    off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
8117
8118/**
8119 * Emits code to check if a \#MF exception should be raised.
8120 *
8121 * @returns New code buffer offset, UINT32_MAX on failure.
8122 * @param pReNative The native recompile state.
8123 * @param off The code buffer offset.
8124 * @param idxInstr The current instruction.
8125 */
8126DECL_INLINE_THROW(uint32_t)
8127iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8128{
8129 /*
8130 * Make sure we don't have any outstanding guest register writes as we may
8131     * raise an #MF and all guest registers must be up to date in CPUMCTX.
8132 *
8133 * @todo r=aeichner Can we postpone this to the RaiseMf path?
8134 */
8135 off = iemNativeRegFlushPendingWrites(pReNative, off);
8136
8137#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8138 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8139#else
8140 RT_NOREF(idxInstr);
8141#endif
8142
8143 /* Allocate a temporary FSW register. */
8144 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
8145 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
8146
8147 /*
8148     * if ((FSW & X86_FSW_ES) != 0)
8149 * return raisexcpt();
8150 */
8151 /* Test and jump. */
8152 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
8153
8154 /* Free but don't flush the FSW register. */
8155 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
8156
8157 return off;
8158}
8159
8160
8161#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
8162 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8163
8164/**
8165 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
8166 *
8167 * @returns New code buffer offset, UINT32_MAX on failure.
8168 * @param pReNative The native recompile state.
8169 * @param off The code buffer offset.
8170 * @param idxInstr The current instruction.
8171 */
8172DECL_INLINE_THROW(uint32_t)
8173iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8174{
8175 /*
8176 * Make sure we don't have any outstanding guest register writes as we may
8177 * raise an \#UD or \#NM and all guest register must be up to date in CPUMCTX.
8178 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8179 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8180 */
8181 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8182
8183#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8184 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8185#else
8186 RT_NOREF(idxInstr);
8187#endif
8188
8189 /* Allocate a temporary CR0 and CR4 register. */
8190 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8191 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8192 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8193 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8194
8195 /** @todo r=aeichner Optimize this more later to have fewer compares and branches,
8196 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
8197 * actual performance benefit first). */
8198 /*
8199 * if (cr0 & X86_CR0_EM)
8200 * return raisexcpt();
8201 */
8202 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_EM_BIT, idxLabelRaiseUd);
8203 /*
8204 * if (!(cr4 & X86_CR4_OSFXSR))
8205 * return raisexcpt();
8206 */
8207 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR_BIT, idxLabelRaiseUd);
8208 /*
8209 * if (cr0 & X86_CR0_TS)
8210 * return raisexcpt();
8211 */
8212 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8213
8214 /* Free but don't flush the CR0 and CR4 register. */
8215 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8216 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8217
8218 return off;
8219}
8220
8221
8222#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
8223 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8224
8225/**
8226 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
8227 *
8228 * @returns New code buffer offset, UINT32_MAX on failure.
8229 * @param pReNative The native recompile state.
8230 * @param off The code buffer offset.
8231 * @param idxInstr The current instruction.
8232 */
8233DECL_INLINE_THROW(uint32_t)
8234iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8235{
8236 /*
8237 * Make sure we don't have any outstanding guest register writes as we may
8238 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8239 *
8240 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8241 */
8242 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8243
8244#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8245 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8246#else
8247 RT_NOREF(idxInstr);
8248#endif
8249
8250 /* Allocate a temporary CR0, CR4 and XCR0 register. */
8251 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8252 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8253 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
8254 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8255 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8256
8257 /** @todo r=aeichner Optimize this more later to have fewer compares and branches,
8258 * (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h but check that it has some
8259 * actual performance benefit first). */
8260 /*
8261 * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
8262 * return raisexcpt();
8263 */
8264 const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
8265 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxXcr0Reg);
8266 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
8267 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8268
8269 /*
8270 * if (!(cr4 & X86_CR4_OSXSAVE))
8271 * return raisexcpt();
8272 */
8273 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT, idxLabelRaiseUd);
8274 /*
8275 * if (cr0 & X86_CR0_TS)
8276 * return raisexcpt();
8277 */
8278 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8279
8280 /* Free but don't flush the CR0, CR4 and XCR0 register. */
8281 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8282 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8283 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
8284
8285 return off;
8286}
8287
8288
8289#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
8290 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
8291
8292/**
8293 * Emits code to raise a SIMD floating point exception (either \#UD or \#XF).
8294 *
8295 * @returns New code buffer offset, UINT32_MAX on failure.
8296 * @param pReNative The native recompile state.
8297 * @param off The code buffer offset.
8298 * @param idxInstr The current instruction.
8299 */
8300DECL_INLINE_THROW(uint32_t)
8301iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8302{
8303 /*
8304 * Make sure we don't have any outstanding guest register writes as we may
8305 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
8306 *
8307 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8308 */
8309 off = iemNativeRegFlushPendingWrites(pReNative, off);
8310
8311#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8312 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8313#else
8314 RT_NOREF(idxInstr);
8315#endif
8316
8317 /* Allocate a temporary CR4 register. */
8318 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8319 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
8320 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8321
8322 /*
8323 * if (cr4 & X86_CR4_OSXMMEEXCPT)
8324 * return raisexcpt(#XF); else raisexcpt(#UD) below.
8325 */
8326 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
8327
8328 /* Otherwise raise the \#UD exception unconditionally. */
8329 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
8330
8331 /* Free but don't flush the CR4 register. */
8332 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8333
8334 return off;
8335}
8336
8337
8338
8339/*********************************************************************************************************************************
8340* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
8341*********************************************************************************************************************************/
8342
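/*
 * Overview (illustrative): the IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF macros
 * below cooperate through the condition stack.  A typical MC block such as
 *
 *     IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *         ...
 *     } IEM_MC_ELSE() {
 *         ...
 *     } IEM_MC_ENDIF();
 *
 * ends up pushing a condition entry (iemNativeCondPushIf), testing EFLAGS and
 * branching to the 'else' label, after which IEM_MC_ELSE jumps to the 'endif'
 * label and restores the snapshotted allocator state for the else-block, and
 * IEM_MC_ENDIF reconciles the two register/variable states and pops the entry.
 */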
8343/**
8344 * Pushes an IEM_MC_IF_XXX onto the condition stack.
8345 *
8346 * @returns Pointer to the condition stack entry on success.
8347 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if the conditions are nested too deeply.
8348 */
8349DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
8350{
8351#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8352 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
8353#endif
8354
8355 uint32_t const idxStack = pReNative->cCondDepth;
8356 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
8357
8358 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
8359 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
8360
8361 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
8362 pEntry->fInElse = false;
8363 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
8364 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
8365
8366 return pEntry;
8367}
8368
8369
8370/**
8371 * Start of the if-block, snapshotting the register and variable state.
8372 */
8373DECL_INLINE_THROW(void)
8374iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
8375{
8376 Assert(offIfBlock != UINT32_MAX);
8377 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8378 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8379 Assert(!pEntry->fInElse);
8380
8381 /* Define the start of the IF block if requested or for disassembly purposes. */
8382 if (idxLabelIf != UINT32_MAX)
8383 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
8384#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8385 else
8386 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
8387#else
8388 RT_NOREF(offIfBlock);
8389#endif
8390
8391#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8392 Assert(pReNative->Core.offPc == 0);
8393#endif
8394
8395 /* Copy the initial state so we can restore it in the 'else' block. */
8396 pEntry->InitialState = pReNative->Core;
8397}
8398
8399
8400#define IEM_MC_ELSE() } while (0); \
8401 off = iemNativeEmitElse(pReNative, off); \
8402 do {
8403
8404/** Emits code related to IEM_MC_ELSE. */
8405DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8406{
8407 /* Check sanity and get the conditional stack entry. */
8408 Assert(off != UINT32_MAX);
8409 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8410 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8411 Assert(!pEntry->fInElse);
8412
8413 /* Jump to the endif */
8414 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
8415
8416 /* Define the else label and enter the else part of the condition. */
8417 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8418 pEntry->fInElse = true;
8419
8420#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8421 Assert(pReNative->Core.offPc == 0);
8422#endif
8423
8424 /* Snapshot the core state so we can do a merge at the endif and restore
8425 the snapshot we took at the start of the if-block. */
8426 pEntry->IfFinalState = pReNative->Core;
8427 pReNative->Core = pEntry->InitialState;
8428
8429 return off;
8430}
8431
8432
8433#define IEM_MC_ENDIF() } while (0); \
8434 off = iemNativeEmitEndIf(pReNative, off)
8435
8436/** Emits code related to IEM_MC_ENDIF. */
8437DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8438{
8439 /* Check sanity and get the conditional stack entry. */
8440 Assert(off != UINT32_MAX);
8441 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8442 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8443
8444#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8445 Assert(pReNative->Core.offPc == 0);
8446#endif
8447
8448 /*
8449 * Now we have to find common ground with the core state at the end of the
8450 * if-block. Use the smallest common denominator and just drop anything
8451 * that isn't the same in both states.
8452 */
8453 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
8454 * which is why we're doing this at the end of the else-block.
8455 * But we'd need more info about the future code for that to be worth the effort. */
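    /* Example (illustrative): if only the if-block loaded a guest register shadow
       into a host register, that shadowing is dropped here; likewise a variable
       that ended up in different host registers on the two paths loses its
       register, so both code paths continue with an identical allocator state. */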
8456 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
8457 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
8458 {
8459 /* shadow guest stuff first. */
8460 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
8461 if (fGstRegs)
8462 {
8463 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
8464 do
8465 {
8466 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
8467 fGstRegs &= ~RT_BIT_64(idxGstReg);
8468
8469 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
8470 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
8471 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
8472 {
8473 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
8474 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
8475 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
8476 }
8477 } while (fGstRegs);
8478 }
8479 else
8480 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
8481
8482 /* Check variables next. For now we must require them to be identical
8483 or stuff we can recreate. */
8484 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
8485 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
8486 if (fVars)
8487 {
8488 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
8489 do
8490 {
8491 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
8492 fVars &= ~RT_BIT_32(idxVar);
8493
8494 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
8495 {
8496 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
8497 continue;
8498 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
8499 {
8500 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8501 if (idxHstReg != UINT8_MAX)
8502 {
8503 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8504 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8505 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
8506 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8507 }
8508 continue;
8509 }
8510 }
8511 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
8512 continue;
8513
8514 /* Irreconcilable, so drop it. */
8515 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8516 if (idxHstReg != UINT8_MAX)
8517 {
8518 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8519 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8520 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
8521 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8522 }
8523 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8524 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8525 } while (fVars);
8526 }
8527
8528 /* Finally, check that the host register allocations matches. */
8529 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
8530 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
8531 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
8532 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
8533 }
8534
8535 /*
8536 * Define the endif label and maybe the else one if we're still in the 'if' part.
8537 */
8538 if (!pEntry->fInElse)
8539 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8540 else
8541 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
8542 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
8543
8544 /* Pop the conditional stack. */
8545 pReNative->cCondDepth -= 1;
8546
8547 return off;
8548}
8549
8550
8551#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
8552 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
8553 do {
8554
8555/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
8556DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8557{
8558 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8559
8560 /* Get the eflags. */
8561 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8562 kIemNativeGstRegUse_ReadOnly);
8563
8564 /* Test and jump. */
8565 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8566
8567 /* Free but don't flush the EFlags register. */
8568 iemNativeRegFreeTmp(pReNative, idxEflReg);
8569
8570 /* Make a copy of the core state now as we start the if-block. */
8571 iemNativeCondStartIfBlock(pReNative, off);
8572
8573 return off;
8574}
8575
8576
8577#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
8578 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
8579 do {
8580
8581/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
8582DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8583{
8584 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8585
8586 /* Get the eflags. */
8587 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8588 kIemNativeGstRegUse_ReadOnly);
8589
8590 /* Test and jump. */
8591 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8592
8593 /* Free but don't flush the EFlags register. */
8594 iemNativeRegFreeTmp(pReNative, idxEflReg);
8595
8596 /* Make a copy of the core state now as we start the if-block. */
8597 iemNativeCondStartIfBlock(pReNative, off);
8598
8599 return off;
8600}
8601
8602
8603#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
8604 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
8605 do {
8606
8607/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
8608DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8609{
8610 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8611
8612 /* Get the eflags. */
8613 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8614 kIemNativeGstRegUse_ReadOnly);
8615
8616 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8617 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8618
8619 /* Test and jump. */
8620 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8621
8622 /* Free but don't flush the EFlags register. */
8623 iemNativeRegFreeTmp(pReNative, idxEflReg);
8624
8625 /* Make a copy of the core state now as we start the if-block. */
8626 iemNativeCondStartIfBlock(pReNative, off);
8627
8628 return off;
8629}
8630
8631
8632#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
8633 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
8634 do {
8635
8636/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
8637DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8638{
8639 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8640
8641 /* Get the eflags. */
8642 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8643 kIemNativeGstRegUse_ReadOnly);
8644
8645 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8646 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8647
8648 /* Test and jump. */
8649 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8650
8651 /* Free but don't flush the EFlags register. */
8652 iemNativeRegFreeTmp(pReNative, idxEflReg);
8653
8654 /* Make a copy of the core state now as we start the if-block. */
8655 iemNativeCondStartIfBlock(pReNative, off);
8656
8657 return off;
8658}
8659
8660
8661#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
8662 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
8663 do {
8664
8665#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
8666 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
8667 do {
8668
8669/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
8670DECL_INLINE_THROW(uint32_t)
8671iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8672 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8673{
8674 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8675
8676 /* Get the eflags. */
8677 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8678 kIemNativeGstRegUse_ReadOnly);
8679
8680 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8681 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8682
8683 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8684 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8685 Assert(iBitNo1 != iBitNo2);
8686
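    /* Worked example (illustrative): IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF)
       gives iBitNo1=7 (SF) and iBitNo2=11 (OF).  The code below isolates SF,
       shifts it up by 4 so it lines up with OF and XORs the result with EFLAGS;
       bit 11 of the temporary then holds SF ^ OF, i.e. it is set exactly when
       the two flags differ, which is what the final test-and-jump examines. */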
8687#ifdef RT_ARCH_AMD64
8688 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
8689
8690 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8691 if (iBitNo1 > iBitNo2)
8692 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8693 else
8694 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8695 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8696
8697#elif defined(RT_ARCH_ARM64)
8698 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8699 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8700
8701 /* and tmpreg, eflreg, #1<<iBitNo1 */
8702 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8703
8704 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8705 if (iBitNo1 > iBitNo2)
8706 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8707 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8708 else
8709 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8710 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8711
8712 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8713
8714#else
8715# error "Port me"
8716#endif
8717
8718 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8719 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8720 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8721
8722 /* Free but don't flush the EFlags and tmp registers. */
8723 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8724 iemNativeRegFreeTmp(pReNative, idxEflReg);
8725
8726 /* Make a copy of the core state now as we start the if-block. */
8727 iemNativeCondStartIfBlock(pReNative, off);
8728
8729 return off;
8730}
8731
8732
8733#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
8734 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
8735 do {
8736
8737#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
8738 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
8739 do {
8740
8741/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
8742 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
8743DECL_INLINE_THROW(uint32_t)
8744iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
8745 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8746{
8747 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8748
8749 /* We need an if-block label for the inverted variant. */
8750 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
8751 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
8752
8753 /* Get the eflags. */
8754 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8755 kIemNativeGstRegUse_ReadOnly);
8756
8757 /* Translate the flag masks to bit numbers. */
8758 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8759 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8760
8761 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8762 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8763 Assert(iBitNo1 != iBitNo);
8764
8765 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8766 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8767 Assert(iBitNo2 != iBitNo);
8768 Assert(iBitNo2 != iBitNo1);
8769
8770#ifdef RT_ARCH_AMD64
8771 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
8772#elif defined(RT_ARCH_ARM64)
8773 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8774#endif
8775
8776 /* Check for the lone bit first. */
8777 if (!fInverted)
8778 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8779 else
8780 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
8781
8782 /* Then extract and compare the other two bits. */
8783#ifdef RT_ARCH_AMD64
8784 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8785 if (iBitNo1 > iBitNo2)
8786 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8787 else
8788 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8789 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8790
8791#elif defined(RT_ARCH_ARM64)
8792 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8793
8794 /* and tmpreg, eflreg, #1<<iBitNo1 */
8795 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8796
8797 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8798 if (iBitNo1 > iBitNo2)
8799 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8800 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8801 else
8802 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8803 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8804
8805 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8806
8807#else
8808# error "Port me"
8809#endif
8810
8811 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8812 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8813 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8814
8815 /* Free but don't flush the EFlags and tmp registers. */
8816 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8817 iemNativeRegFreeTmp(pReNative, idxEflReg);
8818
8819 /* Make a copy of the core state now as we start the if-block. */
8820 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
8821
8822 return off;
8823}
8824
8825
8826#define IEM_MC_IF_CX_IS_NZ() \
8827 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
8828 do {
8829
8830/** Emits code for IEM_MC_IF_CX_IS_NZ. */
8831DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8832{
8833 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8834
8835 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8836 kIemNativeGstRegUse_ReadOnly);
8837 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
8838 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8839
8840 iemNativeCondStartIfBlock(pReNative, off);
8841 return off;
8842}
8843
8844
8845#define IEM_MC_IF_ECX_IS_NZ() \
8846 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
8847 do {
8848
8849#define IEM_MC_IF_RCX_IS_NZ() \
8850 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
8851 do {
8852
8853/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
8854DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8855{
8856 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8857
8858 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8859 kIemNativeGstRegUse_ReadOnly);
8860 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
8861 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8862
8863 iemNativeCondStartIfBlock(pReNative, off);
8864 return off;
8865}
8866
8867
8868#define IEM_MC_IF_CX_IS_NOT_ONE() \
8869 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
8870 do {
8871
8872/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
8873DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8874{
8875 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8876
8877 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8878 kIemNativeGstRegUse_ReadOnly);
8879#ifdef RT_ARCH_AMD64
8880 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8881#else
8882 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8883 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8884 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8885#endif
8886 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8887
8888 iemNativeCondStartIfBlock(pReNative, off);
8889 return off;
8890}
8891
8892
8893#define IEM_MC_IF_ECX_IS_NOT_ONE() \
8894 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
8895 do {
8896
8897#define IEM_MC_IF_RCX_IS_NOT_ONE() \
8898 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
8899 do {
8900
8901/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
8902DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8903{
8904 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8905
8906 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8907 kIemNativeGstRegUse_ReadOnly);
8908 if (f64Bit)
8909 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8910 else
8911 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8912 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8913
8914 iemNativeCondStartIfBlock(pReNative, off);
8915 return off;
8916}
8917
8918
8919#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8920 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
8921 do {
8922
8923#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8924 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
8925 do {
8926
8927/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
8928 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8929DECL_INLINE_THROW(uint32_t)
8930iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
8931{
8932 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8933
8934 /* We have to load both RCX and EFLAGS before we can start branching,
8935 otherwise we'll end up in the else-block with an inconsistent
8936 register allocator state.
8937 Doing EFLAGS first as it's more likely to be loaded, right? */
8938 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8939 kIemNativeGstRegUse_ReadOnly);
8940 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8941 kIemNativeGstRegUse_ReadOnly);
8942
8943 /** @todo we could reduce this to a single branch instruction by spending a
8944 * temporary register and some setnz stuff. Not sure if loops are
8945 * worth it. */
8946 /* Check CX. */
8947#ifdef RT_ARCH_AMD64
8948 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8949#else
8950 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8951 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8952 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8953#endif
8954
8955 /* Check the EFlags bit. */
8956 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8957 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8958 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
8959 !fCheckIfSet /*fJmpIfSet*/);
8960
8961 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8962 iemNativeRegFreeTmp(pReNative, idxEflReg);
8963
8964 iemNativeCondStartIfBlock(pReNative, off);
8965 return off;
8966}
8967
8968
8969#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8970 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
8971 do {
8972
8973#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8974 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
8975 do {
8976
8977#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8978 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
8979 do {
8980
8981#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8982 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
8983 do {
8984
8985/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
8986 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
8987 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
8988 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8989DECL_INLINE_THROW(uint32_t)
8990iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8991 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
8992{
8993 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8994
8995 /* We have to load both RCX and EFLAGS before we can start branching,
8996 otherwise we'll end up in the else-block with an inconsistent
8997 register allocator state.
8998 Doing EFLAGS first as it's more likely to be loaded, right? */
8999 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
9000 kIemNativeGstRegUse_ReadOnly);
9001 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
9002 kIemNativeGstRegUse_ReadOnly);
9003
9004 /** @todo we could reduce this to a single branch instruction by spending a
9005 * temporary register and some setnz stuff. Not sure if loops are
9006 * worth it. */
9007 /* Check RCX/ECX. */
9008 if (f64Bit)
9009 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
9010 else
9011 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
9012
9013 /* Check the EFlags bit. */
9014 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
9015 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
9016 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
9017 !fCheckIfSet /*fJmpIfSet*/);
9018
9019 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
9020 iemNativeRegFreeTmp(pReNative, idxEflReg);
9021
9022 iemNativeCondStartIfBlock(pReNative, off);
9023 return off;
9024}
9025
9026
9027
9028/*********************************************************************************************************************************
9029* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
9030*********************************************************************************************************************************/
9031/** Number of hidden arguments for CIMPL calls.
9032 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
9033#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9034# define IEM_CIMPL_HIDDEN_ARGS 3
9035#else
9036# define IEM_CIMPL_HIDDEN_ARGS 2
9037#endif
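/* Illustrative consequence: for an MC block that ends up calling a C implementation
   (IEM_CIMPL_F_CALLS_CIMPL), the first explicit IEM_MC_ARG(a_Type, a_Name, 0) is
   really call argument number IEM_CIMPL_HIDDEN_ARGS, so the register allocator will
   prefer g_aidxIemNativeCallRegs[IEM_CIMPL_HIDDEN_ARGS] for it later on. */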
9038
9039#define IEM_MC_NOREF(a_Name) \
9040 RT_NOREF_PV(a_Name)
9041
9042#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
9043 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
9044
9045#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
9046 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
9047
9048#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
9049 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
9050
9051#define IEM_MC_LOCAL(a_Type, a_Name) \
9052 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
9053
9054#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
9055 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
9056
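/* Illustrative MC usage of the allocators above (patterned on the instruction
   templates; the variable names here are made up for the example):

       IEM_MC_LOCAL(uint16_t, u16Tmp);
       IEM_MC_ARG(uint64_t, u64Src, 1);
       IEM_MC_ARG_LOCAL_REF(uint16_t *, pu16Tmp, u16Tmp, 2);

   Each of these just allocates a variable/argument entry in pReNative->Core and
   yields a packed 8-bit variable index; no host register or stack slot is
   assigned until the variable is actually used. */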
9057
9058/**
9059 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
9060 */
9061DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
9062{
9063 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
9064 return IEM_CIMPL_HIDDEN_ARGS;
9065 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
9066 return 1;
9067 return 0;
9068}
9069
9070
9071/**
9072 * Internal work that allocates a variable with kind set to
9073 * kIemNativeVarKind_Invalid and no current stack allocation.
9074 *
9075 * The kind will either be set by the caller or later when the variable is first
9076 * assigned a value.
9077 *
9078 * @returns Unpacked index.
9079 * @internal
9080 */
9081static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9082{
9083 Assert(cbType > 0 && cbType <= 64);
9084 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
9085 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
9086 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
9087 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9088 pReNative->Core.aVars[idxVar].cbVar = cbType;
9089 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9090 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9091 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
9092 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
9093 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
9094 pReNative->Core.aVars[idxVar].fRegAcquired = false;
9095 pReNative->Core.aVars[idxVar].u.uValue = 0;
9096 return idxVar;
9097}
9098
9099
9100/**
9101 * Internal work that allocates an argument variable w/o setting enmKind.
9102 *
9103 * @returns Unpacked index.
9104 * @internal
9105 */
9106static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9107{
9108 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
9109 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9110 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
9111
9112 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
9113 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
9114 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
9115 return idxVar;
9116}
9117
9118
9119/**
9120 * Gets the stack slot for a stack variable, allocating one if necessary.
9121 *
9122 * Calling this function implies that the stack slot will contain a valid
9123 * variable value. The caller deals with any register currently assigned to the
9124 * variable, typically by spilling it into the stack slot.
9125 *
9126 * @returns The stack slot number.
9127 * @param pReNative The recompiler state.
9128 * @param idxVar The variable.
9129 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
9130 */
9131DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9132{
9133 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9134 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9135 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9136
9137 /* Already got a slot? */
9138 uint8_t const idxStackSlot = pVar->idxStackSlot;
9139 if (idxStackSlot != UINT8_MAX)
9140 {
9141 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
9142 return idxStackSlot;
9143 }
9144
9145 /*
9146 * A single slot is easy to allocate.
9147 * Allocate them from the top end, closest to BP, to reduce the displacement.
9148 */
9149 if (pVar->cbVar <= sizeof(uint64_t))
9150 {
9151 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9152 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9153 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
9154 pVar->idxStackSlot = (uint8_t)iSlot;
9155 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
9156 return (uint8_t)iSlot;
9157 }
9158
9159 /*
9160 * We need more than one stack slot.
9161 *
9162 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
9163 */
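    /* Example (illustrative): a 32 byte variable needs four 8-byte slots, so
       fBitAllocMask ends up as 0xf and fBitAlignMask as 3; the loop below then
       only considers slot indexes that are multiples of four and claims four
       consecutive free slots at the first such position. */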
9164 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
9165 Assert(pVar->cbVar <= 64);
9166 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
9167 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
9168 uint32_t bmStack = ~pReNative->Core.bmStack;
9169 while (bmStack != UINT32_MAX)
9170 {
9171/** @todo allocate from the top to reduce BP displacement. */
9172 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
9173 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9174 if (!(iSlot & fBitAlignMask))
9175 {
9176 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
9177 {
9178 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
9179 pVar->idxStackSlot = (uint8_t)iSlot;
9180 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9181 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
9182 return (uint8_t)iSlot;
9183 }
9184 }
9185 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
9186 }
9187 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9188}
9189
9190
9191/**
9192 * Changes the variable to a stack variable.
9193 *
9194 * Currently this is only possible to do the first time the variable is used;
9195 * switching later could be implemented but hasn't been done.
9196 *
9197 * @param pReNative The recompiler state.
9198 * @param idxVar The variable.
9199 * @throws VERR_IEM_VAR_IPE_2
9200 */
9201static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9202{
9203 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9204 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9205 if (pVar->enmKind != kIemNativeVarKind_Stack)
9206 {
9207 /* We could in theory transition from immediate to stack as well, but it
9208 would involve the caller doing work storing the value on the stack. So,
9209 till that's required we only allow transition from invalid. */
9210 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9211 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9212 pVar->enmKind = kIemNativeVarKind_Stack;
9213
9214 /* Note! We don't allocate a stack slot here, that's only done when a
9215 slot is actually needed to hold a variable value. */
9216 }
9217}
9218
9219
9220/**
9221 * Sets the variable to a constant (immediate) value.
9222 *
9223 * This does not require stack storage as we know the value and can always
9224 * reload it, unless of course it's referenced.
9225 *
9226 * @param pReNative The recompiler state.
9227 * @param idxVar The variable.
9228 * @param uValue The immediate value.
9229 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9230 */
9231static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
9232{
9233 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9234 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9235 if (pVar->enmKind != kIemNativeVarKind_Immediate)
9236 {
9237 /* Only simple transitions for now. */
9238 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9239 pVar->enmKind = kIemNativeVarKind_Immediate;
9240 }
9241 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9242
9243 pVar->u.uValue = uValue;
9244 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
9245 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
9246 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
9247}
9248
9249
9250/**
9251 * Sets the variable to a reference (pointer) to @a idxOtherVar.
9252 *
9253 * This does not require stack storage as we know the value and can always
9254 * reload it. Loading is postponed till needed.
9255 *
9256 * @param pReNative The recompiler state.
9257 * @param idxVar The variable. Unpacked.
9258 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
9259 *
9260 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9261 * @internal
9262 */
9263static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
9264{
9265 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
9266 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
9267
9268 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
9269 {
9270 /* Only simple transitions for now. */
9271 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
9272 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9273 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
9274 }
9275 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9276
9277 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
9278
9279 /* Update the other variable, ensure it's a stack variable. */
9280 /** @todo handle variables with const values... that'll go boom now. */
9281 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
9282 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9283}
9284
9285
9286/**
9287 * Sets the variable to a reference (pointer) to a guest register reference.
9288 *
9289 * This does not require stack storage as we know the value and can always
9290 * reload it. Loading is postponed till needed.
9291 *
9292 * @param pReNative The recompiler state.
9293 * @param idxVar The variable.
9294 * @param enmRegClass The class of guest registers to reference.
9295 * @param idxReg The register within @a enmRegClass to reference.
9296 *
9297 * @throws VERR_IEM_VAR_IPE_2
9298 */
9299static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9300 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
9301{
9302 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9303 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9304
9305 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
9306 {
9307 /* Only simple transitions for now. */
9308 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9309 pVar->enmKind = kIemNativeVarKind_GstRegRef;
9310 }
9311 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9312
9313 pVar->u.GstRegRef.enmClass = enmRegClass;
9314 pVar->u.GstRegRef.idx = idxReg;
9315}
9316
9317
9318DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9319{
9320 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9321}
9322
9323
9324DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
9325{
9326 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9327
9328 /* Since we're using a generic uint64_t value type, we must truncate it if
9329 the variable is smaller, otherwise we may end up with a too large value when
9330 scaling up an imm8 w/ sign-extension.
9331
9332 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
9333 in the bios, bx=1) when running on arm, because clang expects 16-bit
9334 register parameters to have bits 16 and up set to zero. Instead of
9335 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
9336 CF value in the result. */
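    /* Example (illustrative): for cbType == sizeof(uint16_t) a sign-extended
       immediate such as UINT64_C(0xffffffffffffffff) is masked down to 0xffff
       below before being recorded as the constant value. */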
9337 switch (cbType)
9338 {
9339 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9340 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9341 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9342 }
9343 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9344 return idxVar;
9345}
9346
9347
9348DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
9349{
9350 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
9351 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
9352 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
9353 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
9354 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
9355 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9356
9357 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
9358 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
9359 return idxArgVar;
9360}
9361
9362
9363DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9364{
9365 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9366 /* Don't set to stack now, leave that to the first use as for instance
9367 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
9368 return idxVar;
9369}
9370
9371
9372DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
9373{
9374 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9375
9376 /* Since we're using a generic uint64_t value type, we must truncate it if
9377 the variable is smaller, otherwise we may end up with a too large value when
9378 scaling up an imm8 w/ sign-extension. */
9379 switch (cbType)
9380 {
9381 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9382 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9383 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9384 }
9385 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9386 return idxVar;
9387}
9388
9389
9390/**
9391 * Makes sure variable @a idxVar has a register assigned to it and that it stays
9392 * fixed till we call iemNativeVarRegisterRelease.
9393 *
9394 * @returns The host register number.
9395 * @param pReNative The recompiler state.
9396 * @param idxVar The variable.
9397 * @param poff Pointer to the instruction buffer offset.
9398 * In case a register needs to be freed up or the value
9399 * loaded off the stack.
9400 * @param fInitialized Set if the variable must already have been initialized.
9401 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
9402 * the case.
9403 * @param idxRegPref Preferred register number or UINT8_MAX.
9404 */
9405DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
9406 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
9407{
9408 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9409 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9410 Assert(pVar->cbVar <= 8);
9411 Assert(!pVar->fRegAcquired);
9412
9413 uint8_t idxReg = pVar->idxReg;
9414 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9415 {
9416 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
9417 && pVar->enmKind < kIemNativeVarKind_End);
9418 pVar->fRegAcquired = true;
9419 return idxReg;
9420 }
9421
9422 /*
9423 * If the kind of variable has not yet been set, default to 'stack'.
9424 */
9425 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
9426 && pVar->enmKind < kIemNativeVarKind_End);
9427 if (pVar->enmKind == kIemNativeVarKind_Invalid)
9428 iemNativeVarSetKindToStack(pReNative, idxVar);
9429
9430 /*
9431 * We have to allocate a register for the variable, even if it's a stack one,
9432 * as we don't know if there are modifications being made to it before it's
9433 * finalized (todo: analyze and insert hints about that?).
9434 *
9435 * If we can, we try to get the correct register for argument variables. This
9436 * assumes that most argument variables are fetched as close as possible
9437 * to the actual call, so that there aren't any interfering hidden calls
9438 * (memory accesses, etc) in between.
9439 *
9440 * If we cannot, or it's an ordinary (non-argument) variable, we make sure no
9441 * argument registers that will be used by this MC block are allocated here,
9442 * and we always prefer non-volatile registers to avoid needing to spill
9443 * stuff for internal calls.
9444 */
9445 /** @todo Detect too early argument value fetches and warn about hidden
9446 * calls causing less optimal code to be generated in the python script. */
9447
9448 uint8_t const uArgNo = pVar->uArgNo;
9449 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
9450 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
9451 {
9452 idxReg = g_aidxIemNativeCallRegs[uArgNo];
9453 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9454 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
9455 }
9456 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
9457 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
9458 {
9459 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
9460 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
9461 & ~pReNative->Core.bmHstRegsWithGstShadow
9462 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
9463 & fNotArgsMask;
9464 if (fRegs)
9465 {
9466 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
9467 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
9468 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
9469 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
9470 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
9471 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9472 }
9473 else
9474 {
9475 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
9476 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
9477 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
9478 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9479 }
9480 }
9481 else
9482 {
9483 idxReg = idxRegPref;
9484 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9485 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
9486 }
9487 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9488 pVar->idxReg = idxReg;
9489
9490 /*
9491 * Load it off the stack if we've got a stack slot.
9492 */
9493 uint8_t const idxStackSlot = pVar->idxStackSlot;
9494 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9495 {
9496 Assert(fInitialized);
9497 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9498 switch (pVar->cbVar)
9499 {
9500 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
9501 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
9502 case 3: AssertFailed(); RT_FALL_THRU();
9503 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
9504 default: AssertFailed(); RT_FALL_THRU();
9505 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
9506 }
9507 }
9508 else
9509 {
9510 Assert(idxStackSlot == UINT8_MAX);
9511 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9512 }
9513 pVar->fRegAcquired = true;
9514 return idxReg;
9515}
9516
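/*
 * Typical usage sketch (cf. iemNativeEmitFetchGregU16 further down): acquire a
 * host register for the variable, emit whatever uses or updates it, release:
 *     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
 *     off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
 *     iemNativeVarRegisterRelease(pReNative, idxDstVar);
 */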
9517
9518/**
9519 * The value of variable @a idxVar will be written in full to the @a enmGstReg
9520 * guest register.
9521 *
9522 * This function makes sure there is a register for it and sets it to be the
9523 * current shadow copy of @a enmGstReg.
9524 *
9525 * @returns The host register number.
9526 * @param pReNative The recompiler state.
9527 * @param idxVar The variable.
9528 * @param enmGstReg The guest register this variable will be written to
9529 * after this call.
9530 * @param poff Pointer to the instruction buffer offset.
9531 *                  Used in case a register needs to be freed up or the
9532 *                  variable content needs to be loaded off the stack.
9533 *
9534 * @note We DO NOT expect @a idxVar to be an argument variable, because this
9535 *       function is only used in the commit stage of an instruction.
9537 */
9538DECL_HIDDEN_THROW(uint8_t)
9539iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
9540{
9541 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9542 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9543 Assert(!pVar->fRegAcquired);
9544 AssertMsgStmt( pVar->cbVar <= 8
9545 && ( pVar->enmKind == kIemNativeVarKind_Immediate
9546 || pVar->enmKind == kIemNativeVarKind_Stack),
9547 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
9548 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
9549 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9550
9551 /*
9552 * This shouldn't ever be used for arguments, unless it's in a weird else
9553 * branch that doesn't do any calling and even then it's questionable.
9554 *
9555 * However, in case someone writes crazy wrong MC code and does register
9556 * updates before making calls, just use the regular register allocator to
9557 * ensure we get a register suitable for the intended argument number.
9558 */
9559 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
9560
9561 /*
9562 * If there is already a register for the variable, we transfer/set the
9563 * guest shadow copy assignment to it.
9564 */
9565 uint8_t idxReg = pVar->idxReg;
9566 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9567 {
9568 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
9569 {
9570 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
9571 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
9572 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
9573 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
9574 }
9575 else
9576 {
9577 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
9578 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
9579 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
9580 }
9581 /** @todo figure this one out. We need some way of making sure the register isn't
9582 * modified after this point, just in case we start writing crappy MC code. */
9583 pVar->enmGstReg = enmGstReg;
9584 pVar->fRegAcquired = true;
9585 return idxReg;
9586 }
9587 Assert(pVar->uArgNo == UINT8_MAX);
9588
9589 /*
9590      * Because this is supposed to be the commit stage, we just tag along with the
9591      * temporary register allocator and upgrade the register to a variable register.
9592 */
9593 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
9594 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
9595 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
9596 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
9597 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
9598 pVar->idxReg = idxReg;
9599
9600 /*
9601 * Now we need to load the register value.
9602 */
9603 if (pVar->enmKind == kIemNativeVarKind_Immediate)
9604 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
9605 else
9606 {
9607 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9608 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9609 switch (pVar->cbVar)
9610 {
9611 case sizeof(uint64_t):
9612 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
9613 break;
9614 case sizeof(uint32_t):
9615 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
9616 break;
9617 case sizeof(uint16_t):
9618 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
9619 break;
9620 case sizeof(uint8_t):
9621 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
9622 break;
9623 default:
9624 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9625 }
9626 }
9627
9628 pVar->fRegAcquired = true;
9629 return idxReg;
9630}
9631
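/*
 * Usage sketch, simplified from the full-width store emitters elsewhere in this
 * file: acquire the variable's host register as the new shadow of the guest
 * register that is about to be written in full, emit the store into CPUMCTX
 * from that register, and release the variable again:
 *     uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar,
 *                                                                      IEMNATIVEGSTREG_GPR(iGReg), &off);
 *     ... emit the store to pVCpu->cpum.GstCtx.aGRegs[iGReg] ...
 *     iemNativeVarRegisterRelease(pReNative, idxValueVar);
 */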
9632
9633/**
9634 * Sets the host register for @a idxVarRc to @a idxReg.
9635 *
9636 * The register must not be allocated. Any guest register shadowing will be
9637 * implicitly dropped by this call.
9638 *
9639 * The variable must not have any register associated with it (causes
9640 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
9641 * implied.
9642 *
9643 * @returns idxReg
9644 * @param pReNative The recompiler state.
9645 * @param idxVar The variable.
9646 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
9647 * @param off For recording in debug info.
9648 *
9649 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
9650 */
9651DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
9652{
9653 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9654 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9655 Assert(!pVar->fRegAcquired);
9656 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
9657 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
9658 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
9659
9660 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
9661 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9662
9663 iemNativeVarSetKindToStack(pReNative, idxVar);
9664 pVar->idxReg = idxReg;
9665
9666 return idxReg;
9667}
9668
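/*
 * Usage sketch (cf. iemNativeEmitCallAImplCommon further down): right after a
 * helper call has been emitted, the return value variable is bound directly to
 * the register the calling convention returns it in:
 *     iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
 */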
9669
9670/**
9671 * Convenience variant of iemNativeVarRegisterSet() that also marks the register as acquired.
9672 */
9673DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9674 uint8_t idxReg, uint32_t *poff)
9675{
9676 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
9677 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
9678 return idxReg;
9679}
9680
9681
9682/**
9683 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
9684 *
9685 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
9686 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
9687 * requirement of flushing anything in volatile host registers when making a
9688 * call.
9689 *
9690 * @returns New @a off value.
9691 * @param pReNative The recompiler state.
9692 * @param off The code buffer position.
9693 * @param fHstRegsNotToSave Set of registers not to save & restore.
9694 */
9695DECL_HIDDEN_THROW(uint32_t)
9696iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9697{
9698 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9699 if (fHstRegs)
9700 {
9701 do
9702 {
9703 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9704 fHstRegs &= ~RT_BIT_32(idxHstReg);
9705
9706 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9707 {
9708 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9709 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9710 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9711 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9712 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9713 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9714 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9715 {
9716 case kIemNativeVarKind_Stack:
9717 {
9718 /* Temporarily spill the variable register. */
9719 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9720 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9721 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9722 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9723 continue;
9724 }
9725
9726 case kIemNativeVarKind_Immediate:
9727 case kIemNativeVarKind_VarRef:
9728 case kIemNativeVarKind_GstRegRef:
9729 /* It is weird to have any of these loaded at this point. */
9730 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9731 continue;
9732
9733 case kIemNativeVarKind_End:
9734 case kIemNativeVarKind_Invalid:
9735 break;
9736 }
9737 AssertFailed();
9738 }
9739 else
9740 {
9741 /*
9742 * Allocate a temporary stack slot and spill the register to it.
9743 */
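                /* ASMBitLastSetU32 on the inverted allocation bitmap yields the
                   highest currently unallocated slot index; the assertion below
                   ensures it is within the variable frame. */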
9744 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9745 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
9746 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9747 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
9748 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
9749 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9750 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9751 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9752 }
9753 } while (fHstRegs);
9754 }
9755 return off;
9756}
9757
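/*
 * Sketch of the typical bracketing of a helper call (simplified from the TLB
 * miss code paths elsewhere in this file):
 *     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *     ... load the helper arguments and emit the call ...
 *     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */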
9758
9759/**
9760 * Emit code to restore volatile registers after a call to a helper.
9761 *
9762 * @returns New @a off value.
9763 * @param pReNative The recompiler state.
9764 * @param off The code buffer position.
9765 * @param fHstRegsNotToSave Set of registers not to save & restore.
9766 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
9767 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
9768 */
9769DECL_HIDDEN_THROW(uint32_t)
9770iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9771{
9772 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9773 if (fHstRegs)
9774 {
9775 do
9776 {
9777 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9778 fHstRegs &= ~RT_BIT_32(idxHstReg);
9779
9780 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9781 {
9782 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9783 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9784 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9785 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9786 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9787 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9788 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9789 {
9790 case kIemNativeVarKind_Stack:
9791 {
9792 /* Unspill the variable register. */
9793 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9794 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
9795 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9796 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9797 continue;
9798 }
9799
9800 case kIemNativeVarKind_Immediate:
9801 case kIemNativeVarKind_VarRef:
9802 case kIemNativeVarKind_GstRegRef:
9803 /* It is weird to have any of these loaded at this point. */
9804 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9805 continue;
9806
9807 case kIemNativeVarKind_End:
9808 case kIemNativeVarKind_Invalid:
9809 break;
9810 }
9811 AssertFailed();
9812 }
9813 else
9814 {
9815 /*
9816 * Restore from temporary stack slot.
9817 */
9818 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
9819 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
9820 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
9821 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
9822
9823 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9824 }
9825 } while (fHstRegs);
9826 }
9827 return off;
9828}
9829
9830
9831/**
9832 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
9833 *
9834 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
9835 *
9836 * ASSUMES that @a idxVar is valid and unpacked.
9837 */
9838DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9839{
9840 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
9841 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
9842 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9843 {
9844 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
9845 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
9846 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
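        /* E.g. a 16 byte variable gives cSlots=2 and fAllocMask=0x3, which is
           shifted to idxStackSlot when clearing the allocation bitmap below. */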
9847 Assert(cSlots > 0);
9848 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
9849 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9850 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
9851 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
9852 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9853 }
9854 else
9855 Assert(idxStackSlot == UINT8_MAX);
9856}
9857
9858
9859/**
9860 * Worker that frees a single variable.
9861 *
9862 * ASSUMES that @a idxVar is valid and unpacked.
9863 */
9864DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9865{
9866 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
9867 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
9868 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
9869
9870 /* Free the host register first if any assigned. */
9871 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9872 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9873 {
9874 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9875 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9876 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9877 }
9878
9879 /* Free argument mapping. */
9880 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
9881 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
9882 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
9883
9884 /* Free the stack slots. */
9885 iemNativeVarFreeStackSlots(pReNative, idxVar);
9886
9887 /* Free the actual variable. */
9888 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9889 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9890}
9891
9892
9893/**
9894 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
9895 */
9896DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
9897{
9898 while (bmVars != 0)
9899 {
9900 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9901 bmVars &= ~RT_BIT_32(idxVar);
9902
9903#if 1 /** @todo optimize by simplifying this later... */
9904 iemNativeVarFreeOneWorker(pReNative, idxVar);
9905#else
9906 /* Only need to free the host register, the rest is done as bulk updates below. */
9907 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9908 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9909 {
9910 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9911 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9912 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9913 }
9914#endif
9915 }
9916#if 0 /** @todo optimize by simplifying this later... */
9917 pReNative->Core.bmVars = 0;
9918 pReNative->Core.bmStack = 0;
9919 pReNative->Core.u64ArgVars = UINT64_MAX;
9920#endif
9921}
9922
9923
9924/**
9925 * This is called by IEM_MC_END() to clean up all variables.
9926 */
9927DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
9928{
9929 uint32_t const bmVars = pReNative->Core.bmVars;
9930 if (bmVars != 0)
9931 iemNativeVarFreeAllSlow(pReNative, bmVars);
9932 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9933 Assert(pReNative->Core.bmStack == 0);
9934}
9935
9936
9937#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
9938
9939/**
9940 * This is called by IEM_MC_FREE_LOCAL.
9941 */
9942DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9943{
9944 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9945 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
9946 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9947}
9948
9949
9950#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
9951
9952/**
9953 * This is called by IEM_MC_FREE_ARG.
9954 */
9955DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9956{
9957 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9958 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
9959 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9960}
9961
9962
9963#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
9964
9965/**
9966 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
9967 */
9968DECL_INLINE_THROW(uint32_t)
9969iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
9970{
9971 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
9972 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
9973 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9974 Assert( pVarDst->cbVar == sizeof(uint16_t)
9975 || pVarDst->cbVar == sizeof(uint32_t));
9976
9977 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
9978 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
9979 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
9980 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
9981 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9982
9983 Assert(pVarDst->cbVar < pVarSrc->cbVar);
9984
9985 /*
9986 * Special case for immediates.
9987 */
9988 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
9989 {
9990 switch (pVarDst->cbVar)
9991 {
9992 case sizeof(uint16_t):
9993 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
9994 break;
9995 case sizeof(uint32_t):
9996 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
9997 break;
9998 default: AssertFailed(); break;
9999 }
10000 }
10001 else
10002 {
10003 /*
10004 * The generic solution for now.
10005 */
10006 /** @todo optimize this by having the python script make sure the source
10007 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
10008 * statement. Then we could just transfer the register assignments. */
10009 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
10010 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
10011 switch (pVarDst->cbVar)
10012 {
10013 case sizeof(uint16_t):
10014 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
10015 break;
10016 case sizeof(uint32_t):
10017 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
10018 break;
10019 default: AssertFailed(); break;
10020 }
10021 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
10022 iemNativeVarRegisterRelease(pReNative, idxVarDst);
10023 }
10024 return off;
10025}
10026
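/*
 * Illustrative sketch: with a 32-bit source and a 16-bit destination,
 * IEM_MC_ASSIGN_TO_SMALLER either re-types the constant (immediate source) or
 * acquires host registers for both variables and emits a 16-bit register copy.
 */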
10027
10028
10029/*********************************************************************************************************************************
10030* Emitters for IEM_MC_CALL_CIMPL_XXX *
10031*********************************************************************************************************************************/
10032
10033/**
10034 * Emits code to load a reference to the given guest register into @a idxGprDst.
10035 */
10036DECL_INLINE_THROW(uint32_t)
10037iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
10038 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
10039{
10040#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
10041 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
10042#endif
10043
10044 /*
10045 * Get the offset relative to the CPUMCTX structure.
10046 */
10047 uint32_t offCpumCtx;
10048 switch (enmClass)
10049 {
10050 case kIemNativeGstRegRef_Gpr:
10051 Assert(idxRegInClass < 16);
10052 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
10053 break;
10054
10055         case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
10056 Assert(idxRegInClass < 4);
10057 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
10058 break;
10059
10060 case kIemNativeGstRegRef_EFlags:
10061 Assert(idxRegInClass == 0);
10062 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
10063 break;
10064
10065 case kIemNativeGstRegRef_MxCsr:
10066 Assert(idxRegInClass == 0);
10067 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
10068 break;
10069
10070 case kIemNativeGstRegRef_FpuReg:
10071 Assert(idxRegInClass < 8);
10072 AssertFailed(); /** @todo what kind of indexing? */
10073 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
10074 break;
10075
10076 case kIemNativeGstRegRef_MReg:
10077 Assert(idxRegInClass < 8);
10078 AssertFailed(); /** @todo what kind of indexing? */
10079 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
10080 break;
10081
10082 case kIemNativeGstRegRef_XReg:
10083 Assert(idxRegInClass < 16);
10084 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
10085 break;
10086
10087 default:
10088 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
10089 }
10090
10091 /*
10092     * Load the address of the CPUMCTX member into the destination register.
10093 */
10094#ifdef RT_ARCH_AMD64
10095 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
10096
10097#elif defined(RT_ARCH_ARM64)
10098 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10099 Assert(offCpumCtx < 4096);
10100 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
10101
10102#else
10103# error "Port me!"
10104#endif
10105
10106 return off;
10107}
10108
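/*
 * E.g. for kIemNativeGstRegRef_Gpr with idxRegInClass=3 the emitted code leaves
 * &pVCpu->cpum.GstCtx.aGRegs[3] (a pointer to the guest RBX value) in idxGprDst,
 * using a LEA on amd64 and an ADD of the immediate offset to
 * IEMNATIVE_REG_FIXED_PCPUMCTX on arm64.
 */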
10109
10110/**
10111 * Common code for CIMPL and AIMPL calls.
10112 *
10113 * These are calls that use argument variables and such.  They should not be
10114 * confused with internal calls required to implement an MC operation,
10115 * like a TLB load and similar.
10116 *
10117 * Upon return all that is left to do is to load any hidden arguments and
10118 * perform the call. All argument variables are freed.
10119 *
10120 * @returns New code buffer offset; throws VBox status code on error.
10121 * @param pReNative The native recompile state.
10122 * @param off The code buffer offset.
10123 * @param cArgs             The total number of arguments (includes hidden
10124 * count).
10125 * @param cHiddenArgs The number of hidden arguments. The hidden
10126 * arguments must not have any variable declared for
10127 * them, whereas all the regular arguments must
10128 * (tstIEMCheckMc ensures this).
10129 */
10130DECL_HIDDEN_THROW(uint32_t)
10131iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
10132{
10133#ifdef VBOX_STRICT
10134 /*
10135 * Assert sanity.
10136 */
10137 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
10138 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
10139 for (unsigned i = 0; i < cHiddenArgs; i++)
10140 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
10141 for (unsigned i = cHiddenArgs; i < cArgs; i++)
10142 {
10143 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
10144 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
10145 }
10146 iemNativeRegAssertSanity(pReNative);
10147#endif
10148
10149 /* We don't know what the called function makes use of, so flush any pending register writes. */
10150 off = iemNativeRegFlushPendingWrites(pReNative, off);
10151
10152 /*
10153 * Before we do anything else, go over variables that are referenced and
10154 * make sure they are not in a register.
10155 */
10156 uint32_t bmVars = pReNative->Core.bmVars;
10157 if (bmVars)
10158 {
10159 do
10160 {
10161 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
10162 bmVars &= ~RT_BIT_32(idxVar);
10163
10164 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
10165 {
10166 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
10167 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
10168 {
10169 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
10170 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
10171 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
10172 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
10173 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
10174
10175 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
10176 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
10177 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
10178 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
10179 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
10180 }
10181 }
10182 } while (bmVars != 0);
10183#if 0 //def VBOX_STRICT
10184 iemNativeRegAssertSanity(pReNative);
10185#endif
10186 }
10187
10188 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
10189
10190 /*
10191 * First, go over the host registers that will be used for arguments and make
10192 * sure they either hold the desired argument or are free.
10193 */
10194 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
10195 {
10196 for (uint32_t i = 0; i < cRegArgs; i++)
10197 {
10198 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10199 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10200 {
10201 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
10202 {
10203 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
10204 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
10205 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
10206 Assert(pVar->idxReg == idxArgReg);
10207 uint8_t const uArgNo = pVar->uArgNo;
10208 if (uArgNo == i)
10209                     { /* perfect */ }
10210 /* The variable allocator logic should make sure this is impossible,
10211 except for when the return register is used as a parameter (ARM,
10212 but not x86). */
10213#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
10214 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
10215 {
10216# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10217# error "Implement this"
10218# endif
10219 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
10220 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
10221 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
10222 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10223 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
10224 }
10225#endif
10226 else
10227 {
10228 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10229
10230 if (pVar->enmKind == kIemNativeVarKind_Stack)
10231 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
10232 else
10233 {
10234 /* just free it, can be reloaded if used again */
10235 pVar->idxReg = UINT8_MAX;
10236 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
10237 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
10238 }
10239 }
10240 }
10241 else
10242 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
10243 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
10244 }
10245 }
10246#if 0 //def VBOX_STRICT
10247 iemNativeRegAssertSanity(pReNative);
10248#endif
10249 }
10250
10251 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
10252
10253#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10254 /*
10255 * If there are any stack arguments, make sure they are in their place as well.
10256 *
10257      * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
10258      * the caller) will be loading it later and it must be free (see the first loop).
10259 */
10260 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
10261 {
10262 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
10263 {
10264 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10265 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
10266 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10267 {
10268 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
10269 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
10270 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
10271 pVar->idxReg = UINT8_MAX;
10272 }
10273 else
10274 {
10275 /* Use ARG0 as temp for stuff we need registers for. */
10276 switch (pVar->enmKind)
10277 {
10278 case kIemNativeVarKind_Stack:
10279 {
10280 uint8_t const idxStackSlot = pVar->idxStackSlot;
10281 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10282 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
10283 iemNativeStackCalcBpDisp(idxStackSlot));
10284 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10285 continue;
10286 }
10287
10288 case kIemNativeVarKind_Immediate:
10289 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
10290 continue;
10291
10292 case kIemNativeVarKind_VarRef:
10293 {
10294 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10295 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10296 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10297 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10298 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10299 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10300 {
10301 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10302 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10303 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10304 }
10305 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10306 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10307 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
10308 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10309 continue;
10310 }
10311
10312 case kIemNativeVarKind_GstRegRef:
10313 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
10314 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10315 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10316 continue;
10317
10318 case kIemNativeVarKind_Invalid:
10319 case kIemNativeVarKind_End:
10320 break;
10321 }
10322 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10323 }
10324 }
10325# if 0 //def VBOX_STRICT
10326 iemNativeRegAssertSanity(pReNative);
10327# endif
10328 }
10329#else
10330 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
10331#endif
10332
10333 /*
10334 * Make sure the argument variables are loaded into their respective registers.
10335 *
10336 * We can optimize this by ASSUMING that any register allocations are for
10337      * registers that have already been loaded and are ready.  The previous step
10338 * saw to that.
10339 */
10340 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
10341 {
10342 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10343 {
10344 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10345 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10346 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
10347 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
10348 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
10349 else
10350 {
10351 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10352 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10353 {
10354 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
10355 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
10356 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
10357 | RT_BIT_32(idxArgReg);
10358 pVar->idxReg = idxArgReg;
10359 }
10360 else
10361 {
10362 /* Use ARG0 as temp for stuff we need registers for. */
10363 switch (pVar->enmKind)
10364 {
10365 case kIemNativeVarKind_Stack:
10366 {
10367 uint8_t const idxStackSlot = pVar->idxStackSlot;
10368 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10369 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
10370 continue;
10371 }
10372
10373 case kIemNativeVarKind_Immediate:
10374 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
10375 continue;
10376
10377 case kIemNativeVarKind_VarRef:
10378 {
10379 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10380 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10381 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
10382 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10383 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10384 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10385 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10386 {
10387 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10388 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10389 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10390 }
10391 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10392 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10393 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
10394 continue;
10395 }
10396
10397 case kIemNativeVarKind_GstRegRef:
10398 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
10399 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10400 continue;
10401
10402 case kIemNativeVarKind_Invalid:
10403 case kIemNativeVarKind_End:
10404 break;
10405 }
10406 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10407 }
10408 }
10409 }
10410#if 0 //def VBOX_STRICT
10411 iemNativeRegAssertSanity(pReNative);
10412#endif
10413 }
10414#ifdef VBOX_STRICT
10415 else
10416 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10417 {
10418 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
10419 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
10420 }
10421#endif
10422
10423 /*
10424 * Free all argument variables (simplified).
10425 * Their lifetime always expires with the call they are for.
10426 */
10427 /** @todo Make the python script check that arguments aren't used after
10428 * IEM_MC_CALL_XXXX. */
10429     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends
10430      *        requiring an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call,
10431      *        typically with an argument value.  There is also some FPU stuff. */
10432 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
10433 {
10434 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
10435 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
10436
10437 /* no need to free registers: */
10438 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
10439 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
10440 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
10441 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
10442 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
10443 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
10444
10445 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
10446 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
10447 iemNativeVarFreeStackSlots(pReNative, idxVar);
10448 }
10449 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
10450
10451 /*
10452 * Flush volatile registers as we make the call.
10453 */
10454 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
10455
10456 return off;
10457}
10458
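/*
 * Caller sketch (cf. iemNativeEmitCallCImplCommon and iemNativeEmitCallAImplCommon
 * below): once this returns, only hidden arguments remain to be loaded before
 * emitting the call itself:
 *     off = iemNativeEmitCallCommon(pReNative, off, cArgs, cHiddenArgs);
 *     off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
 */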
10459
10460/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
10461DECL_HIDDEN_THROW(uint32_t)
10462iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
10463 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
10464
10465{
10466 /*
10467 * Do all the call setup and cleanup.
10468 */
10469 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
10470
10471 /*
10472 * Load the two or three hidden arguments.
10473 */
10474#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10475 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
10476 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10477 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
10478#else
10479 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10480 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
10481#endif
10482
10483 /*
10484 * Make the call and check the return code.
10485 *
10486      * Shadow PC copies are always flushed here; other stuff depends on flags.
10487      * Segment and general purpose registers are explicitly flushed via the
10488 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
10489 * macros.
10490 */
10491 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
10492#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10493 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
10494#endif
10495 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
10496 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
10497 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
10498 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
10499
10500 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
10501}
10502
10503
10504#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
10505 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
10506
10507/** Emits code for IEM_MC_CALL_CIMPL_1. */
10508DECL_INLINE_THROW(uint32_t)
10509iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10510 uintptr_t pfnCImpl, uint8_t idxArg0)
10511{
10512 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10513 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
10514}
10515
10516
10517#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
10518 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
10519
10520/** Emits code for IEM_MC_CALL_CIMPL_2. */
10521DECL_INLINE_THROW(uint32_t)
10522iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10523 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
10524{
10525 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10526 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10527 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
10528}
10529
10530
10531#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
10532 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10533 (uintptr_t)a_pfnCImpl, a0, a1, a2)
10534
10535/** Emits code for IEM_MC_CALL_CIMPL_3. */
10536DECL_INLINE_THROW(uint32_t)
10537iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10538 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10539{
10540 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10541 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10542 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10543 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
10544}
10545
10546
10547#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
10548 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10549 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
10550
10551/** Emits code for IEM_MC_CALL_CIMPL_4. */
10552DECL_INLINE_THROW(uint32_t)
10553iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10554 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10555{
10556 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10557 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10558 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10559 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10560 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
10561}
10562
10563
10564#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
10565 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10566 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
10567
10568/** Emits code for IEM_MC_CALL_CIMPL_5. */
10569DECL_INLINE_THROW(uint32_t)
10570iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10571 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
10572{
10573 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10574 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10575 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10576 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10577 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
10578 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
10579}
10580
10581
10582/** Recompiler debugging: Flush guest register shadow copies. */
10583#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
10584
10585
10586
10587/*********************************************************************************************************************************
10588* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
10589*********************************************************************************************************************************/
10590
10591/**
10592 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
10593 */
10594DECL_INLINE_THROW(uint32_t)
10595iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10596 uintptr_t pfnAImpl, uint8_t cArgs)
10597{
10598 if (idxVarRc != UINT8_MAX)
10599 {
10600 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
10601 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
10602 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
10603 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
10604 }
10605
10606 /*
10607 * Do all the call setup and cleanup.
10608 */
10609 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
10610
10611 /*
10612 * Make the call and update the return code variable if we've got one.
10613 */
10614 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10615 if (idxVarRc != UINT8_MAX)
10616 {
10617        off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
10618 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
10619 }
10620
10621 return off;
10622}
10623
10624
10625
10626#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
10627 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
10628
10629#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
10630 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
10631
10632/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
10633DECL_INLINE_THROW(uint32_t)
10634iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
10635{
10636 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
10637}
10638
10639
10640#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
10641 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
10642
10643#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
10644 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
10645
10646/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
10647DECL_INLINE_THROW(uint32_t)
10648iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
10649{
10650 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10651 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
10652}
10653
10654
10655#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
10656 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
10657
10658#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
10659 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
10660
10661/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
10662DECL_INLINE_THROW(uint32_t)
10663iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10664 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10665{
10666 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10667 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10668 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
10669}
10670
10671
10672#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
10673 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
10674
10675#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
10676 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
10677
10678/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
10679DECL_INLINE_THROW(uint32_t)
10680iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10681 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10682{
10683 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10684 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10685 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10686 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
10687}
10688
10689
10690#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
10691 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10692
10693#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
10694 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10695
10696/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
10697DECL_INLINE_THROW(uint32_t)
10698iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10699 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10700{
10701 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10702 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10703 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10704 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
10705 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
10706}
10707
10708
10709
10710/*********************************************************************************************************************************
10711* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
10712*********************************************************************************************************************************/
10713
10714#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
10715 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
10716
10717#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10718 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
10719
10720#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10721 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
10722
10723#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10724 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
10725
10726
10727/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
10728 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
10729DECL_INLINE_THROW(uint32_t)
10730iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
10731{
10732 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10733 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10734 Assert(iGRegEx < 20);
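    /* iGRegEx encoding: 0..15 is the low byte of the ordinary GPRs, while
       16..19 selects the high byte (AH, CH, DH, BH) of the first four. */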
10735
10736 /* Same discussion as in iemNativeEmitFetchGregU16 */
10737 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10738 kIemNativeGstRegUse_ReadOnly);
10739
10740 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10741 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10742
10743 /* The value is zero-extended to the full 64-bit host register width. */
10744 if (iGRegEx < 16)
10745 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10746 else
10747 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10748
10749 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10750 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10751 return off;
10752}
10753
10754
10755#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10756 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
10757
10758#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10759 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
10760
10761#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10762 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
10763
10764/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
10765DECL_INLINE_THROW(uint32_t)
10766iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
10767{
10768 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10769 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10770 Assert(iGRegEx < 20);
10771
10772 /* Same discussion as in iemNativeEmitFetchGregU16 */
10773 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10774 kIemNativeGstRegUse_ReadOnly);
10775
10776 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10777 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10778
10779 if (iGRegEx < 16)
10780 {
10781 switch (cbSignExtended)
10782 {
10783 case sizeof(uint16_t):
10784 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10785 break;
10786 case sizeof(uint32_t):
10787 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10788 break;
10789 case sizeof(uint64_t):
10790 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10791 break;
10792 default: AssertFailed(); break;
10793 }
10794 }
10795 else
10796 {
10797 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10798 switch (cbSignExtended)
10799 {
10800 case sizeof(uint16_t):
10801 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10802 break;
10803 case sizeof(uint32_t):
10804 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10805 break;
10806 case sizeof(uint64_t):
10807 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10808 break;
10809 default: AssertFailed(); break;
10810 }
10811 }
10812
10813 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10814 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10815 return off;
10816}
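
/* Implementation note (an observation on the code above, not a spec): for the
   AH/CH/DH/BH case the byte is first brought down into bits 7:0 of the
   destination via iemNativeEmitLoadGprFromGpr8Hi and the sign-extension is then
   done on the destination register itself, so the high-byte path only costs one
   extra helper emit compared to the low-byte path. */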
10817
10818
10819
10820#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
10821 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
10822
10823#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
10824 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10825
10826#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
10827 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10828
10829/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
10830DECL_INLINE_THROW(uint32_t)
10831iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10832{
10833 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10834 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10835 Assert(iGReg < 16);
10836
10837 /*
10838     * We can either just load the low 16 bits of the GPR into a host register
10839 * for the variable, or we can do so via a shadow copy host register. The
10840 * latter will avoid having to reload it if it's being stored later, but
10841 * will waste a host register if it isn't touched again. Since we don't
10842     * know what's going to happen, we choose the latter for now.
10843 */
10844 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10845 kIemNativeGstRegUse_ReadOnly);
10846
10847 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10848 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10849 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10850 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10851
10852 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10853 return off;
10854}
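
/* Illustration of the trade-off described above (typical case, not a
   guarantee): a sequence such as IEM_MC_FETCH_GREG_U16(u16Tmp, X86_GREG_xAX)
   followed by another MC that reads RAX can reuse the read-only shadow of
   cpum.GstCtx.aGRegs[0] instead of reloading it from memory; the price is that
   the shadow keeps a host register occupied even if RAX is never touched again
   in the translation block. */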
10855
10856
10857#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
10858 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10859
10860#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
10861 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10862
10863/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
10864DECL_INLINE_THROW(uint32_t)
10865iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
10866{
10867 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10868 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10869 Assert(iGReg < 16);
10870
10871 /*
10872     * We can either just load the low 16 bits of the GPR into a host register
10873 * for the variable, or we can do so via a shadow copy host register. The
10874 * latter will avoid having to reload it if it's being stored later, but
10875 * will waste a host register if it isn't touched again. Since we don't
10876     * know what's going to happen, we choose the latter for now.
10877 */
10878 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10879 kIemNativeGstRegUse_ReadOnly);
10880
10881 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10882 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10883 if (cbSignExtended == sizeof(uint32_t))
10884 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10885 else
10886 {
10887 Assert(cbSignExtended == sizeof(uint64_t));
10888 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10889 }
10890 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10891
10892 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10893 return off;
10894}
10895
10896
10897#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
10898 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
10899
10900#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
10901 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
10902
10903/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
10904DECL_INLINE_THROW(uint32_t)
10905iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10906{
10907 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10908 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10909 Assert(iGReg < 16);
10910
10911 /*
10912     * We can either just load the low 32 bits of the GPR into a host register
10913 * for the variable, or we can do so via a shadow copy host register. The
10914 * latter will avoid having to reload it if it's being stored later, but
10915 * will waste a host register if it isn't touched again. Since we don't
10916     * know what's going to happen, we choose the latter for now.
10917 */
10918 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10919 kIemNativeGstRegUse_ReadOnly);
10920
10921 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10922 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10923 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10924 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10925
10926 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10927 return off;
10928}
10929
10930
10931#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
10932 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
10933
10934/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
10935DECL_INLINE_THROW(uint32_t)
10936iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10937{
10938 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10939 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10940 Assert(iGReg < 16);
10941
10942 /*
10943     * We can either just load the low 32 bits of the GPR into a host register
10944 * for the variable, or we can do so via a shadow copy host register. The
10945 * latter will avoid having to reload it if it's being stored later, but
10946 * will waste a host register if it isn't touched again. Since we don't
10947     * know what's going to happen, we choose the latter for now.
10948 */
10949 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10950 kIemNativeGstRegUse_ReadOnly);
10951
10952 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10953 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10954 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10955 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10956
10957 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10958 return off;
10959}
10960
10961
10962#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
10963 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10964
10965#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
10966 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10967
10968/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
10969 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
10970DECL_INLINE_THROW(uint32_t)
10971iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10972{
10973 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10974 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10975 Assert(iGReg < 16);
10976
10977 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10978 kIemNativeGstRegUse_ReadOnly);
10979
10980 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10981 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10982 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
10983 /** @todo name the register a shadow one already? */
10984 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10985
10986 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10987 return off;
10988}
10989
10990
10991
10992/*********************************************************************************************************************************
10993* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
10994*********************************************************************************************************************************/
10995
10996#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
10997 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
10998
10999/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
11000DECL_INLINE_THROW(uint32_t)
11001iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
11002{
11003 Assert(iGRegEx < 20);
11004 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
11005 kIemNativeGstRegUse_ForUpdate);
11006#ifdef RT_ARCH_AMD64
11007 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
11008
11009 /* To the lowest byte of the register: mov r8, imm8 */
11010 if (iGRegEx < 16)
11011 {
11012 if (idxGstTmpReg >= 8)
11013 pbCodeBuf[off++] = X86_OP_REX_B;
11014 else if (idxGstTmpReg >= 4)
11015 pbCodeBuf[off++] = X86_OP_REX;
11016 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
11017 pbCodeBuf[off++] = u8Value;
11018 }
11019    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
11020 else if (idxGstTmpReg < 4)
11021 {
11022 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
11023 pbCodeBuf[off++] = u8Value;
11024 }
11025 else
11026 {
11027 /* ror reg64, 8 */
11028 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11029 pbCodeBuf[off++] = 0xc1;
11030 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11031 pbCodeBuf[off++] = 8;
11032
11033 /* mov reg8, imm8 */
11034 if (idxGstTmpReg >= 8)
11035 pbCodeBuf[off++] = X86_OP_REX_B;
11036 else if (idxGstTmpReg >= 4)
11037 pbCodeBuf[off++] = X86_OP_REX;
11038 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
11039 pbCodeBuf[off++] = u8Value;
11040
11041 /* rol reg64, 8 */
11042 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11043 pbCodeBuf[off++] = 0xc1;
11044 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11045 pbCodeBuf[off++] = 8;
11046 }
11047
11048#elif defined(RT_ARCH_ARM64)
11049 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
11050 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11051 if (iGRegEx < 16)
11052 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
11053 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
11054 else
11055 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
11056 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
11057 iemNativeRegFreeTmp(pReNative, idxImmReg);
11058
11059#else
11060# error "Port me!"
11061#endif
11062
11063 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11064
11065 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11066
11067 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11068 return off;
11069}
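
/* Worked example for the rotate path above (register numbers are whatever the
   allocator happened to pick, shown here only for illustration): storing 0x42
   into guest CH while RCX is shadowed by host r10 assembles roughly to
       ror r10, 8          ; bring bits 15:8 down to 7:0
       mov r10b, 0x42      ; patch the byte
       rol r10, 8          ; restore the original layout
   whereas a shadow sitting in rax..rbx can take the direct mov ah/ch/dh/bh, imm8
   form, and ARM64 always gets away with a single bfi into bits 15:8. */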
11070
11071
11072#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
11073 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
11074
11075/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
11076DECL_INLINE_THROW(uint32_t)
11077iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
11078{
11079 Assert(iGRegEx < 20);
11080 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11081
11082 /*
11083     * If it's a constant value (unlikely), we treat this as an
11084 * IEM_MC_STORE_GREG_U8_CONST statement.
11085 */
11086 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11087 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11088 { /* likely */ }
11089 else
11090 {
11091 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11092 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11093 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
11094 }
11095
11096 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
11097 kIemNativeGstRegUse_ForUpdate);
11098 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11099
11100#ifdef RT_ARCH_AMD64
11101 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
11102 if (iGRegEx < 16)
11103 {
11104 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
11105 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11106 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11107 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11108 pbCodeBuf[off++] = X86_OP_REX;
11109 pbCodeBuf[off++] = 0x8a;
11110 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11111 }
11112    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
11113 else if (idxGstTmpReg < 4 && idxVarReg < 4)
11114 {
11115 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
11116 pbCodeBuf[off++] = 0x8a;
11117 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
11118 }
11119 else
11120 {
11121 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
11122
11123 /* ror reg64, 8 */
11124 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11125 pbCodeBuf[off++] = 0xc1;
11126 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11127 pbCodeBuf[off++] = 8;
11128
11129 /* mov reg8, reg8(r/m) */
11130 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11131 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11132 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11133 pbCodeBuf[off++] = X86_OP_REX;
11134 pbCodeBuf[off++] = 0x8a;
11135 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11136
11137 /* rol reg64, 8 */
11138 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11139 pbCodeBuf[off++] = 0xc1;
11140 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11141 pbCodeBuf[off++] = 8;
11142 }
11143
11144#elif defined(RT_ARCH_ARM64)
11145 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
11146 or
11147 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
11148 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11149 if (iGRegEx < 16)
11150 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
11151 else
11152 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
11153
11154#else
11155# error "Port me!"
11156#endif
11157 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11158
11159 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11160
11161 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11162 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11163 return off;
11164}
11165
11166
11167
11168#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
11169 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
11170
11171/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
11172DECL_INLINE_THROW(uint32_t)
11173iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
11174{
11175 Assert(iGReg < 16);
11176 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11177 kIemNativeGstRegUse_ForUpdate);
11178#ifdef RT_ARCH_AMD64
11179 /* mov reg16, imm16 */
11180 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
11181 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11182 if (idxGstTmpReg >= 8)
11183 pbCodeBuf[off++] = X86_OP_REX_B;
11184 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
11185 pbCodeBuf[off++] = RT_BYTE1(uValue);
11186 pbCodeBuf[off++] = RT_BYTE2(uValue);
11187
11188#elif defined(RT_ARCH_ARM64)
11189 /* movk xdst, #uValue, lsl #0 */
11190 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11191 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
11192
11193#else
11194# error "Port me!"
11195#endif
11196
11197 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11198
11199 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11200 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11201 return off;
11202}
11203
11204
11205#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
11206 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
11207
11208/** Emits code for IEM_MC_STORE_GREG_U16. */
11209DECL_INLINE_THROW(uint32_t)
11210iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11211{
11212 Assert(iGReg < 16);
11213 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11214
11215 /*
11216     * If it's a constant value (unlikely), we treat this as an
11217 * IEM_MC_STORE_GREG_U16_CONST statement.
11218 */
11219 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11220 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11221 { /* likely */ }
11222 else
11223 {
11224 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11225 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11226 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
11227 }
11228
11229 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11230 kIemNativeGstRegUse_ForUpdate);
11231
11232#ifdef RT_ARCH_AMD64
11233 /* mov reg16, reg16 or [mem16] */
11234 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
11235 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11236 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
11237 {
11238 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
11239 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
11240 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
11241 pbCodeBuf[off++] = 0x8b;
11242 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
11243 }
11244 else
11245 {
11246 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
11247 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
11248 if (idxGstTmpReg >= 8)
11249 pbCodeBuf[off++] = X86_OP_REX_R;
11250 pbCodeBuf[off++] = 0x8b;
11251 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
11252 }
11253
11254#elif defined(RT_ARCH_ARM64)
11255 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
11256 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11257 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11258 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
11259 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11260
11261#else
11262# error "Port me!"
11263#endif
11264
11265 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11266
11267 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11268 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11269 return off;
11270}
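
/* Note on the AMD64 path above: when the value variable still lives in its
   stack slot rather than in a host register, the 16-bit value is loaded
   straight from [rbp+disp] into the shadow register, so no host register is
   acquired for the variable at all (unlike the ARM64 path, which always
   acquires one for the bfi). */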
11271
11272
11273#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
11274 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
11275
11276/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
11277DECL_INLINE_THROW(uint32_t)
11278iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
11279{
11280 Assert(iGReg < 16);
11281 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11282 kIemNativeGstRegUse_ForFullWrite);
11283 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11284 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11285 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11286 return off;
11287}
11288
11289
11290#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
11291 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
11292
11293/** Emits code for IEM_MC_STORE_GREG_U32. */
11294DECL_INLINE_THROW(uint32_t)
11295iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11296{
11297 Assert(iGReg < 16);
11298 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11299
11300 /*
11301     * If it's a constant value (unlikely), we treat this as an
11302 * IEM_MC_STORE_GREG_U32_CONST statement.
11303 */
11304 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11305 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11306 { /* likely */ }
11307 else
11308 {
11309 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11310 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11311 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
11312 }
11313
11314 /*
11315     * For the rest we allocate a guest register for the variable and write
11316 * it to the CPUMCTX structure.
11317 */
11318 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11319 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11320#ifdef VBOX_STRICT
11321 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
11322#endif
11323 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11324 return off;
11325}
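
/* Note: a 32-bit GPR write zero-extends to 64 bits in the guest (standard
   x86-64 semantics), and the variable's host register is expected to arrive
   already zero-extended - which is exactly what the VBOX_STRICT check above
   asserts - so a plain 64-bit store into CPUMCTX is sufficient here. */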
11326
11327
11328#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
11329 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
11330
11331/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
11332DECL_INLINE_THROW(uint32_t)
11333iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
11334{
11335 Assert(iGReg < 16);
11336 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11337 kIemNativeGstRegUse_ForFullWrite);
11338 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11339 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11340 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11341 return off;
11342}
11343
11344
11345#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
11346 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
11347
11348/** Emits code for IEM_MC_STORE_GREG_U64. */
11349DECL_INLINE_THROW(uint32_t)
11350iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11351{
11352 Assert(iGReg < 16);
11353 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11354
11355 /*
11356     * If it's a constant value (unlikely), we treat this as an
11357 * IEM_MC_STORE_GREG_U64_CONST statement.
11358 */
11359 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11360 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11361 { /* likely */ }
11362 else
11363 {
11364 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11365 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11366 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
11367 }
11368
11369 /*
11370     * For the rest we allocate a guest register for the variable and write
11371 * it to the CPUMCTX structure.
11372 */
11373 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11374 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11375 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11376 return off;
11377}
11378
11379
11380#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
11381 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
11382
11383/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
11384DECL_INLINE_THROW(uint32_t)
11385iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
11386{
11387 Assert(iGReg < 16);
11388 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11389 kIemNativeGstRegUse_ForUpdate);
11390 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
11391 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11392 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11393 return off;
11394}
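
/* The 32-bit self-copy above does all the work: mov r32, r32 zero-extends on
   AMD64, and the ARM64 helper presumably uses a W-register move with the same
   effect, so no explicit masking of bits 63:32 is needed before the store. */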
11395
11396
11397/*********************************************************************************************************************************
11398* General purpose register manipulation (add, sub). *
11399*********************************************************************************************************************************/
11400
11401#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11402 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11403
11404/** Emits code for IEM_MC_ADD_GREG_U16. */
11405DECL_INLINE_THROW(uint32_t)
11406iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
11407{
11408 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11409 kIemNativeGstRegUse_ForUpdate);
11410
11411#ifdef RT_ARCH_AMD64
11412 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11413 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11414 if (idxGstTmpReg >= 8)
11415 pbCodeBuf[off++] = X86_OP_REX_B;
11416 if (uAddend == 1)
11417 {
11418 pbCodeBuf[off++] = 0xff; /* inc */
11419 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11420 }
11421 else
11422 {
11423 pbCodeBuf[off++] = 0x81;
11424 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11425 pbCodeBuf[off++] = uAddend;
11426 pbCodeBuf[off++] = 0;
11427 }
11428
11429#else
11430 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11431 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11432
11433    /* add tmp, gstgrp, uAddend */
11434 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
11435
11436    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
11437 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11438
11439 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11440#endif
11441
11442 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11443
11444 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11445
11446 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11447 return off;
11448}
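
/* Design note on the ARM64 path above: AArch64 has no 16-bit add and bits
   63:16 of the guest register must be preserved, so the sum is computed into a
   scratch register (a 32-bit add is fine since only the low 16 bits survive)
   and merged back into the shadow register with bfi. */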
11449
11450
11451#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
11452 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11453
11454#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
11455 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11456
11457/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
11458DECL_INLINE_THROW(uint32_t)
11459iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
11460{
11461 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11462 kIemNativeGstRegUse_ForUpdate);
11463
11464#ifdef RT_ARCH_AMD64
11465 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11466 if (f64Bit)
11467 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11468 else if (idxGstTmpReg >= 8)
11469 pbCodeBuf[off++] = X86_OP_REX_B;
11470 if (uAddend == 1)
11471 {
11472 pbCodeBuf[off++] = 0xff; /* inc */
11473 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11474 }
11475 else if (uAddend < 128)
11476 {
11477 pbCodeBuf[off++] = 0x83; /* add */
11478 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11479 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11480 }
11481 else
11482 {
11483 pbCodeBuf[off++] = 0x81; /* add */
11484 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11485 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11486 pbCodeBuf[off++] = 0;
11487 pbCodeBuf[off++] = 0;
11488 pbCodeBuf[off++] = 0;
11489 }
11490
11491#else
11492    /* add gstgrp, gstgrp, uAddend */
11493 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11494 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
11495
11496#endif
11497
11498 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11499
11500 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11501
11502 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11503 return off;
11504}
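
/* Encoding note for the AMD64 path above: the 0x83 form sign-extends its 8-bit
   immediate, so it is only used while uAddend < 128; larger values fall back to
   the 0x81 form with a full 32-bit immediate whose upper three bytes are zero
   (uAddend is a uint8_t).  The ARM64 add takes the whole range directly as an
   unsigned 12-bit immediate. */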
11505
11506
11507
11508#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11509 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11510
11511/** Emits code for IEM_MC_SUB_GREG_U16. */
11512DECL_INLINE_THROW(uint32_t)
11513iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
11514{
11515 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11516 kIemNativeGstRegUse_ForUpdate);
11517
11518#ifdef RT_ARCH_AMD64
11519 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11520 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11521 if (idxGstTmpReg >= 8)
11522 pbCodeBuf[off++] = X86_OP_REX_B;
11523 if (uSubtrahend == 1)
11524 {
11525 pbCodeBuf[off++] = 0xff; /* dec */
11526 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11527 }
11528 else
11529 {
11530 pbCodeBuf[off++] = 0x81;
11531 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11532 pbCodeBuf[off++] = uSubtrahend;
11533 pbCodeBuf[off++] = 0;
11534 }
11535
11536#else
11537 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11538 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11539
11540 /* sub tmp, gstgrp, uSubtrahend */
11541 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
11542
11543    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
11544 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11545
11546 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11547#endif
11548
11549 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11550
11551 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11552
11553 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11554 return off;
11555}
11556
11557
11558#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
11559 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11560
11561#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
11562 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11563
11564/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
11565DECL_INLINE_THROW(uint32_t)
11566iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
11567{
11568 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11569 kIemNativeGstRegUse_ForUpdate);
11570
11571#ifdef RT_ARCH_AMD64
11572 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11573 if (f64Bit)
11574 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11575 else if (idxGstTmpReg >= 8)
11576 pbCodeBuf[off++] = X86_OP_REX_B;
11577 if (uSubtrahend == 1)
11578 {
11579 pbCodeBuf[off++] = 0xff; /* dec */
11580 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11581 }
11582 else if (uSubtrahend < 128)
11583 {
11584 pbCodeBuf[off++] = 0x83; /* sub */
11585 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11586 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11587 }
11588 else
11589 {
11590 pbCodeBuf[off++] = 0x81; /* sub */
11591 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11592 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11593 pbCodeBuf[off++] = 0;
11594 pbCodeBuf[off++] = 0;
11595 pbCodeBuf[off++] = 0;
11596 }
11597
11598#else
11599    /* sub gstgrp, gstgrp, uSubtrahend */
11600 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11601 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
11602
11603#endif
11604
11605 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11606
11607 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11608
11609 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11610 return off;
11611}
11612
11613
11614/*********************************************************************************************************************************
11615* Local variable manipulation (add, sub, and, or). *
11616*********************************************************************************************************************************/
11617
11618#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
11619 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11620
11621#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
11622 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11623
11624#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
11625 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11626
11627#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
11628 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11629
11630/** Emits code for AND'ing a local and a constant value. */
11631DECL_INLINE_THROW(uint32_t)
11632iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11633{
11634#ifdef VBOX_STRICT
11635 switch (cbMask)
11636 {
11637 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11638 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11639 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11640 case sizeof(uint64_t): break;
11641 default: AssertFailedBreak();
11642 }
11643#endif
11644
11645 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11646 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11647
11648 if (cbMask <= sizeof(uint32_t))
11649 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
11650 else
11651 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
11652
11653 iemNativeVarRegisterRelease(pReNative, idxVar);
11654 return off;
11655}
11656
11657
11658#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
11659 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11660
11661#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
11662 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11663
11664#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
11665 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11666
11667#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
11668 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11669
11670/** Emits code for OR'ing a local and a constant value. */
11671DECL_INLINE_THROW(uint32_t)
11672iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11673{
11674#ifdef VBOX_STRICT
11675 switch (cbMask)
11676 {
11677 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11678 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11679 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11680 case sizeof(uint64_t): break;
11681 default: AssertFailedBreak();
11682 }
11683#endif
11684
11685 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11686 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11687
11688 if (cbMask <= sizeof(uint32_t))
11689 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
11690 else
11691 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
11692
11693 iemNativeVarRegisterRelease(pReNative, idxVar);
11694 return off;
11695}
11696
11697
11698#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
11699 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
11700
11701#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
11702 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
11703
11704#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
11705 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
11706
11707/** Emits code for reversing the byte order in a local value. */
11708DECL_INLINE_THROW(uint32_t)
11709iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
11710{
11711 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11712 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
11713
11714 switch (cbLocal)
11715 {
11716 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
11717 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
11718 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
11719 default: AssertFailedBreak();
11720 }
11721
11722 iemNativeVarRegisterRelease(pReNative, idxVar);
11723 return off;
11724}
11725
11726
11727
11728/*********************************************************************************************************************************
11729* EFLAGS *
11730*********************************************************************************************************************************/
11731
11732#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
11733# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
11734#else
11735# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
11736 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
11737
11738DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
11739{
11740 if (fEflOutput)
11741 {
11742 PVMCPUCC const pVCpu = pReNative->pVCpu;
11743# ifndef IEMLIVENESS_EXTENDED_LAYOUT
11744 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
11745 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
11746 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
11747# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11748 if (fEflOutput & (a_fEfl)) \
11749 { \
11750 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
11751 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11752 else \
11753 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11754 } else do { } while (0)
11755# else
11756 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
11757 IEMLIVENESSBIT const LivenessClobbered =
11758 {
11759 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11760 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11761 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11762 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11763 };
11764 IEMLIVENESSBIT const LivenessDelayable =
11765 {
11766 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11767 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11768 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11769 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11770 };
11771# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11772 if (fEflOutput & (a_fEfl)) \
11773 { \
11774 if (LivenessClobbered.a_fLivenessMember) \
11775 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11776 else if (LivenessDelayable.a_fLivenessMember) \
11777 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
11778 else \
11779 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11780 } else do { } while (0)
11781# endif
11782 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
11783 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
11784 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
11785 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
11786 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
11787 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
11788 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
11789# undef CHECK_FLAG_AND_UPDATE_STATS
11790 }
11791 RT_NOREF(fEflInput);
11792}
11793#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
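
/* Reading the statistics helper above: with the extended liveness layout a
   flag output is counted as "skippable" when it is only clobbered afterwards,
   "delayable" when the sole remaining interest in it is a potential exception
   or call path, and "required" otherwise; the compact layout only separates
   required from skippable.  This is statistics gathering only and has no
   effect on the generated code. */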
11794
11795#undef IEM_MC_FETCH_EFLAGS /* should not be used */
11796#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11797 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
11798
11799/** Handles IEM_MC_FETCH_EFLAGS_EX. */
11800DECL_INLINE_THROW(uint32_t)
11801iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
11802 uint32_t fEflInput, uint32_t fEflOutput)
11803{
11804 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
11805 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11806 RT_NOREF(fEflInput, fEflOutput);
11807
11808#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
11809# ifdef VBOX_STRICT
11810 if ( pReNative->idxCurCall != 0
11811 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
11812 {
11813 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
11814 uint32_t const fBoth = fEflInput | fEflOutput;
11815# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
11816 AssertMsg( !(fBoth & (a_fElfConst)) \
11817 || (!(fEflInput & (a_fElfConst)) \
11818 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11819 : !(fEflOutput & (a_fElfConst)) \
11820 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11821 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
11822 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
11823 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
11824 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
11825 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
11826 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
11827 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
11828 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
11829 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
11830# undef ASSERT_ONE_EFL
11831 }
11832# endif
11833#endif
11834
11835    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
11836 * the existing shadow copy. */
11837 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
11838 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11839 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
11840 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11841 return off;
11842}
11843
11844
11845
11846/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
11847 * start using it with custom native code emission (inlining assembly
11848 * instruction helpers). */
11849#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
11850#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11851 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11852 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
11853
11854/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
11855DECL_INLINE_THROW(uint32_t)
11856iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
11857{
11858 RT_NOREF(fEflOutput);
11859 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
11860 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11861
11862#ifdef VBOX_STRICT
11863 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
11864 uint32_t offFixup = off;
11865 off = iemNativeEmitJnzToFixed(pReNative, off, off);
11866 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
11867 iemNativeFixupFixedJump(pReNative, offFixup, off);
11868
11869 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
11870 offFixup = off;
11871 off = iemNativeEmitJzToFixed(pReNative, off, off);
11872 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
11873 iemNativeFixupFixedJump(pReNative, offFixup, off);
11874
11875    /** @todo validate that only bits in the fEflOutput mask changed. */
11876#endif
11877
11878 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11879 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
11880 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11881 return off;
11882}
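
/* The two strict-build checks above guard the architectural constant bits:
   breakpoint 0x2001 fires if the always-one bit (X86_EFL_RA1_MASK, bit 1) is
   clear, 0x2002 if any of the reserved always-zero bits within
   CPUMX86EFLAGS_HW_MASK_32 is set.  Either one points at a bad EFLAGS value
   produced by recompiled code rather than at guest behaviour. */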
11883
11884
11885
11886/*********************************************************************************************************************************
11887* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
11888*********************************************************************************************************************************/
11889
11890#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
11891 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
11892
11893#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
11894 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
11895
11896#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
11897 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
11898
11899
11900/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
11901 * IEM_MC_FETCH_SREG_ZX_U64. */
11902DECL_INLINE_THROW(uint32_t)
11903iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
11904{
11905 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
11906 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
11907 Assert(iSReg < X86_SREG_COUNT);
11908
11909 /*
11910     * For now, we will not create a shadow copy of a selector. The rationale
11911     * is that since we do not recompile the popping and loading of segment
11912     * registers and since the IEM_MC_FETCH_SREG_U* MCs are only used for
11913     * pushing and moving to registers, there is only a small chance that the
11914     * shadow copy will be accessed again before the register is reloaded. One
11915     * scenario would be nested calls in 16-bit code, but I doubt it's worth
11916     * the extra register pressure atm.
11917     *
11918     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
11919     * and iemNativeVarRegisterAcquire for a load scenario. We only have the
11920     * store scenario covered at present (r160730).
11921 */
11922 iemNativeVarSetKindToStack(pReNative, idxDstVar);
11923 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
11924 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
11925 iemNativeVarRegisterRelease(pReNative, idxDstVar);
11926 return off;
11927}
11928
11929
11930
11931/*********************************************************************************************************************************
11932* Register references. *
11933*********************************************************************************************************************************/
11934
11935#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
11936 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
11937
11938#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
11939 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
11940
11941/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
11942DECL_INLINE_THROW(uint32_t)
11943iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
11944{
11945 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
11946 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11947 Assert(iGRegEx < 20);
11948
11949 if (iGRegEx < 16)
11950 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11951 else
11952 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
11953
11954 /* If we've delayed writing back the register value, flush it now. */
11955 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11956
11957 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11958 if (!fConst)
11959 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
11960
11961 return off;
11962}
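
/* Why both flushes above are needed: a register reference hands out a pointer
   straight into CPUMCTX, so any delayed write to that GPR has to reach memory
   first, and for a writable reference the shadow copy must be dropped as well
   because the memory value may be modified behind the register allocator's
   back. */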
11963
11964#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
11965 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
11966
11967#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
11968 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
11969
11970#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
11971 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
11972
11973#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
11974 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
11975
11976#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
11977 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
11978
11979#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
11980 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
11981
11982#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
11983 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
11984
11985#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
11986 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
11987
11988#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
11989 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
11990
11991#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
11992 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
11993
11994/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
11995DECL_INLINE_THROW(uint32_t)
11996iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
11997{
11998 Assert(iGReg < 16);
11999 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
12000 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12001
12002 /* If we've delayed writing back the register value, flush it now. */
12003 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
12004
12005 /* If it's not a const reference we need to flush the shadow copy of the register now. */
12006 if (!fConst)
12007 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
12008
12009 return off;
12010}
12011
12012
12013#undef IEM_MC_REF_EFLAGS /* should not be used. */
12014#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
12015 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
12016 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
12017
12018/** Handles IEM_MC_REF_EFLAGS. */
12019DECL_INLINE_THROW(uint32_t)
12020iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
12021{
12022 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
12023 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12024
12025 /* If we've delayed writing back the register value, flush it now. */
12026 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
12027
12028 /* If there is a shadow copy of guest EFLAGS, flush it now. */
12029 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
12030
12031 return off;
12032}
12033
12034
12035/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
12036 * different code from the threaded recompiler, it may be helpful. For now
12037 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
12038#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
12039
12040
12041#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
12042 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
12043
12044#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
12045 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
12046
12047#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
12048 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
12049
12050/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
12051DECL_INLINE_THROW(uint32_t)
12052iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
12053{
12054 Assert(iXReg < 16);
12055 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
12056 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12057
12058 /* If we've delayed writing back the register value, flush it now. */
12059 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
12060
12061#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
12062 /* If it's not a const reference we need to flush the shadow copy of the register now. */
12063 if (!fConst)
12064 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
12065#else
12066 RT_NOREF(fConst);
12067#endif
12068
12069 return off;
12070}
12071
12072
12073#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
12074 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
12075
12076/** Handles IEM_MC_REF_MXCSR. */
12077DECL_INLINE_THROW(uint32_t)
12078iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
12079{
12080 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
12081 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12082
12083 /* If we've delayed writing back the register value, flush it now. */
12084 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
12085
12086 /* If there is a shadow copy of guest MXCSR, flush it now. */
12087 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
12088
12089 return off;
12090}
12091
12092
12093
12094/*********************************************************************************************************************************
12095* Effective Address Calculation *
12096*********************************************************************************************************************************/
12097#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
12098 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
12099
12100/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
12101 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
12102DECL_INLINE_THROW(uint32_t)
12103iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12104 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
12105{
12106 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12107
12108 /*
12109 * Handle the disp16 form with no registers first.
12110 *
12111 * Convert to an immediate value, as that'll delay the register allocation
12112 * and assignment till the memory access / call / whatever and we can use
12113 * a more appropriate register (or none at all).
12114 */
12115 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
12116 {
12117 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
12118 return off;
12119 }
12120
12121    /* Determine the displacement. */
12122 uint16_t u16EffAddr;
12123 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12124 {
12125 case 0: u16EffAddr = 0; break;
12126 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
12127 case 2: u16EffAddr = u16Disp; break;
12128 default: AssertFailedStmt(u16EffAddr = 0);
12129 }
12130
12131 /* Determine the registers involved. */
12132 uint8_t idxGstRegBase;
12133 uint8_t idxGstRegIndex;
12134 switch (bRm & X86_MODRM_RM_MASK)
12135 {
12136 case 0:
12137 idxGstRegBase = X86_GREG_xBX;
12138 idxGstRegIndex = X86_GREG_xSI;
12139 break;
12140 case 1:
12141 idxGstRegBase = X86_GREG_xBX;
12142 idxGstRegIndex = X86_GREG_xDI;
12143 break;
12144 case 2:
12145 idxGstRegBase = X86_GREG_xBP;
12146 idxGstRegIndex = X86_GREG_xSI;
12147 break;
12148 case 3:
12149 idxGstRegBase = X86_GREG_xBP;
12150 idxGstRegIndex = X86_GREG_xDI;
12151 break;
12152 case 4:
12153 idxGstRegBase = X86_GREG_xSI;
12154 idxGstRegIndex = UINT8_MAX;
12155 break;
12156 case 5:
12157 idxGstRegBase = X86_GREG_xDI;
12158 idxGstRegIndex = UINT8_MAX;
12159 break;
12160 case 6:
12161 idxGstRegBase = X86_GREG_xBP;
12162 idxGstRegIndex = UINT8_MAX;
12163 break;
12164#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
12165 default:
12166#endif
12167 case 7:
12168 idxGstRegBase = X86_GREG_xBX;
12169 idxGstRegIndex = UINT8_MAX;
12170 break;
12171 }
12172
12173 /*
12174 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
12175 */
12176 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12177 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12178 kIemNativeGstRegUse_ReadOnly);
12179 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
12180 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12181 kIemNativeGstRegUse_ReadOnly)
12182 : UINT8_MAX;
12183#ifdef RT_ARCH_AMD64
12184 if (idxRegIndex == UINT8_MAX)
12185 {
12186 if (u16EffAddr == 0)
12187 {
12188            /* movzx ret, base */
12189 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
12190 }
12191 else
12192 {
12193 /* lea ret32, [base64 + disp32] */
12194 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12195 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12196 if (idxRegRet >= 8 || idxRegBase >= 8)
12197 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12198 pbCodeBuf[off++] = 0x8d;
12199 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12200 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
12201 else
12202 {
12203 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
12204 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12205 }
12206 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12207 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12208 pbCodeBuf[off++] = 0;
12209 pbCodeBuf[off++] = 0;
12210 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12211
12212 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12213 }
12214 }
12215 else
12216 {
12217 /* lea ret32, [index64 + base64 (+ disp32)] */
12218 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12219 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12220 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12221 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12222 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12223 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12224 pbCodeBuf[off++] = 0x8d;
12225 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
12226 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12227 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
12228 if (bMod == X86_MOD_MEM4)
12229 {
12230 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12231 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12232 pbCodeBuf[off++] = 0;
12233 pbCodeBuf[off++] = 0;
12234 }
12235 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12236 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12237 }
12238
12239#elif defined(RT_ARCH_ARM64)
12240 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
12241 if (u16EffAddr == 0)
12242 {
12243 if (idxRegIndex == UINT8_MAX)
12244 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
12245 else
12246 {
12247 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
12248 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12249 }
12250 }
12251 else
12252 {
12253 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
12254 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
12255 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
12256 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12257 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
12258 else
12259 {
12260 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
12261 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12262 }
12263 if (idxRegIndex != UINT8_MAX)
12264 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
12265 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12266 }
12267
12268#else
12269# error "port me"
12270#endif
12271
12272 if (idxRegIndex != UINT8_MAX)
12273 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12274 iemNativeRegFreeTmp(pReNative, idxRegBase);
12275 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12276 return off;
12277}
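
/* A minimal scalar sketch (disabled, illustration only; the helper name is made up
   and not used anywhere) of the 16-bit effective address calculation that the code
   emitted above reproduces.  Register values are passed in explicitly here. */
#if 0
static uint16_t iemExampleCalcEffAddr16(uint8_t bRm, uint16_t u16Disp,
                                        uint16_t uBx, uint16_t uBp, uint16_t uSi, uint16_t uDi)
{
    /* mod=0 && r/m=6: plain disp16, no registers involved. */
    if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
        return u16Disp;

    /* Displacement: none, sign-extended disp8, or disp16. */
    uint16_t uEffAddr;
    switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    {
        case 1:  uEffAddr = (uint16_t)(int16_t)(int8_t)u16Disp; break;
        case 2:  uEffAddr = u16Disp; break;
        default: uEffAddr = 0; break;
    }

    /* Base and optional index register per the r/m table (same order as above). */
    switch (bRm & X86_MODRM_RM_MASK)
    {
        case 0:  uEffAddr += uBx + uSi; break;
        case 1:  uEffAddr += uBx + uDi; break;
        case 2:  uEffAddr += uBp + uSi; break;
        case 3:  uEffAddr += uBp + uDi; break;
        case 4:  uEffAddr += uSi; break;
        case 5:  uEffAddr += uDi; break;
        case 6:  uEffAddr += uBp; break;
        default: uEffAddr += uBx; break;
    }
    return uEffAddr; /* the uint16_t type gives the 16-bit wrap-around for free */
}
#endif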
12278
12279
12280#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
12281 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
12282
12283/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
12284 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
12285DECL_INLINE_THROW(uint32_t)
12286iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12287 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
12288{
12289 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12290
12291 /*
12292 * Handle the disp32 form with no registers first.
12293 *
12294 * Convert to an immediate value, as that'll delay the register allocation
12295 * and assignment till the memory access / call / whatever and we can use
12296 * a more appropriate register (or none at all).
12297 */
12298 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12299 {
12300 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
12301 return off;
12302 }
12303
12304    /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
12305 uint32_t u32EffAddr = 0;
12306 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12307 {
12308 case 0: break;
12309 case 1: u32EffAddr = (int8_t)u32Disp; break;
12310 case 2: u32EffAddr = u32Disp; break;
12311 default: AssertFailed();
12312 }
12313
12314 /* Get the register (or SIB) value. */
12315 uint8_t idxGstRegBase = UINT8_MAX;
12316 uint8_t idxGstRegIndex = UINT8_MAX;
12317 uint8_t cShiftIndex = 0;
12318 switch (bRm & X86_MODRM_RM_MASK)
12319 {
12320 case 0: idxGstRegBase = X86_GREG_xAX; break;
12321 case 1: idxGstRegBase = X86_GREG_xCX; break;
12322 case 2: idxGstRegBase = X86_GREG_xDX; break;
12323 case 3: idxGstRegBase = X86_GREG_xBX; break;
12324 case 4: /* SIB */
12325 {
12326            /* index w/ scaling. */
12327 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12328 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12329 {
12330 case 0: idxGstRegIndex = X86_GREG_xAX; break;
12331 case 1: idxGstRegIndex = X86_GREG_xCX; break;
12332 case 2: idxGstRegIndex = X86_GREG_xDX; break;
12333 case 3: idxGstRegIndex = X86_GREG_xBX; break;
12334 case 4: cShiftIndex = 0; /*no index*/ break;
12335 case 5: idxGstRegIndex = X86_GREG_xBP; break;
12336 case 6: idxGstRegIndex = X86_GREG_xSI; break;
12337 case 7: idxGstRegIndex = X86_GREG_xDI; break;
12338 }
12339
12340 /* base */
12341 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
12342 {
12343 case 0: idxGstRegBase = X86_GREG_xAX; break;
12344 case 1: idxGstRegBase = X86_GREG_xCX; break;
12345 case 2: idxGstRegBase = X86_GREG_xDX; break;
12346 case 3: idxGstRegBase = X86_GREG_xBX; break;
12347 case 4:
12348 idxGstRegBase = X86_GREG_xSP;
12349 u32EffAddr += uSibAndRspOffset >> 8;
12350 break;
12351 case 5:
12352 if ((bRm & X86_MODRM_MOD_MASK) != 0)
12353 idxGstRegBase = X86_GREG_xBP;
12354 else
12355 {
12356 Assert(u32EffAddr == 0);
12357 u32EffAddr = u32Disp;
12358 }
12359 break;
12360 case 6: idxGstRegBase = X86_GREG_xSI; break;
12361 case 7: idxGstRegBase = X86_GREG_xDI; break;
12362 }
12363 break;
12364 }
12365 case 5: idxGstRegBase = X86_GREG_xBP; break;
12366 case 6: idxGstRegBase = X86_GREG_xSI; break;
12367 case 7: idxGstRegBase = X86_GREG_xDI; break;
12368 }
12369
12370 /*
12371 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12372 * the start of the function.
12373 */
12374 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12375 {
12376 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
12377 return off;
12378 }
12379
12380 /*
12381 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12382 */
12383 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12384 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12385 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12386 kIemNativeGstRegUse_ReadOnly);
12387 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12388 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12389 kIemNativeGstRegUse_ReadOnly);
12390
12391 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12392 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12393 {
12394 idxRegBase = idxRegIndex;
12395 idxRegIndex = UINT8_MAX;
12396 }
12397
12398#ifdef RT_ARCH_AMD64
12399 if (idxRegIndex == UINT8_MAX)
12400 {
12401 if (u32EffAddr == 0)
12402 {
12403 /* mov ret, base */
12404 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12405 }
12406 else
12407 {
12408 /* lea ret32, [base64 + disp32] */
12409 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12410 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12411 if (idxRegRet >= 8 || idxRegBase >= 8)
12412 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12413 pbCodeBuf[off++] = 0x8d;
12414 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12415 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12416 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12417 else
12418 {
12419 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12420 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12421 }
12422 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12423 if (bMod == X86_MOD_MEM4)
12424 {
12425 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12426 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12427 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12428 }
12429 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12430 }
12431 }
12432 else
12433 {
12434 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12435 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12436 if (idxRegBase == UINT8_MAX)
12437 {
12438 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
12439 if (idxRegRet >= 8 || idxRegIndex >= 8)
12440 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12441 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12442 pbCodeBuf[off++] = 0x8d;
12443 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12444 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12445 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12446 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12447 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12448 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12449 }
12450 else
12451 {
12452 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12453 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12454 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12455 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12456 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12457 pbCodeBuf[off++] = 0x8d;
12458 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12459 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12460 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12461 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12462 if (bMod != X86_MOD_MEM0)
12463 {
12464 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12465 if (bMod == X86_MOD_MEM4)
12466 {
12467 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12468 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12469 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12470 }
12471 }
12472 }
12473 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12474 }
12475
12476#elif defined(RT_ARCH_ARM64)
12477 if (u32EffAddr == 0)
12478 {
12479 if (idxRegIndex == UINT8_MAX)
12480 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12481 else if (idxRegBase == UINT8_MAX)
12482 {
12483 if (cShiftIndex == 0)
12484 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
12485 else
12486 {
12487 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12488 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
12489 }
12490 }
12491 else
12492 {
12493 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12494 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12495 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12496 }
12497 }
12498 else
12499 {
12500 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
12501 {
12502 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12503 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
12504 }
12505 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
12506 {
12507 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12508 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12509 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
12510 }
12511 else
12512 {
12513 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
12514 if (idxRegBase != UINT8_MAX)
12515 {
12516 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12517 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12518 }
12519 }
12520 if (idxRegIndex != UINT8_MAX)
12521 {
12522 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12523 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12524 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12525 }
12526 }
12527
12528#else
12529# error "port me"
12530#endif
12531
12532 if (idxRegIndex != UINT8_MAX)
12533 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12534 if (idxRegBase != UINT8_MAX)
12535 iemNativeRegFreeTmp(pReNative, idxRegBase);
12536 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12537 return off;
12538}
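
/* A minimal scalar sketch (disabled, illustration only; the helper name is made up)
   of the SIB decoding performed above for 32-bit addressing.  uSibAndRspOffset packs
   the SIB byte in bits 0..7 and the fixed RSP/ESP offset for 'pop [esp]' style
   operands in bits 8..15; the mod=1/2 displacement is assumed to have been added by
   the caller, as in the function above. */
#if 0
static uint32_t iemExampleDecodeSib32(uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp,
                                      uint32_t const *pauGprs /* [8], EAX..EDI */)
{
    uint8_t const cShift = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
    uint8_t const iIndex = (uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK;
    uint8_t const iBase  = uSibAndRspOffset & X86_SIB_BASE_MASK;

    uint32_t uEffAddr = 0;
    if (iIndex != 4)                                    /* index=4 means no index register */
        uEffAddr += pauGprs[iIndex] << cShift;
    if (iBase == 4)                                     /* base=ESP: include the fixed pop offset */
        uEffAddr += pauGprs[X86_GREG_xSP] + (uSibAndRspOffset >> 8);
    else if (iBase == 5 && (bRm & X86_MODRM_MOD_MASK) == 0)
        uEffAddr += u32Disp;                            /* mod=0, base=5: disp32, no base register */
    else
        uEffAddr += pauGprs[iBase];
    return uEffAddr;
}
#endif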
12539
12540
12541#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12542 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12543 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12544
12545#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12546 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12547 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12548
12549#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12550 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12551 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
12552
12553/**
12554 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
12555 *
12556 * @returns New code buffer offset.
12557 * @param pReNative The native recompile state.
12558 * @param off The current code buffer offset.
12559 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
12560 * bit 4 to REX.X. The two bits are part of the
12561 * REG sub-field, which isn't needed in this
12562 * function.
12563 * @param uSibAndRspOffset Two parts:
12564 * - The first 8 bits make up the SIB byte.
12565 * - The next 8 bits are the fixed RSP/ESP offset
12566 * in case of a pop [xSP].
12567 * @param u32Disp The displacement byte/word/dword, if any.
12568 * @param cbInstr The size of the fully decoded instruction. Used
12569 * for RIP relative addressing.
12570 * @param idxVarRet The result variable number.
12571 * @param f64Bit Whether to use a 64-bit or 32-bit address size
12572 * when calculating the address.
12573 *
12574 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
12575 */
12576DECL_INLINE_THROW(uint32_t)
12577iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
12578 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
12579{
12580 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12581
12582 /*
12583 * Special case the rip + disp32 form first.
12584 */
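    /* In scalar terms: GCPtrEff = address of the next instruction + disp32,
       i.e. (PC + cbInstr) + (int32_t)u32Disp, truncated to 32 bits if !f64Bit. */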
12585 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12586 {
12587#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
12588        /* Need to take the current PC offset into account for the displacement. No need to flush here,
12589         * as the PC is only read and there is no branching or helper calling involved. */
12590 u32Disp += pReNative->Core.offPc;
12591#endif
12592
12593 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12594 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
12595 kIemNativeGstRegUse_ReadOnly);
12596#ifdef RT_ARCH_AMD64
12597 if (f64Bit)
12598 {
12599 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
12600 if ((int32_t)offFinalDisp == offFinalDisp)
12601 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
12602 else
12603 {
12604 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
12605 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
12606 }
12607 }
12608 else
12609 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
12610
12611#elif defined(RT_ARCH_ARM64)
12612 if (f64Bit)
12613 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12614 (int64_t)(int32_t)u32Disp + cbInstr);
12615 else
12616 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12617 (int32_t)u32Disp + cbInstr);
12618
12619#else
12620# error "Port me!"
12621#endif
12622 iemNativeRegFreeTmp(pReNative, idxRegPc);
12623 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12624 return off;
12625 }
12626
12627    /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
12628 int64_t i64EffAddr = 0;
12629 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12630 {
12631 case 0: break;
12632 case 1: i64EffAddr = (int8_t)u32Disp; break;
12633 case 2: i64EffAddr = (int32_t)u32Disp; break;
12634 default: AssertFailed();
12635 }
12636
12637 /* Get the register (or SIB) value. */
12638 uint8_t idxGstRegBase = UINT8_MAX;
12639 uint8_t idxGstRegIndex = UINT8_MAX;
12640 uint8_t cShiftIndex = 0;
12641 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
12642 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
12643 else /* SIB: */
12644 {
12645        /* index w/ scaling. */
12646 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12647 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12648 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
12649 if (idxGstRegIndex == 4)
12650 {
12651 /* no index */
12652 cShiftIndex = 0;
12653 idxGstRegIndex = UINT8_MAX;
12654 }
12655
12656 /* base */
12657 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
12658 if (idxGstRegBase == 4)
12659 {
12660 /* pop [rsp] hack */
12661 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
12662 }
12663 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
12664 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
12665 {
12666 /* mod=0 and base=5 -> disp32, no base reg. */
12667 Assert(i64EffAddr == 0);
12668 i64EffAddr = (int32_t)u32Disp;
12669 idxGstRegBase = UINT8_MAX;
12670 }
12671 }
12672
12673 /*
12674 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12675 * the start of the function.
12676 */
12677 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12678 {
12679 if (f64Bit)
12680 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
12681 else
12682 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
12683 return off;
12684 }
12685
12686 /*
12687 * Now emit code that calculates:
12688 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12689 * or if !f64Bit:
12690 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12691 */
12692 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12693 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12694 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12695 kIemNativeGstRegUse_ReadOnly);
12696 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12697 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12698 kIemNativeGstRegUse_ReadOnly);
12699
12700 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12701 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12702 {
12703 idxRegBase = idxRegIndex;
12704 idxRegIndex = UINT8_MAX;
12705 }
12706
12707#ifdef RT_ARCH_AMD64
12708 uint8_t bFinalAdj;
12709 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
12710 bFinalAdj = 0; /* likely */
12711 else
12712 {
12713 /* pop [rsp] with a problematic disp32 value. Split out the
12714 RSP offset and add it separately afterwards (bFinalAdj). */
12715 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
12716 Assert(idxGstRegBase == X86_GREG_xSP);
12717 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
12718 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
12719 Assert(bFinalAdj != 0);
12720 i64EffAddr -= bFinalAdj;
12721 Assert((int32_t)i64EffAddr == i64EffAddr);
12722 }
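    /* I.e. below we emit the lea with (i64EffAddr - bFinalAdj) as its 32-bit displacement and
       add bFinalAdj back afterwards, yielding the same sum while keeping the lea displacement
       within the signed 32-bit range. */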
12723 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
12724//pReNative->pInstrBuf[off++] = 0xcc;
12725
12726 if (idxRegIndex == UINT8_MAX)
12727 {
12728 if (u32EffAddr == 0)
12729 {
12730 /* mov ret, base */
12731 if (f64Bit)
12732 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
12733 else
12734 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12735 }
12736 else
12737 {
12738 /* lea ret, [base + disp32] */
12739 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12740 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12741 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
12742 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12743 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12744 | (f64Bit ? X86_OP_REX_W : 0);
12745 pbCodeBuf[off++] = 0x8d;
12746 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12747 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12748 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12749 else
12750 {
12751 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12752 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12753 }
12754 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12755 if (bMod == X86_MOD_MEM4)
12756 {
12757 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12758 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12759 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12760 }
12761 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12762 }
12763 }
12764 else
12765 {
12766 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12767 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12768 if (idxRegBase == UINT8_MAX)
12769 {
12770 /* lea ret, [(index64 << cShiftIndex) + disp32] */
12771 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
12772 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12773 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12774 | (f64Bit ? X86_OP_REX_W : 0);
12775 pbCodeBuf[off++] = 0x8d;
12776 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12777 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12778 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12779 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12780 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12781 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12782 }
12783 else
12784 {
12785 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12786 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12787 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12788 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12789 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12790 | (f64Bit ? X86_OP_REX_W : 0);
12791 pbCodeBuf[off++] = 0x8d;
12792 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12793 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12794 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12795 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12796 if (bMod != X86_MOD_MEM0)
12797 {
12798 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12799 if (bMod == X86_MOD_MEM4)
12800 {
12801 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12802 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12803 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12804 }
12805 }
12806 }
12807 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12808 }
12809
12810 if (!bFinalAdj)
12811 { /* likely */ }
12812 else
12813 {
12814 Assert(f64Bit);
12815 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
12816 }
12817
12818#elif defined(RT_ARCH_ARM64)
12819 if (i64EffAddr == 0)
12820 {
12821 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12822 if (idxRegIndex == UINT8_MAX)
12823 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
12824 else if (idxRegBase != UINT8_MAX)
12825 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12826 f64Bit, false /*fSetFlags*/, cShiftIndex);
12827 else
12828 {
12829 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
12830 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
12831 }
12832 }
12833 else
12834 {
12835 if (f64Bit)
12836 { /* likely */ }
12837 else
12838 i64EffAddr = (int32_t)i64EffAddr;
12839
12840 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
12841 {
12842 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12843 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
12844 }
12845 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
12846 {
12847 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12848 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
12849 }
12850 else
12851 {
12852 if (f64Bit)
12853 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
12854 else
12855 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
12856 if (idxRegBase != UINT8_MAX)
12857 {
12858 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12859 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
12860 }
12861 }
12862 if (idxRegIndex != UINT8_MAX)
12863 {
12864 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12865 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12866 f64Bit, false /*fSetFlags*/, cShiftIndex);
12867 }
12868 }
12869
12870#else
12871# error "port me"
12872#endif
12873
12874 if (idxRegIndex != UINT8_MAX)
12875 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12876 if (idxRegBase != UINT8_MAX)
12877 iemNativeRegFreeTmp(pReNative, idxRegBase);
12878 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12879 return off;
12880}
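
/* A minimal sketch (disabled, illustration only; the helper name is made up) of how
   the extended ModRM byte is unpacked above: bRmEx carries REX.B in bit 3 and REX.X
   in bit 4, so the 4-bit base/index register numbers can be formed directly.  The
   mod=0/base=5 disp32 case and the 'pop [rsp]' offset are handled as in the function
   above and omitted here. */
#if 0
static void iemExampleDecodeRmEx64(uint8_t bRmEx, uint32_t uSibAndRspOffset,
                                   uint8_t *piGstRegBase, uint8_t *piGstRegIndex, uint8_t *pcShift)
{
    if ((bRmEx & X86_MODRM_RM_MASK) != 4)
    {
        *piGstRegBase  = bRmEx & (X86_MODRM_RM_MASK | 0x8);             /* bRmEx[3] = REX.B */
        *piGstRegIndex = UINT8_MAX;
        *pcShift       = 0;
    }
    else /* SIB */
    {
        *pcShift       = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
        *piGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
                       | ((bRmEx & 0x10) >> 1);                         /* bRmEx[4] = REX.X */
        if (*piGstRegIndex == 4)                                        /* plain RSP encoding = no index */
            *piGstRegIndex = UINT8_MAX;
        *piGstRegBase  = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8);
    }
}
#endif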
12881
12882
12883/*********************************************************************************************************************************
12884* TLB Lookup. *
12885*********************************************************************************************************************************/
12886
12887/**
12888 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
12889 */
12890DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
12891{
12892 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
12893 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
12894 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
12895 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
12896
12897 /* Do the lookup manually. */
12898 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
12899 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
12900 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
12901 if (RT_LIKELY(pTlbe->uTag == uTag))
12902 {
12903 /*
12904 * Check TLB page table level access flags.
12905 */
12906 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
12907 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
12908 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
12909 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
12910 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
12911 | IEMTLBE_F_PG_UNASSIGNED
12912 | IEMTLBE_F_PT_NO_ACCESSED
12913 | fNoWriteNoDirty | fNoUser);
12914 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
12915 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
12916 {
12917 /*
12918 * Return the address.
12919 */
12920 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
12921 if ((uintptr_t)pbAddr == uResult)
12922 return;
12923 RT_NOREF(cbMem);
12924 AssertFailed();
12925 }
12926 else
12927 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
12928 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
12929 }
12930 else
12931 AssertFailed();
12932 RT_BREAKPOINT();
12933}
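
/* For reference, a sketch (disabled, helper name made up) of how the uSegAndSizeAndAccess
   argument unpacked above is put together on the emitter side, mirroring the
   RT_BYTE1/RT_BYTE2/>>16 decomposition. */
#if 0
static uint32_t iemExamplePackSegSizeAccess(uint8_t iSegReg, uint8_t cbMem, uint32_t fAccess)
{
    return (uint32_t)iSegReg | ((uint32_t)cbMem << 8) | (fAccess << 16);
}
#endif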
12934
12935/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
12936
12937
12938/*********************************************************************************************************************************
12939* Memory fetches and stores common *
12940*********************************************************************************************************************************/
12941
12942typedef enum IEMNATIVEMITMEMOP
12943{
12944 kIemNativeEmitMemOp_Store = 0,
12945 kIemNativeEmitMemOp_Fetch,
12946 kIemNativeEmitMemOp_Fetch_Zx_U16,
12947 kIemNativeEmitMemOp_Fetch_Zx_U32,
12948 kIemNativeEmitMemOp_Fetch_Zx_U64,
12949 kIemNativeEmitMemOp_Fetch_Sx_U16,
12950 kIemNativeEmitMemOp_Fetch_Sx_U32,
12951 kIemNativeEmitMemOp_Fetch_Sx_U64
12952} IEMNATIVEMITMEMOP;
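
/* Scalar meaning of the Zx/Sx fetch variants above, shown for an 8-bit source value
   (disabled sketch, names made up): Zx zero-extends and Sx sign-extends into the
   wider destination before the result lands in the target variable. */
#if 0
static uint64_t iemExampleFetchU8ZxU64(uint8_t uValue) { return uValue; }
static uint64_t iemExampleFetchU8SxU64(uint8_t uValue) { return (uint64_t)(int64_t)(int8_t)uValue; }
#endif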
12953
12954/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
12955 * as well as IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
12956 * (with iSegReg = UINT8_MAX). */
12957DECL_INLINE_THROW(uint32_t)
12958iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
12959 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
12960 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
12961{
12962 /*
12963 * Assert sanity.
12964 */
12965 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12966 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12967 Assert( enmOp != kIemNativeEmitMemOp_Store
12968 || pVarValue->enmKind == kIemNativeVarKind_Immediate
12969 || pVarValue->enmKind == kIemNativeVarKind_Stack);
12970 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12971 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
12972 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
12973 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
12974 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12975 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12976 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
12977 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12978#ifdef VBOX_STRICT
12979 if (iSegReg == UINT8_MAX)
12980 {
12981 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12982 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12983 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12984 switch (cbMem)
12985 {
12986 case 1:
12987 Assert( pfnFunction
12988 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
12989 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12990 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12991 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12992 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12993 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
12994 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
12995 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
12996 : UINT64_C(0xc000b000a0009000) ));
12997 break;
12998 case 2:
12999 Assert( pfnFunction
13000 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
13001 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
13002 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
13003 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
13004 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
13005 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
13006 : UINT64_C(0xc000b000a0009000) ));
13007 break;
13008 case 4:
13009 Assert( pfnFunction
13010 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
13011 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
13012 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
13013 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
13014 : UINT64_C(0xc000b000a0009000) ));
13015 break;
13016 case 8:
13017 Assert( pfnFunction
13018 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
13019 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
13020 : UINT64_C(0xc000b000a0009000) ));
13021 break;
13022 }
13023 }
13024 else
13025 {
13026 Assert(iSegReg < 6);
13027 switch (cbMem)
13028 {
13029 case 1:
13030 Assert( pfnFunction
13031 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
13032 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
13033 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13034 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13035 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13036 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
13037 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
13038 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
13039 : UINT64_C(0xc000b000a0009000) ));
13040 break;
13041 case 2:
13042 Assert( pfnFunction
13043 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
13044 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
13045 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
13046 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
13047 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
13048 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
13049 : UINT64_C(0xc000b000a0009000) ));
13050 break;
13051 case 4:
13052 Assert( pfnFunction
13053 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
13054 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
13055 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
13056 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
13057 : UINT64_C(0xc000b000a0009000) ));
13058 break;
13059 case 8:
13060 Assert( pfnFunction
13061 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
13062 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
13063 : UINT64_C(0xc000b000a0009000) ));
13064 break;
13065 }
13066 }
13067#endif
13068
13069#ifdef VBOX_STRICT
13070 /*
13071 * Check that the fExec flags we've got make sense.
13072 */
13073 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13074#endif
13075
13076 /*
13077 * To keep things simple we have to commit any pending writes first as we
13078 * may end up making calls.
13079 */
13080 /** @todo we could postpone this till we make the call and reload the
13081 * registers after returning from the call. Not sure if that's sensible or
13082 * not, though. */
13083#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13084 off = iemNativeRegFlushPendingWrites(pReNative, off);
13085#else
13086 /* The program counter is treated differently for now. */
13087 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
13088#endif
13089
13090#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13091 /*
13092 * Move/spill/flush stuff out of call-volatile registers.
13093 * This is the easy way out. We could contain this to the tlb-miss branch
13094 * by saving and restoring active stuff here.
13095 */
13096 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13097#endif
13098
13099 /*
13100 * Define labels and allocate the result register (trying for the return
13101 * register if we can).
13102 */
13103 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13104 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
13105 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13106 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
13107 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
13108 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
13109 uint8_t const idxRegValueStore = !TlbState.fSkip
13110 && enmOp == kIemNativeEmitMemOp_Store
13111 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13112 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
13113 : UINT8_MAX;
13114 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13115 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13116 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13117 : UINT32_MAX;
13118
13119 /*
13120 * Jump to the TLB lookup code.
13121 */
13122 if (!TlbState.fSkip)
13123 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13124
13125 /*
13126 * TlbMiss:
13127 *
13128 * Call helper to do the fetching.
13129 * We flush all guest register shadow copies here.
13130 */
13131 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13132
13133#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13134 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13135#else
13136 RT_NOREF(idxInstr);
13137#endif
13138
13139#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13140 if (pReNative->Core.offPc)
13141 {
13142 /*
13143         * Update the program counter but restore it at the end of the TlbMiss branch.
13144         * This should allow delaying more program counter updates for the TlbLookup and hit paths,
13145         * which are hopefully much more frequent, reducing the number of memory accesses.
13146 */
13147 /* Allocate a temporary PC register. */
13148 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13149
13150 /* Perform the addition and store the result. */
13151 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13152 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13153
13154 /* Free and flush the PC register. */
13155 iemNativeRegFreeTmp(pReNative, idxPcReg);
13156 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13157 }
13158#endif
13159
13160#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13161 /* Save variables in volatile registers. */
13162 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13163 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
13164 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
13165 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13166#endif
13167
13168 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
13169 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
13170 if (enmOp == kIemNativeEmitMemOp_Store)
13171 {
13172 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
13173 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
13174#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13175 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13176#else
13177 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13178 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
13179#endif
13180 }
13181
13182 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
13183 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
13184#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13185 fVolGregMask);
13186#else
13187 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
13188#endif
13189
13190 if (iSegReg != UINT8_MAX)
13191 {
13192 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
13193 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13194 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
13195 }
13196
13197 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13198 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13199
13200 /* Done setting up parameters, make the call. */
13201 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13202
13203 /*
13204 * Put the result in the right register if this is a fetch.
13205 */
13206 if (enmOp != kIemNativeEmitMemOp_Store)
13207 {
13208 Assert(idxRegValueFetch == pVarValue->idxReg);
13209 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
13210 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
13211 }
13212
13213#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13214 /* Restore variables and guest shadow registers to volatile registers. */
13215 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13216 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13217#endif
13218
13219#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13220 if (pReNative->Core.offPc)
13221 {
13222 /*
13223 * Time to restore the program counter to its original value.
13224 */
13225 /* Allocate a temporary PC register. */
13226 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13227
13228 /* Restore the original value. */
13229 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13230 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13231
13232 /* Free and flush the PC register. */
13233 iemNativeRegFreeTmp(pReNative, idxPcReg);
13234 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13235 }
13236#endif
13237
13238#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13239 if (!TlbState.fSkip)
13240 {
13241 /* end of TlbMiss - Jump to the done label. */
13242 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13243 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13244
13245 /*
13246 * TlbLookup:
13247 */
13248 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
13249 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
13250 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
13251
13252 /*
13253 * Emit code to do the actual storing / fetching.
13254 */
13255 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13256# ifdef VBOX_WITH_STATISTICS
13257 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13258 enmOp == kIemNativeEmitMemOp_Store
13259                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
13260                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
13261# endif
13262 switch (enmOp)
13263 {
13264 case kIemNativeEmitMemOp_Store:
13265 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
13266 {
13267 switch (cbMem)
13268 {
13269 case 1:
13270 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13271 break;
13272 case 2:
13273 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13274 break;
13275 case 4:
13276 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13277 break;
13278 case 8:
13279 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13280 break;
13281 default:
13282 AssertFailed();
13283 }
13284 }
13285 else
13286 {
13287 switch (cbMem)
13288 {
13289 case 1:
13290 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
13291 idxRegMemResult, TlbState.idxReg1);
13292 break;
13293 case 2:
13294 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
13295 idxRegMemResult, TlbState.idxReg1);
13296 break;
13297 case 4:
13298 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
13299 idxRegMemResult, TlbState.idxReg1);
13300 break;
13301 case 8:
13302 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
13303 idxRegMemResult, TlbState.idxReg1);
13304 break;
13305 default:
13306 AssertFailed();
13307 }
13308 }
13309 break;
13310
13311 case kIemNativeEmitMemOp_Fetch:
13312 case kIemNativeEmitMemOp_Fetch_Zx_U16:
13313 case kIemNativeEmitMemOp_Fetch_Zx_U32:
13314 case kIemNativeEmitMemOp_Fetch_Zx_U64:
13315 switch (cbMem)
13316 {
13317 case 1:
13318 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13319 break;
13320 case 2:
13321 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13322 break;
13323 case 4:
13324 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13325 break;
13326 case 8:
13327 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13328 break;
13329 default:
13330 AssertFailed();
13331 }
13332 break;
13333
13334 case kIemNativeEmitMemOp_Fetch_Sx_U16:
13335 Assert(cbMem == 1);
13336 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13337 break;
13338
13339 case kIemNativeEmitMemOp_Fetch_Sx_U32:
13340 Assert(cbMem == 1 || cbMem == 2);
13341 if (cbMem == 1)
13342 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13343 else
13344 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13345 break;
13346
13347 case kIemNativeEmitMemOp_Fetch_Sx_U64:
13348 switch (cbMem)
13349 {
13350 case 1:
13351 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13352 break;
13353 case 2:
13354 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13355 break;
13356 case 4:
13357 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13358 break;
13359 default:
13360 AssertFailed();
13361 }
13362 break;
13363
13364 default:
13365 AssertFailed();
13366 }
13367
13368 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13369
13370 /*
13371 * TlbDone:
13372 */
13373 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13374
13375 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13376
13377# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13378 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13379 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13380# endif
13381 }
13382#else
13383 RT_NOREF(fAlignMask, idxLabelTlbMiss);
13384#endif
13385
13386 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
13387 iemNativeVarRegisterRelease(pReNative, idxVarValue);
13388 return off;
13389}
13390
13391
13392
13393/*********************************************************************************************************************************
13394* Memory fetches (IEM_MEM_FETCH_XXX). *
13395*********************************************************************************************************************************/
13396
13397/* 8-bit segmented: */
13398#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
13399 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
13400 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13401 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13402
13403#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13404 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13405 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13406 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13407
13408#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13409 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13410 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13411 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13412
13413#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13414 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13415 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13416 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13417
13418#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13419 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13420 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13421 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13422
13423#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13424 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13425 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13426 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13427
13428#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13429 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13430 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13431 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13432
13433/* 16-bit segmented: */
13434#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13435 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13436 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13437 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13438
13439#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13440 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13441 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13442 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13443
13444#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13445 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13446 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13447 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13448
13449#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13450 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13451 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13452 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13453
13454#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13455 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13456 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13457 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13458
13459#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13460 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13461 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13462 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13463
13464
13465/* 32-bit segmented: */
13466#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13467 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13468 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13469 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13470
13471#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13472 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13473 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13474 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13475
13476#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13477 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13478 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13479 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13480
13481#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13482 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13483 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13484 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13485
13486
13487/* 64-bit segmented: */
13488#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13489 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13490 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13491 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
13492
13493
13494
13495/* 8-bit flat: */
13496#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
13497 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
13498 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13499 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13500
13501#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
13502 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13503 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13504 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13505
13506#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
13507 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13508 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13509 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13510
13511#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
13512 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13513 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13514 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13515
13516#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
13517 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13518 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13519 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13520
13521#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
13522 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13523 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13524 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13525
13526#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
13527 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13528 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13529 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13530
13531
13532/* 16-bit flat: */
13533#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
13534 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13535 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13536 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13537
13538#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
13539 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13540 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13541 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13542
13543#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
13544 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13545 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13546 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13547
13548#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
13549 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13550 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13551 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13552
13553#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
13554 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13555 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13556 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13557
13558#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
13559 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13560 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13561 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13562
13563/* 32-bit flat: */
13564#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
13565 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13566 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13567 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13568
13569#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
13570 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13571 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13572 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13573
13574#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
13575 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13576 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13577 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13578
13579#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
13580 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13581 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13582 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13583
13584/* 64-bit flat: */
13585#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
13586 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13587 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13588 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
13589
13590
13591
13592/*********************************************************************************************************************************
13593* Memory stores (IEM_MEM_STORE_XXX). *
13594*********************************************************************************************************************************/
13595
13596#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
13597 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
13598 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13599 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13600
13601#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
13602 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
13603 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13604 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13605
13606#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
13607 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
13608 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13609 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13610
13611#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
13612 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
13613 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13614 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13615
13616
13617#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
13618 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
13619 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13620 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13621
13622#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
13623 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
13624 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13625 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13626
13627#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
13628 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
13629 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13630 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13631
13632#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
13633 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
13634 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13635 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13636
13637
13638#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
13639 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13640 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13641
13642#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
13643 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13644 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13645
13646#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
13647 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13648 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13649
13650#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
13651 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13652 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13653
13654
13655#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
13656 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13657 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13658
13659#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
13660 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13661 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13662
13663#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
13664 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13665 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13666
13667#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
13668 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13669 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13670
13671/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
13672 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
13673DECL_INLINE_THROW(uint32_t)
13674iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
13675 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
13676{
13677 /*
13678 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
13679 * to do the grunt work.
13680 */
13681 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
13682 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
13683 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
13684 pfnFunction, idxInstr);
13685 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
13686 return off;
13687}
13688
13689
13690
13691/*********************************************************************************************************************************
13692* Stack Accesses. *
13693*********************************************************************************************************************************/
13694/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
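/* E.g. IEM_MC_FLAT64_PUSH_U64 below encodes RT_MAKE_U32_FROM_U8(64, 64, 0, 0): byte 0 is the value
   width in bits, byte 1 the flat stack-pointer width (0 = segmented, SS-relative), and byte 2 is
   non-zero for segment-register pushes; iemNativeEmitStackPush unpacks these with RT_BYTE1/2/3. */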
13695#define IEM_MC_PUSH_U16(a_u16Value) \
13696 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
13697 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
13698#define IEM_MC_PUSH_U32(a_u32Value) \
13699 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
13700 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
13701#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
13702 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
13703 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
13704#define IEM_MC_PUSH_U64(a_u64Value) \
13705 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
13706 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
13707
13708#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
13709 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
13710 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13711#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
13712 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
13713 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
13714#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
13715 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
13716 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
13717
13718#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
13719 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
13720 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13721#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
13722 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
13723 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
13724
13725
13726DECL_FORCE_INLINE_THROW(uint32_t)
13727iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13728{
13729 /* Use16BitSp: */
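    /* Net effect (both architecture paths): the low 16 bits of idxRegRsp become (SP - cbMem) & 0xffff
       while the upper RSP bits are left untouched, and idxRegEffSp receives that new 16-bit stack
       pointer value to use as the store address. */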
13730#ifdef RT_ARCH_AMD64
13731 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
13732 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13733#else
13734 /* sub regeff, regrsp, #cbMem */
13735 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
13736 /* and regeff, regeff, #0xffff */
13737 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
13738 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
13739 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
13740 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
13741#endif
13742 return off;
13743}
13744
13745
13746DECL_FORCE_INLINE(uint32_t)
13747iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13748{
13749 /* Use32BitSp: */
13750 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
13751 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13752 return off;
13753}
13754
13755
13756/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
13757DECL_INLINE_THROW(uint32_t)
13758iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
13759 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
13760{
13761 /*
13762 * Assert sanity.
13763 */
13764 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
13765 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
13766#ifdef VBOX_STRICT
13767 if (RT_BYTE2(cBitsVarAndFlat) != 0)
13768 {
13769 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13770 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13771 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13772 Assert( pfnFunction
13773 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13774 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
13775 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
13776 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13777 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
13778 : UINT64_C(0xc000b000a0009000) ));
13779 }
13780 else
13781 Assert( pfnFunction
13782 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
13783 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
13784 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
13785 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
13786 : UINT64_C(0xc000b000a0009000) ));
13787#endif
13788
13789#ifdef VBOX_STRICT
13790 /*
13791 * Check that the fExec flags we've got make sense.
13792 */
13793 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13794#endif
13795
13796 /*
13797 * To keep things simple we have to commit any pending writes first as we
13798 * may end up making calls.
13799 */
13800 /** @todo we could postpone this till we make the call and reload the
13801 * registers after returning from the call. Not sure if that's sensible or
13802 * not, though. */
13803 off = iemNativeRegFlushPendingWrites(pReNative, off);
13804
13805 /*
13806 * First we calculate the new RSP and the effective stack pointer value.
13807 * For 64-bit mode and flat 32-bit these two are the same.
13808 * (Code structure is very similar to that of POP below.)
13809 */
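    /* E.g. IEM_MC_PUSH_U32_SREG lands here with cbMem = 4, cBitsFlat = 0 and fIsSegReg = true,
       whereas IEM_MC_FLAT64_PUSH_U64 gives cbMem = 8, cBitsFlat = 64 and fIsSegReg = false. */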
13810 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
13811 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
13812 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
13813 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
13814 ? cbMem : sizeof(uint16_t);
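    /* Note: a 2-byte access size is used for Intel segment pushes outside 16-bit mode, presumably
       because those CPUs only write the low word of the stack slot even though SP still moves by
       cbMem; the 16-bit-mode case keeps the full width and gets the EFLAGS-merging treatment in
       the TLB-hit store further down. */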
13815 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
13816 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
13817 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
13818 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
13819 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
13820 if (cBitsFlat != 0)
13821 {
13822 Assert(idxRegEffSp == idxRegRsp);
13823 Assert(cBitsFlat == 32 || cBitsFlat == 64);
13824 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
13825 if (cBitsFlat == 64)
13826 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
13827 else
13828 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
13829 }
13830 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
13831 {
13832 Assert(idxRegEffSp != idxRegRsp);
13833 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
13834 kIemNativeGstRegUse_ReadOnly);
13835#ifdef RT_ARCH_AMD64
13836 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13837#else
13838 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13839#endif
13840 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
13841 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
13842 offFixupJumpToUseOtherBitSp = off;
13843 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13844 {
13845 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
13846 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13847 }
13848 else
13849 {
13850 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
13851 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13852 }
13853 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13854 }
13855 /* SpUpdateEnd: */
13856 uint32_t const offLabelSpUpdateEnd = off;
13857
13858 /*
13859 * Okay, now prepare for the TLB lookup and jump to the lookup code (or to
13860 * TlbMiss if we're skipping the lookup).
13861 */
13862 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
13863 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
13864 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13865 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
13866 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13867 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13868 : UINT32_MAX;
13869 uint8_t const idxRegValue = !TlbState.fSkip
13870 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13871 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
13872 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
13873 : UINT8_MAX;
13874 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13875
13876
13877 if (!TlbState.fSkip)
13878 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13879 else
13880 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
13881
13882 /*
13883 * Use16BitSp / Use32BitSp (whichever was not emitted inline above):
13884 */
13885 if (cBitsFlat == 0)
13886 {
13887#ifdef RT_ARCH_AMD64
13888 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13889#else
13890 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13891#endif
13892 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
13893 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13894 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13895 else
13896 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13897 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
13898 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13899 }
13900
13901 /*
13902 * TlbMiss:
13903 *
13904 * Call helper to do the pushing.
13905 */
13906 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
13907
13908#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13909 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13910#else
13911 RT_NOREF(idxInstr);
13912#endif
13913
13914 /* Save any variables residing in volatile registers (the helper call will not preserve those). */
13915 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13916 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
13917 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
13918 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
13919 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13920
13921 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
13922 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
13923 {
13924 /* Swap them using ARG0 as temp register: */
13925 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
13926 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
13927 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
13928 }
13929 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
13930 {
13931 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
13932 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
13933 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13934
13935 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
13936 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
13937 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13938 }
13939 else
13940 {
13941 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
13942 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13943
13944 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
13945 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
13946 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
13947 }
13948
13949 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13950 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13951
13952 /* Done setting up parameters, make the call. */
13953 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13954
13955 /* Restore variables and guest shadow registers to volatile registers. */
13956 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13957 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13958
13959#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13960 if (!TlbState.fSkip)
13961 {
13962 /* end of TlbMiss - Jump to the done label. */
13963 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13964 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13965
13966 /*
13967 * TlbLookup:
13968 */
13969 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
13970 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13971
13972 /*
13973 * Emit code to do the actual storing / fetching.
13974 */
13975 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13976# ifdef VBOX_WITH_STATISTICS
13977 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13978 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
13979# endif
13980 if (idxRegValue != UINT8_MAX)
13981 {
13982 switch (cbMemAccess)
13983 {
13984 case 2:
13985 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13986 break;
13987 case 4:
13988 if (!fIsIntelSeg)
13989 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13990 else
13991 {
13992 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
13993 PUSH FS in real mode, so we have to try to emulate that here.
13994 We borrow the now unused idxReg1 from the TLB lookup code here. */
13995 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
13996 kIemNativeGstReg_EFlags);
13997 if (idxRegEfl != UINT8_MAX)
13998 {
13999#ifdef RT_ARCH_AMD64
14000 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
14001 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
14002 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
14003#else
14004 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
14005 off, TlbState.idxReg1, idxRegEfl,
14006 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
14007#endif
14008 iemNativeRegFreeTmp(pReNative, idxRegEfl);
14009 }
14010 else
14011 {
14012 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
14013 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
14014 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
14015 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
14016 }
14017 /* ASSUMES the upper half of idxRegValue is ZERO. */
14018 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
14019 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
14020 }
14021 break;
14022 case 8:
14023 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
14024 break;
14025 default:
14026 AssertFailed();
14027 }
14028 }
14029 else
14030 {
14031 switch (cbMemAccess)
14032 {
14033 case 2:
14034 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
14035 idxRegMemResult, TlbState.idxReg1);
14036 break;
14037 case 4:
14038 Assert(!fIsSegReg);
14039 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
14040 idxRegMemResult, TlbState.idxReg1);
14041 break;
14042 case 8:
14043 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
14044 break;
14045 default:
14046 AssertFailed();
14047 }
14048 }
14049
14050 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14051 TlbState.freeRegsAndReleaseVars(pReNative);
14052
14053 /*
14054 * TlbDone:
14055 *
14056 * Commit the new RSP value.
14057 */
14058 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14059 }
14060#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14061
14062 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
14063 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14064 if (idxRegEffSp != idxRegRsp)
14065 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14066
14067 /* The value variable is implicitly flushed. */
14068 if (idxRegValue != UINT8_MAX)
14069 iemNativeVarRegisterRelease(pReNative, idxVarValue);
14070 iemNativeVarFreeLocal(pReNative, idxVarValue);
14071
14072 return off;
14073}
14074
14075
14076
14077/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
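/* Same byte layout as the PUSH macros above, just without a segment-register flag in byte 2. */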
14078#define IEM_MC_POP_GREG_U16(a_iGReg) \
14079 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
14080 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
14081#define IEM_MC_POP_GREG_U32(a_iGReg) \
14082 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
14083 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
14084#define IEM_MC_POP_GREG_U64(a_iGReg) \
14085 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
14086 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
14087
14088#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
14089 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
14090 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14091#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
14092 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
14093 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
14094
14095#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
14096 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
14097 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14098#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
14099 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
14100 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
14101
14102
14103DECL_FORCE_INLINE_THROW(uint32_t)
14104iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
14105 uint8_t idxRegTmp)
14106{
14107 /* Use16BitSp: */
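    /* Net effect: idxRegEffSp receives the current 16-bit SP (the address to read from), while the
       low 16 bits of idxRegRsp are advanced to (SP + cbMem) & 0xffff with the upper bits preserved. */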
14108#ifdef RT_ARCH_AMD64
14109 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14110 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
14111 RT_NOREF(idxRegTmp);
14112#else
14113 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
14114 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
14115 /* add tmp, regrsp, #cbMem */
14116 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
14117 /* and tmp, tmp, #0xffff */
14118 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
14119 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
14120 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
14121 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
14122#endif
14123 return off;
14124}
14125
14126
14127DECL_FORCE_INLINE(uint32_t)
14128iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
14129{
14130 /* Use32BitSp: */
14131 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14132 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
14133 return off;
14134}
14135
14136
14137/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
14138DECL_INLINE_THROW(uint32_t)
14139iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
14140 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
14141{
14142 /*
14143 * Assert sanity.
14144 */
14145 Assert(idxGReg < 16);
14146#ifdef VBOX_STRICT
14147 if (RT_BYTE2(cBitsVarAndFlat) != 0)
14148 {
14149 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14150 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14151 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14152 Assert( pfnFunction
14153 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14154 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
14155 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14156 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
14157 : UINT64_C(0xc000b000a0009000) ));
14158 }
14159 else
14160 Assert( pfnFunction
14161 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
14162 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
14163 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
14164 : UINT64_C(0xc000b000a0009000) ));
14165#endif
14166
14167#ifdef VBOX_STRICT
14168 /*
14169 * Check that the fExec flags we've got make sense.
14170 */
14171 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14172#endif
14173
14174 /*
14175 * To keep things simple we have to commit any pending writes first as we
14176 * may end up making calls.
14177 */
14178 off = iemNativeRegFlushPendingWrites(pReNative, off);
14179
14180 /*
14181 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
14182 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
14183 * directly as the effective stack pointer.
14184 * (Code structure is very similar to that of PUSH)
14185 */
14186 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
14187 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
14188 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
14189 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
14190 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
14191 /** @todo can do a better job picking the register here. For cbMem >= 4 this
14192 * will be the resulting register value. */
14193 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* Holds the pointer first, then the loaded value; also passed as the temp register to the 16-bit SP update helper (arm64). */
14194
14195 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
14196 if (cBitsFlat != 0)
14197 {
14198 Assert(idxRegEffSp == idxRegRsp);
14199 Assert(cBitsFlat == 32 || cBitsFlat == 64);
14200 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
14201 }
14202 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
14203 {
14204 Assert(idxRegEffSp != idxRegRsp);
14205 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
14206 kIemNativeGstRegUse_ReadOnly);
14207#ifdef RT_ARCH_AMD64
14208 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14209#else
14210 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14211#endif
14212 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
14213 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
14214 offFixupJumpToUseOtherBitSp = off;
14215 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14216 {
14217/** @todo can skip idxRegRsp updating when popping ESP. */
14218 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
14219 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14220 }
14221 else
14222 {
14223 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
14224 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14225 }
14226 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14227 }
14228 /* SpUpdateEnd: */
14229 uint32_t const offLabelSpUpdateEnd = off;
14230
14231 /*
14232 * Okay, now prepare for the TLB lookup and jump to the lookup code (or to
14233 * TlbMiss if we're skipping the lookup).
14234 */
14235 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
14236 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
14237 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14238 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
14239 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14240 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14241 : UINT32_MAX;
14242
14243 if (!TlbState.fSkip)
14244 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14245 else
14246 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
14247
14248 /*
14249 * Use16BitSp / Use32BitSp (whichever was not emitted inline above):
14250 */
14251 if (cBitsFlat == 0)
14252 {
14253#ifdef RT_ARCH_AMD64
14254 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14255#else
14256 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14257#endif
14258 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
14259 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14260 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14261 else
14262 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14263 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
14264 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14265 }
14266
14267 /*
14268 * TlbMiss:
14269 *
14270 * Call helper to do the popping.
14271 */
14272 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
14273
14274#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14275 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14276#else
14277 RT_NOREF(idxInstr);
14278#endif
14279
14280 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
14281 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
14282 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
14283 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14284
14285
14286 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
14287 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
14288 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
14289
14290 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14291 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14292
14293 /* Done setting up parameters, make the call. */
14294 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14295
14296 /* Move the return register content to idxRegMemResult. */
14297 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14298 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14299
14300 /* Restore variables and guest shadow registers to volatile registers. */
14301 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14302 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14303
14304#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14305 if (!TlbState.fSkip)
14306 {
14307 /* end of TlbMiss - Jump to the done label. */
14308 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14309 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14310
14311 /*
14312 * TlbLookup:
14313 */
14314 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
14315 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14316
14317 /*
14318 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult itself).
14319 */
14320 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14321# ifdef VBOX_WITH_STATISTICS
14322 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
14323 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
14324# endif
14325 switch (cbMem)
14326 {
14327 case 2:
14328 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14329 break;
14330 case 4:
14331 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14332 break;
14333 case 8:
14334 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14335 break;
14336 default:
14337 AssertFailed();
14338 }
14339
14340 TlbState.freeRegsAndReleaseVars(pReNative);
14341
14342 /*
14343 * TlbDone:
14344 *
14345 * Set the new RSP value (FLAT accesses need to calculate it first) and
14346 * commit the popped register value.
14347 */
14348 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14349 }
14350#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14351
14352 if (idxGReg != X86_GREG_xSP)
14353 {
14354 /* Set the register. */
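        /* For 32/64-bit pops the loaded value replaces the whole GPR (the 32-bit load zero-extends),
           so idxRegMemResult can simply be adopted as the new register shadow; the 16-bit case below
           instead merges the value into the existing register contents. */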
14355 if (cbMem >= sizeof(uint32_t))
14356 {
14357#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14358 AssertMsg( pReNative->idxCurCall == 0
14359 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
14360 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
14361#endif
14362 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
14363 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
14364 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14365 }
14366 else
14367 {
14368 Assert(cbMem == sizeof(uint16_t));
14369 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
14370 kIemNativeGstRegUse_ForUpdate);
14371 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
14372 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14373 iemNativeRegFreeTmp(pReNative, idxRegDst);
14374 }
14375
14376 /* Complete RSP calculation for FLAT mode. */
14377 if (idxRegEffSp == idxRegRsp)
14378 {
14379 if (cBitsFlat == 64)
14380 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14381 else
14382 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14383 }
14384 }
14385 else
14386 {
14387 /* We're popping RSP, ESP or SP. Only this one needs a bit of extra work, of course. */
14388 if (cbMem == sizeof(uint64_t))
14389 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
14390 else if (cbMem == sizeof(uint32_t))
14391 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
14392 else
14393 {
14394 if (idxRegEffSp == idxRegRsp)
14395 {
14396 if (cBitsFlat == 64)
14397 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14398 else
14399 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14400 }
14401 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
14402 }
14403 }
14404 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
14405
14406 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14407 if (idxRegEffSp != idxRegRsp)
14408 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14409 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14410
14411 return off;
14412}
14413
14414
14415
14416/*********************************************************************************************************************************
14417* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
14418*********************************************************************************************************************************/
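/* All of the IEM_MC_MEM_[FLAT_]MAP_* macros below funnel into iemNativeEmitMemMapCommon with the
   access type (ATOMIC/RW/WO/RO), an alignment mask of cbMem - 1 (0 for byte accesses), and the
   matching iemNativeHlpMem[Flat]MapDataXxx helper; the FLAT variants pass UINT8_MAX instead of a
   segment register. */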
14419
14420#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14421 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14422 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14423 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
14424
14425#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14426 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14427 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14428 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
14429
14430#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14431 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14432 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14433 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
14434
14435#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14436 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14437 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14438 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
14439
14440
14441#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14442 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14443 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14444 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
14445
14446#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14447 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14448 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14449 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
14450
14451#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14452 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14453 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14454 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14455
14456#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14457 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14458 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14459 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
14460
14461#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14462 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
14463 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14464 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14465
14466
14467#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14468 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14469 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14470 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
14471
14472#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14473 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14474 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14475 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
14476
14477#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14478 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14479 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14480 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14481
14482#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14483 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14484 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14485 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
14486
14487#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14488 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
14489 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14490 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14491
14492
14493#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14494 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14495 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14496 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
14497
14498#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14499 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14500 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14501 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
14502#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14503 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14504 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14505 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14506
14507#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14508 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14509 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14510 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
14511
14512#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14513 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
14514 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14515 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14516
14517
14518#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14519 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14520 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14521 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
14522
14523#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14524 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14525 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14526 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
14527
14528
14529#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14530 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14531 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14532 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
14533
14534#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14535 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14536 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14537 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
14538
14539#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14540 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14541 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14542 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
14543
14544#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14545 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14546 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14547 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
14548
14549
14550
14551#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14552 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14553 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14554 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
14555
14556#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14557 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14558 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14559 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
14560
14561#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14562 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14563 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14564 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
14565
14566#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14567 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14568 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14569 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
14570
14571
14572#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14573 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14574 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14575 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
14576
14577#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14578 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14579 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14580 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
14581
14582#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14583 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14584 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14585 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14586
14587#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14588 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14589 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14590 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
14591
14592#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
14593 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
14594 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14595 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14596
14597
14598#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14599 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14600 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14601 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
14602
14603#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14604 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14605 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14606 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
14607
14608#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14609 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14610 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14611 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14612
14613#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14614 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14615 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14616 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
14617
14618#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
14619 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
14620 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14621 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14622
14623
14624#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14625 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14626 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14627 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
14628
14629#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14630 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14631 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14632 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
14633
14634#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14635 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14636 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14637 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14638
14639#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14640 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14641 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14642 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
14643
14644#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
14645 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
14646 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14647 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14648
14649
14650#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
14651 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14652 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14653 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
14654
14655#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
14656 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14657 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14658 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
14659
14660
14661#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14662 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14663 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14664 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
14665
14666#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14667 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14668 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14669 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
14670
14671#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14672 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14673 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14674 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
14675
14676#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14677 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14678 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14679 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
14680
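/* Illustrative usage only (the local names are placeholders, not actual code in
   this file): a recompiled MC block pairs one of the mapping macros above with
   one of the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX macros further down, e.g.

        IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
        ... modify *pu32Dst ...
        IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */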
14681
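/**
 * Common emitter worker for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 * macros above.
 *
 * Emits a TLB lookup (unless skipped) and, on a miss, a call to the given
 * helper (pfnFunction) to establish the mapping.  The host pointer ends up in
 * the idxVarMem variable and the unmap info byte in idxVarUnmapInfo, the
 * latter being zero on the TLB-hit path.  An iSegReg value of UINT8_MAX
 * indicates flat addressing.
 */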
14682DECL_INLINE_THROW(uint32_t)
14683iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
14684 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
14685 uintptr_t pfnFunction, uint8_t idxInstr)
14686{
14687 /*
14688 * Assert sanity.
14689 */
14690 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
14691 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
14692 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
14693 && pVarMem->cbVar == sizeof(void *),
14694 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14695
14696 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14698 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
14699 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
14700 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14701
14702 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
14703 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
14704 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
14705 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
14706 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14707
14708 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
14709
14710 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
14711
14712#ifdef VBOX_STRICT
14713# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
14714 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
14715 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
14716 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
14717 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
14718# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
14719 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
14720 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
14721 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
14722
14723 if (iSegReg == UINT8_MAX)
14724 {
14725 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14726 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14727 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14728 switch (cbMem)
14729 {
14730 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
14731 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
14732 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
14733 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
14734 case 10:
14735 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
14736 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
14737 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14738 break;
14739 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
14740# if 0
14741 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
14742 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
14743# endif
14744 default: AssertFailed(); break;
14745 }
14746 }
14747 else
14748 {
14749 Assert(iSegReg < 6);
14750 switch (cbMem)
14751 {
14752 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
14753 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
14754 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
14755 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
14756 case 10:
14757 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
14758 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
14759 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14760 break;
14761 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
14762# if 0
14763 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
14764 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
14765# endif
14766 default: AssertFailed(); break;
14767 }
14768 }
14769# undef IEM_MAP_HLP_FN
14770# undef IEM_MAP_HLP_FN_NO_AT
14771#endif
14772
14773#ifdef VBOX_STRICT
14774 /*
14775 * Check that the fExec flags we've got make sense.
14776 */
14777 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14778#endif
14779
14780 /*
14781 * To keep things simple we have to commit any pending writes first as we
14782 * may end up making calls.
14783 */
14784 off = iemNativeRegFlushPendingWrites(pReNative, off);
14785
14786#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14787 /*
14788 * Move/spill/flush stuff out of call-volatile registers.
14789 * This is the easy way out. We could contain this to the tlb-miss branch
14790 * by saving and restoring active stuff here.
14791 */
14792 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
14793 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
14794#endif
14795
14796 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
14797 while the tlb-miss codepath will temporarily put it on the stack.
14798       Set the type to stack here so we don't need to do it twice below. */
14799 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
14800 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
14801 /** @todo use a tmp register from TlbState, since they'll be free after tlb
14802 * lookup is done. */
14803
14804 /*
14805 * Define labels and allocate the result register (trying for the return
14806 * register if we can).
14807 */
14808 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14809 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
14810 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
14811 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
14812 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
14813 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14814 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14815 : UINT32_MAX;
14816//off=iemNativeEmitBrk(pReNative, off, 0);
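    /*
     * Rough shape of the code emitted below (sketch, not itself emitted):
     *          jmp     TlbLookup           ; only when the inline lookup isn't skipped
     *      TlbMiss:
     *          call    pfnFunction         ; establishes the mapping, writes bUnmapInfo
     *          jmp     TlbDone
     *      TlbLookup:
     *          <inline TLB lookup>         ; jumps to TlbMiss on a miss
     *          mov     bUnmapInfo, 0       ; a TLB hit needs no special unmapping
     *      TlbDone:
     */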
14817 /*
14818 * Jump to the TLB lookup code.
14819 */
14820 if (!TlbState.fSkip)
14821 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14822
14823 /*
14824 * TlbMiss:
14825 *
14826     * Call helper to do the mapping.
14827 * We flush all guest register shadow copies here.
14828 */
14829 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
14830
14831#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14832 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14833#else
14834 RT_NOREF(idxInstr);
14835#endif
14836
14837#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14838 /* Save variables in volatile registers. */
14839 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
14840 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14841#endif
14842
14843 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
14844 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
14845#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14846 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
14847#else
14848 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14849#endif
14850
14851 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
14852 if (iSegReg != UINT8_MAX)
14853 {
14854 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
14855 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
14856 }
14857
14858 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
14859 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
14860 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
14861
14862 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14863 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14864
14865 /* Done setting up parameters, make the call. */
14866 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14867
14868 /*
14869 * Put the output in the right registers.
14870 */
14871 Assert(idxRegMemResult == pVarMem->idxReg);
14872 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14873 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14874
14875#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14876 /* Restore variables and guest shadow registers to volatile registers. */
14877 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14878 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14879#endif
14880
14881 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
14882 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
14883
14884#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14885 if (!TlbState.fSkip)
14886 {
14887        /* End of TlbMiss - jump to the done label. */
14888 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14889 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14890
14891 /*
14892 * TlbLookup:
14893 */
14894 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
14895 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14896# ifdef VBOX_WITH_STATISTICS
14897 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
14898 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
14899# endif
14900
14901 /* [idxVarUnmapInfo] = 0; */
14902 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
14903
14904 /*
14905 * TlbDone:
14906 */
14907 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14908
14909 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
14910
14911# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14912 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
14913 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14914# endif
14915 }
14916#else
14917 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
14918#endif
14919
14920 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
14921 iemNativeVarRegisterRelease(pReNative, idxVarMem);
14922
14923 return off;
14924}
14925
14926
14927#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
14928 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
14929 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
14930
14931#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
14932 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
14933 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
14934
14935#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
14936 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
14937 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
14938
14939#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
14940 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
14941 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
14942
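/**
 * Emits code for IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC/RW/WO/RO.
 *
 * The unmap helper is only called when the bUnmapInfo value is non-zero, i.e.
 * when the mapping needs an explicit commit/unmap step; a zero value means the
 * memory was mapped directly and no further work is required.
 */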
14943DECL_INLINE_THROW(uint32_t)
14944iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
14945 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
14946{
14947 /*
14948 * Assert sanity.
14949 */
14950 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14951#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
14952 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14953#endif
14954 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
14955 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
14956 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
14957#ifdef VBOX_STRICT
14958 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
14959 {
14960 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
14961 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
14962 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
14963 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
14964 case IEM_ACCESS_TYPE_WRITE:
14965 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
14966 case IEM_ACCESS_TYPE_READ:
14967 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
14968 default: AssertFailed();
14969 }
14970#else
14971 RT_NOREF(fAccess);
14972#endif
14973
14974 /*
14975 * To keep things simple we have to commit any pending writes first as we
14976 * may end up making calls (there shouldn't be any at this point, so this
14977 * is just for consistency).
14978 */
14979 /** @todo we could postpone this till we make the call and reload the
14980 * registers after returning from the call. Not sure if that's sensible or
14981 * not, though. */
14982 off = iemNativeRegFlushPendingWrites(pReNative, off);
14983
14984 /*
14985 * Move/spill/flush stuff out of call-volatile registers.
14986 *
14987 * We exclude any register holding the bUnmapInfo variable, as we'll be
14988 * checking it after returning from the call and will free it afterwards.
14989 */
14990 /** @todo save+restore active registers and maybe guest shadows in miss
14991 * scenario. */
14992 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
14993
14994 /*
14995     * If the value in idxVarUnmapInfo is zero, we can skip all this. Otherwise
14996     * we'll have to call the unmap helper function.
14997     *
14998     * The likelihood of it being zero is higher than for the TLB hit when doing
14999     * the mapping, as a TLB miss for a well aligned and unproblematic memory
15000 * access should also end up with a mapping that won't need special unmapping.
15001 */
15002 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
15003 * should speed up things for the pure interpreter as well when TLBs
15004 * are enabled. */
15005#ifdef RT_ARCH_AMD64
15006 if (pVarUnmapInfo->idxReg == UINT8_MAX)
15007 {
15008 /* test byte [rbp - xxx], 0ffh */
15009 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
15010 pbCodeBuf[off++] = 0xf6;
15011 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
15012 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
15013 pbCodeBuf[off++] = 0xff;
15014 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
15015 }
15016 else
15017#endif
15018 {
15019 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
15020 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
15021 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
15022 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
15023 }
15024 uint32_t const offJmpFixup = off;
15025    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
15026
15027 /*
15028 * Call the unmap helper function.
15029 */
15030#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
15031 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
15032#else
15033 RT_NOREF(idxInstr);
15034#endif
15035
15036 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
15037 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
15038 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
15039
15040 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
15041 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
15042
15043 /* Done setting up parameters, make the call. */
15044 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
15045
15046    /* The bUnmapInfo variable is implicitly freed by these MCs. */
15047 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
15048
15049 /*
15050 * Done, just fixup the jump for the non-call case.
15051 */
15052 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
15053
15054 return off;
15055}
15056
15057
15058
15059/*********************************************************************************************************************************
15060* State and Exceptions *
15061*********************************************************************************************************************************/
15062
15063#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15064#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15065
15066#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15067#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15068#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15069
15070#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15071#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15072#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15073
15074
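/** Common emitter for the IEM_MC_ACTUALIZE_FPU/SSE/AVX_STATE_FOR_CHANGE/READ and
 *  IEM_MC_PREPARE_SSE/AVX_USAGE MCs above.  Currently a placeholder, see the todo. */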
15075DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
15076{
15077 /** @todo this needs a lot more work later. */
15078 RT_NOREF(pReNative, fForChange);
15079 return off;
15080}
15081
15082
15083
15084/*********************************************************************************************************************************
15085* Emitters for FPU related operations. *
15086*********************************************************************************************************************************/
15087
15088#define IEM_MC_FETCH_FCW(a_u16Fcw) \
15089 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
15090
15091/** Emits code for IEM_MC_FETCH_FCW. */
15092DECL_INLINE_THROW(uint32_t)
15093iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15094{
15095 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15096 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15097
15098 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15099
15100 /* Allocate a temporary FCW register. */
15101 /** @todo eliminate extra register */
15102 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
15103 kIemNativeGstRegUse_ReadOnly);
15104
15105 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
15106
15107 /* Free but don't flush the FCW register. */
15108 iemNativeRegFreeTmp(pReNative, idxFcwReg);
15109 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15110
15111 return off;
15112}
15113
15114
15115#define IEM_MC_FETCH_FSW(a_u16Fsw) \
15116 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
15117
15118/** Emits code for IEM_MC_FETCH_FSW. */
15119DECL_INLINE_THROW(uint32_t)
15120iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15121{
15122 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15123 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15124
15125 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
15126 /* Allocate a temporary FSW register. */
15127 /** @todo eliminate extra register */
15128 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
15129 kIemNativeGstRegUse_ReadOnly);
15130
15131 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
15132
15133 /* Free but don't flush the FSW register. */
15134 iemNativeRegFreeTmp(pReNative, idxFswReg);
15135 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15136
15137 return off;
15138}
15139
15140
15141
15142#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15143
15144
15145/*********************************************************************************************************************************
15146* Emitters for SSE/AVX specific operations. *
15147*********************************************************************************************************************************/
15148
15149#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
15150 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
15151
15152/** Emits code for IEM_MC_COPY_XREG_U128. */
15153DECL_INLINE_THROW(uint32_t)
15154iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
15155{
15156 /* Allocate destination and source register. */
15157 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
15158 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
15159 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
15160 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15161
15162 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
15163 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
15164 /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
15165
15166 /* Free but don't flush the source and destination register. */
15167 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
15168 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15169
15170 return off;
15171}
15172#endif
15173
15174
15175/*********************************************************************************************************************************
15176* The native code generator functions for each MC block. *
15177*********************************************************************************************************************************/
15178
15179/*
15180 * Include instruction emitters.
15181 */
15182#include "target-x86/IEMAllN8veEmit-x86.h"
15183
15184/*
15185 * Include g_apfnIemNativeRecompileFunctions and associated functions.
15186 *
15187 * This should probably live in its own file later, but let's see what the
15188 * compile times turn out to be first.
15189 */
15190#include "IEMNativeFunctions.cpp.h"
15191
15192
15193
15194/*********************************************************************************************************************************
15195* Recompiler Core. *
15196*********************************************************************************************************************************/
15197
15198
15199/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
15200static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
15201{
15202 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
15203 pDis->cbCachedInstr += cbMaxRead;
15204 RT_NOREF(cbMinRead);
15205 return VERR_NO_DATA;
15206}
15207
15208
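/**
 * Translates an offset into VMCPU(CC) to the name of the member at that
 * offset, for annotating pVCpu relative accesses in the native disassembly.
 *
 * @returns Member name, or NULL if the offset isn't recognized.
 * @param   off     The offset into the VMCPU structure.
 */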
15209DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
15210{
15211 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
15212 {
15213#define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
15214 ENTRY(fLocalForcedActions),
15215 ENTRY(iem.s.rcPassUp),
15216 ENTRY(iem.s.fExec),
15217 ENTRY(iem.s.pbInstrBuf),
15218 ENTRY(iem.s.uInstrBufPc),
15219 ENTRY(iem.s.GCPhysInstrBuf),
15220 ENTRY(iem.s.cbInstrBufTotal),
15221 ENTRY(iem.s.idxTbCurInstr),
15222#ifdef VBOX_WITH_STATISTICS
15223 ENTRY(iem.s.StatNativeTlbHitsForFetch),
15224 ENTRY(iem.s.StatNativeTlbHitsForStore),
15225 ENTRY(iem.s.StatNativeTlbHitsForStack),
15226 ENTRY(iem.s.StatNativeTlbHitsForMapped),
15227 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
15228 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
15229 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
15230 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
15231#endif
15232 ENTRY(iem.s.DataTlb.aEntries),
15233 ENTRY(iem.s.DataTlb.uTlbRevision),
15234 ENTRY(iem.s.DataTlb.uTlbPhysRev),
15235 ENTRY(iem.s.DataTlb.cTlbHits),
15236 ENTRY(iem.s.CodeTlb.aEntries),
15237 ENTRY(iem.s.CodeTlb.uTlbRevision),
15238 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
15239 ENTRY(iem.s.CodeTlb.cTlbHits),
15240 ENTRY(pVMR3),
15241 ENTRY(cpum.GstCtx.rax),
15242 ENTRY(cpum.GstCtx.ah),
15243 ENTRY(cpum.GstCtx.rcx),
15244 ENTRY(cpum.GstCtx.ch),
15245 ENTRY(cpum.GstCtx.rdx),
15246 ENTRY(cpum.GstCtx.dh),
15247 ENTRY(cpum.GstCtx.rbx),
15248 ENTRY(cpum.GstCtx.bh),
15249 ENTRY(cpum.GstCtx.rsp),
15250 ENTRY(cpum.GstCtx.rbp),
15251 ENTRY(cpum.GstCtx.rsi),
15252 ENTRY(cpum.GstCtx.rdi),
15253 ENTRY(cpum.GstCtx.r8),
15254 ENTRY(cpum.GstCtx.r9),
15255 ENTRY(cpum.GstCtx.r10),
15256 ENTRY(cpum.GstCtx.r11),
15257 ENTRY(cpum.GstCtx.r12),
15258 ENTRY(cpum.GstCtx.r13),
15259 ENTRY(cpum.GstCtx.r14),
15260 ENTRY(cpum.GstCtx.r15),
15261 ENTRY(cpum.GstCtx.es.Sel),
15262 ENTRY(cpum.GstCtx.es.u64Base),
15263 ENTRY(cpum.GstCtx.es.u32Limit),
15264 ENTRY(cpum.GstCtx.es.Attr),
15265 ENTRY(cpum.GstCtx.cs.Sel),
15266 ENTRY(cpum.GstCtx.cs.u64Base),
15267 ENTRY(cpum.GstCtx.cs.u32Limit),
15268 ENTRY(cpum.GstCtx.cs.Attr),
15269 ENTRY(cpum.GstCtx.ss.Sel),
15270 ENTRY(cpum.GstCtx.ss.u64Base),
15271 ENTRY(cpum.GstCtx.ss.u32Limit),
15272 ENTRY(cpum.GstCtx.ss.Attr),
15273 ENTRY(cpum.GstCtx.ds.Sel),
15274 ENTRY(cpum.GstCtx.ds.u64Base),
15275 ENTRY(cpum.GstCtx.ds.u32Limit),
15276 ENTRY(cpum.GstCtx.ds.Attr),
15277 ENTRY(cpum.GstCtx.fs.Sel),
15278 ENTRY(cpum.GstCtx.fs.u64Base),
15279 ENTRY(cpum.GstCtx.fs.u32Limit),
15280 ENTRY(cpum.GstCtx.fs.Attr),
15281 ENTRY(cpum.GstCtx.gs.Sel),
15282 ENTRY(cpum.GstCtx.gs.u64Base),
15283 ENTRY(cpum.GstCtx.gs.u32Limit),
15284 ENTRY(cpum.GstCtx.gs.Attr),
15285 ENTRY(cpum.GstCtx.rip),
15286 ENTRY(cpum.GstCtx.eflags),
15287 ENTRY(cpum.GstCtx.uRipInhibitInt),
15288#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15289 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
15290 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
15291 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
15292 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
15293 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
15294 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
15295 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
15296 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
15297 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
15298 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
15299 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
15300 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
15301 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
15302 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
15303 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
15304 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
15305 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
15306 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
15307 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
15308 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
15309 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
15310 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
15311 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
15312 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
15313 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
15314 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
15315 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
15316 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
15317 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
15318 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
15319 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
15320 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
15321#endif
15322#undef ENTRY
15323 };
15324#ifdef VBOX_STRICT
15325 static bool s_fOrderChecked = false;
15326 if (!s_fOrderChecked)
15327 {
15328 s_fOrderChecked = true;
15329 uint32_t offPrev = s_aMembers[0].off;
15330 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
15331 {
15332 Assert(s_aMembers[i].off > offPrev);
15333 offPrev = s_aMembers[i].off;
15334 }
15335 }
15336#endif
15337
15338 /*
15339 * Binary lookup.
15340 */
15341 unsigned iStart = 0;
15342 unsigned iEnd = RT_ELEMENTS(s_aMembers);
15343 for (;;)
15344 {
15345 unsigned const iCur = iStart + (iEnd - iStart) / 2;
15346 uint32_t const offCur = s_aMembers[iCur].off;
15347 if (off < offCur)
15348 {
15349 if (iCur != iStart)
15350 iEnd = iCur;
15351 else
15352 break;
15353 }
15354 else if (off > offCur)
15355 {
15356 if (iCur + 1 < iEnd)
15357 iStart = iCur + 1;
15358 else
15359 break;
15360 }
15361 else
15362 return s_aMembers[iCur].pszName;
15363 }
15364#ifdef VBOX_WITH_STATISTICS
15365 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
15366 return "iem.s.acThreadedFuncStats[iFn]";
15367#endif
15368 return NULL;
15369}
15370
15371
15372/**
15373 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
15374 * @returns pszBuf.
15375 * @param fFlags The flags.
15376 * @param pszBuf The output buffer.
15377 * @param cbBuf The output buffer size. At least 32 bytes.
15378 */
15379DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
15380{
15381 Assert(cbBuf >= 32);
15382 static RTSTRTUPLE const s_aModes[] =
15383 {
15384 /* [00] = */ { RT_STR_TUPLE("16BIT") },
15385 /* [01] = */ { RT_STR_TUPLE("32BIT") },
15386 /* [02] = */ { RT_STR_TUPLE("!2!") },
15387 /* [03] = */ { RT_STR_TUPLE("!3!") },
15388 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
15389 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
15390 /* [06] = */ { RT_STR_TUPLE("!6!") },
15391 /* [07] = */ { RT_STR_TUPLE("!7!") },
15392 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
15393 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
15394 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
15395 /* [0b] = */ { RT_STR_TUPLE("!b!") },
15396 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
15397 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
15398 /* [0e] = */ { RT_STR_TUPLE("!e!") },
15399 /* [0f] = */ { RT_STR_TUPLE("!f!") },
15400 /* [10] = */ { RT_STR_TUPLE("!10!") },
15401 /* [11] = */ { RT_STR_TUPLE("!11!") },
15402 /* [12] = */ { RT_STR_TUPLE("!12!") },
15403 /* [13] = */ { RT_STR_TUPLE("!13!") },
15404 /* [14] = */ { RT_STR_TUPLE("!14!") },
15405 /* [15] = */ { RT_STR_TUPLE("!15!") },
15406 /* [16] = */ { RT_STR_TUPLE("!16!") },
15407 /* [17] = */ { RT_STR_TUPLE("!17!") },
15408 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
15409 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
15410 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
15411 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
15412 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
15413 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
15414 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
15415 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
15416 };
15417 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
15418 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
15419 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
15420
15421 pszBuf[off++] = ' ';
15422 pszBuf[off++] = 'C';
15423 pszBuf[off++] = 'P';
15424 pszBuf[off++] = 'L';
15425 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
15426 Assert(off < 32);
15427
15428 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
15429
15430 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
15431 {
15432 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
15433 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
15434 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
15435 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
15436 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
15437 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
15438 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
15439 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
15440 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
15441 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
15442 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
15443 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
15444 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
15445 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
15446 };
15447 if (fFlags)
15448 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
15449 if (s_aFlags[i].fFlag & fFlags)
15450 {
15451 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
15452 pszBuf[off++] = ' ';
15453 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
15454 off += s_aFlags[i].cchName;
15455 fFlags &= ~s_aFlags[i].fFlag;
15456 if (!fFlags)
15457 break;
15458 }
15459 pszBuf[off] = '\0';
15460
15461 return pszBuf;
15462}
15463
15464
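/**
 * Disassembles a native translation block for logging/debugging.
 *
 * When debug info is present (IEMNATIVE_WITH_TB_DEBUG_INFO), the native
 * disassembly is interleaved with the guest instructions, threaded calls,
 * labels and register shadowing notes recorded during recompilation.
 */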
15465DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
15466{
15467 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
15468#if defined(RT_ARCH_AMD64)
15469 static const char * const a_apszMarkers[] =
15470 {
15471 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
15472 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
15473 };
15474#endif
15475
15476 char szDisBuf[512];
15477 DISSTATE Dis;
15478 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
15479 uint32_t const cNative = pTb->Native.cInstructions;
15480 uint32_t offNative = 0;
15481#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15482 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
15483#endif
15484 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15485 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15486 : DISCPUMODE_64BIT;
15487#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15488 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
15489#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15490 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
15491#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15492# error "Port me"
15493#else
15494 csh hDisasm = ~(size_t)0;
15495# if defined(RT_ARCH_AMD64)
15496 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
15497# elif defined(RT_ARCH_ARM64)
15498 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
15499# else
15500# error "Port me"
15501# endif
15502 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
15503
15504 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
15505 //Assert(rcCs == CS_ERR_OK);
15506#endif
15507
15508 /*
15509 * Print TB info.
15510 */
15511 pHlp->pfnPrintf(pHlp,
15512 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
15513 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
15514 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
15515 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
15516#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15517 if (pDbgInfo && pDbgInfo->cEntries > 1)
15518 {
15519 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
15520
15521 /*
15522 * This disassembly is driven by the debug info which follows the native
15523 * code and indicates when it starts with the next guest instructions,
15524 * where labels are and such things.
15525 */
15526 uint32_t idxThreadedCall = 0;
15527 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
15528 uint8_t idxRange = UINT8_MAX;
15529 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
15530 uint32_t offRange = 0;
15531 uint32_t offOpcodes = 0;
15532 uint32_t const cbOpcodes = pTb->cbOpcodes;
15533 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
15534 uint32_t const cDbgEntries = pDbgInfo->cEntries;
15535 uint32_t iDbgEntry = 1;
15536 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
15537
15538 while (offNative < cNative)
15539 {
15540 /* If we're at or have passed the point where the next chunk of debug
15541 info starts, process it. */
15542 if (offDbgNativeNext <= offNative)
15543 {
15544 offDbgNativeNext = UINT32_MAX;
15545 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
15546 {
15547 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
15548 {
15549 case kIemTbDbgEntryType_GuestInstruction:
15550 {
15551 /* Did the exec flag change? */
15552 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
15553 {
15554 pHlp->pfnPrintf(pHlp,
15555 " fExec change %#08x -> %#08x %s\n",
15556 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15557 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15558 szDisBuf, sizeof(szDisBuf)));
15559 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
15560 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15561 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15562 : DISCPUMODE_64BIT;
15563 }
15564
15565                            /* New opcode range? We need to handle a spurious debug info entry here for cases
15566 where the compilation was aborted before the opcode was recorded and the actual
15567 instruction was translated to a threaded call. This may happen when we run out
15568 of ranges, or when some complicated interrupts/FFs are found to be pending or
15569 similar. So, we just deal with it here rather than in the compiler code as it
15570 is a lot simpler to do here. */
15571 if ( idxRange == UINT8_MAX
15572 || idxRange >= cRanges
15573 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
15574 {
15575 idxRange += 1;
15576 if (idxRange < cRanges)
15577 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
15578 else
15579 continue;
15580 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
15581 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
15582 + (pTb->aRanges[idxRange].idxPhysPage == 0
15583 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
15584 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
15585 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
15586 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
15587 pTb->aRanges[idxRange].idxPhysPage);
15588 GCPhysPc += offRange;
15589 }
15590
15591 /* Disassemble the instruction. */
15592 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
15593 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
15594 uint32_t cbInstr = 1;
15595 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
15596 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
15597 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
15598 if (RT_SUCCESS(rc))
15599 {
15600 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15601 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15602 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15603 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15604
15605 static unsigned const s_offMarker = 55;
15606 static char const s_szMarker[] = " ; <--- guest";
15607 if (cch < s_offMarker)
15608 {
15609 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
15610 cch = s_offMarker;
15611 }
15612 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
15613 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
15614
15615 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
15616 }
15617 else
15618 {
15619 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
15620 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
15621 cbInstr = 1;
15622 }
15623 GCPhysPc += cbInstr;
15624 offOpcodes += cbInstr;
15625 offRange += cbInstr;
15626 continue;
15627 }
15628
15629 case kIemTbDbgEntryType_ThreadedCall:
15630 pHlp->pfnPrintf(pHlp,
15631 " Call #%u to %s (%u args) - %s\n",
15632 idxThreadedCall,
15633 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15634 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15635 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
15636 idxThreadedCall++;
15637 continue;
15638
15639 case kIemTbDbgEntryType_GuestRegShadowing:
15640 {
15641 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15642 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
15643 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
15644 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
15645 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15646 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
15647 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
15648 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
15649 else
15650 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
15651 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
15652 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15653 continue;
15654 }
15655
15656#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15657 case kIemTbDbgEntryType_GuestSimdRegShadowing:
15658 {
15659 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15660 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
15661 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
15662 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
15663 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15664 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
15665 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
15666 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
15667 else
15668 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
15669 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
15670 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15671 continue;
15672 }
15673#endif
15674
15675 case kIemTbDbgEntryType_Label:
15676 {
15677 const char *pszName = "what_the_fudge";
15678 const char *pszComment = "";
15679 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
15680 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
15681 {
15682 case kIemNativeLabelType_Return:
15683 pszName = "Return";
15684 break;
15685 case kIemNativeLabelType_ReturnBreak:
15686 pszName = "ReturnBreak";
15687 break;
15688 case kIemNativeLabelType_ReturnWithFlags:
15689 pszName = "ReturnWithFlags";
15690 break;
15691 case kIemNativeLabelType_NonZeroRetOrPassUp:
15692 pszName = "NonZeroRetOrPassUp";
15693 break;
15694 case kIemNativeLabelType_RaiseGp0:
15695 pszName = "RaiseGp0";
15696 break;
15697 case kIemNativeLabelType_RaiseNm:
15698 pszName = "RaiseNm";
15699 break;
15700 case kIemNativeLabelType_RaiseUd:
15701 pszName = "RaiseUd";
15702 break;
15703 case kIemNativeLabelType_RaiseMf:
15704 pszName = "RaiseMf";
15705 break;
15706 case kIemNativeLabelType_RaiseXf:
15707 pszName = "RaiseXf";
15708 break;
15709 case kIemNativeLabelType_ObsoleteTb:
15710 pszName = "ObsoleteTb";
15711 break;
15712 case kIemNativeLabelType_NeedCsLimChecking:
15713 pszName = "NeedCsLimChecking";
15714 break;
15715 case kIemNativeLabelType_CheckBranchMiss:
15716 pszName = "CheckBranchMiss";
15717 break;
15718 case kIemNativeLabelType_If:
15719 pszName = "If";
15720 fNumbered = true;
15721 break;
15722 case kIemNativeLabelType_Else:
15723 pszName = "Else";
15724 fNumbered = true;
15725 pszComment = " ; regs state restored pre-if-block";
15726 break;
15727 case kIemNativeLabelType_Endif:
15728 pszName = "Endif";
15729 fNumbered = true;
15730 break;
15731 case kIemNativeLabelType_CheckIrq:
15732 pszName = "CheckIrq_CheckVM";
15733 fNumbered = true;
15734 break;
15735 case kIemNativeLabelType_TlbLookup:
15736 pszName = "TlbLookup";
15737 fNumbered = true;
15738 break;
15739 case kIemNativeLabelType_TlbMiss:
15740 pszName = "TlbMiss";
15741 fNumbered = true;
15742 break;
15743 case kIemNativeLabelType_TlbDone:
15744 pszName = "TlbDone";
15745 fNumbered = true;
15746 break;
15747 case kIemNativeLabelType_Invalid:
15748 case kIemNativeLabelType_End:
15749 break;
15750 }
15751 if (fNumbered)
15752 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
15753 else
15754 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
15755 continue;
15756 }
15757
15758 case kIemTbDbgEntryType_NativeOffset:
15759 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
15760 Assert(offDbgNativeNext > offNative);
15761 break;
15762
15763#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
15764 case kIemTbDbgEntryType_DelayedPcUpdate:
15765 pHlp->pfnPrintf(pHlp,
15766 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
15767 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
15768 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
15769 continue;
15770#endif
15771
15772 default:
15773 AssertFailed();
15774 }
15775 iDbgEntry++;
15776 break;
15777 }
15778 }
15779
15780 /*
15781 * Disassemble the next native instruction.
15782 */
15783 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
15784# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15785 uint32_t cbInstr = sizeof(paNative[0]);
15786 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
15787 if (RT_SUCCESS(rc))
15788 {
15789# if defined(RT_ARCH_AMD64)
15790 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
15791 {
15792 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
15793 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
15794 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
15795 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
15796 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
15797 uInfo & 0x8000 ? "recompiled" : "todo");
15798 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
15799 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
15800 else
15801 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
15802 }
15803 else
15804# endif
15805 {
15806 const char *pszAnnotation = NULL;
15807# ifdef RT_ARCH_AMD64
15808 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15809 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15810 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15811 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
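                /* Annotate memory operands that are based off the fixed pVCpu register with the
                   name of the VMCPU field they access; the name is resolved via
                   iemNativeDbgVCpuOffsetToName() and appended after a ';' further down. */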
15812 PCDISOPPARAM pMemOp;
15813 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
15814 pMemOp = &Dis.Param1;
15815 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
15816 pMemOp = &Dis.Param2;
15817 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
15818 pMemOp = &Dis.Param3;
15819 else
15820 pMemOp = NULL;
15821 if ( pMemOp
15822 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
15823 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
15824 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
15825 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
15826
15827#elif defined(RT_ARCH_ARM64)
15828 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
15829 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15830 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15831# else
15832# error "Port me"
15833# endif
15834 if (pszAnnotation)
15835 {
15836 static unsigned const s_offAnnotation = 55;
15837 size_t const cchAnnotation = strlen(pszAnnotation);
15838 size_t cchDis = strlen(szDisBuf);
15839 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
15840 {
15841 if (cchDis < s_offAnnotation)
15842 {
15843 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
15844 cchDis = s_offAnnotation;
15845 }
15846 szDisBuf[cchDis++] = ' ';
15847 szDisBuf[cchDis++] = ';';
15848 szDisBuf[cchDis++] = ' ';
15849 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
15850 }
15851 }
15852 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
15853 }
15854 }
15855 else
15856 {
15857# if defined(RT_ARCH_AMD64)
15858 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
15859 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
15860# elif defined(RT_ARCH_ARM64)
15861 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
15862# else
15863# error "Port me"
15864# endif
15865 cbInstr = sizeof(paNative[0]);
15866 }
15867 offNative += cbInstr / sizeof(paNative[0]);
15868
15869# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15870 cs_insn *pInstr;
15871 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
15872 (uintptr_t)pNativeCur, 1, &pInstr);
15873 if (cInstrs > 0)
15874 {
15875 Assert(cInstrs == 1);
15876 const char *pszAnnotation = NULL;
15877# if defined(RT_ARCH_ARM64)
15878 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
15879 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
15880 {
15881 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
15882 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
15883 char *psz = strchr(pInstr->op_str, '[');
15884 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
15885 {
15886 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
15887 int32_t off = -1;
15888 psz += 4;
15889 if (*psz == ']')
15890 off = 0;
15891 else if (*psz == ',')
15892 {
15893 psz = RTStrStripL(psz + 1);
15894 if (*psz == '#')
15895 off = RTStrToInt32(&psz[1]);
15896 /** @todo deal with index registers and LSL as well... */
15897 }
15898 if (off >= 0)
15899 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
15900 }
15901 }
15902# endif
15903
15904 size_t const cchOp = strlen(pInstr->op_str);
15905# if defined(RT_ARCH_AMD64)
15906 if (pszAnnotation)
15907 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
15908 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
15909 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15910 else
15911 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
15912 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
15913
15914# else
15915 if (pszAnnotation)
15916 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
15917 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
15918 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15919 else
15920 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
15921 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
15922# endif
15923 offNative += pInstr->size / sizeof(*pNativeCur);
15924 cs_free(pInstr, cInstrs);
15925 }
15926 else
15927 {
15928# if defined(RT_ARCH_AMD64)
15929 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
15930 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
15931# else
15932 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
15933# endif
15934 offNative++;
15935 }
15936# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15937 }
15938 }
15939 else
15940#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
15941 {
15942 /*
15943 * No debug info, just disassemble the x86 code and then the native code.
15944 *
15945 * First the guest code:
15946 */
15947 for (unsigned i = 0; i < pTb->cRanges; i++)
15948 {
15949 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
15950 + (pTb->aRanges[i].idxPhysPage == 0
15951 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
15952 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
15953 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
15954 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
15955 unsigned off = pTb->aRanges[i].offOpcodes;
15956 /** @todo this ain't working when crossing pages! */
15957 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
15958 while (off < cbOpcodes)
15959 {
15960 uint32_t cbInstr = 1;
15961 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
15962 &pTb->pabOpcodes[off], cbOpcodes - off,
15963 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
15964 if (RT_SUCCESS(rc))
15965 {
15966 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15967 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15968 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15969 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15970 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
15971 GCPhysPc += cbInstr;
15972 off += cbInstr;
15973 }
15974 else
15975 {
15976 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
15977 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
15978 break;
15979 }
15980 }
15981 }
15982
15983 /*
15984 * Then the native code:
15985 */
15986 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
15987 while (offNative < cNative)
15988 {
15989 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
15990# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15991 uint32_t cbInstr = sizeof(paNative[0]);
15992 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
15993 if (RT_SUCCESS(rc))
15994 {
15995# if defined(RT_ARCH_AMD64)
15996 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
15997 {
15998 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
15999 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
16000 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
16001 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
16002 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
16003 uInfo & 0x8000 ? "recompiled" : "todo");
16004 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
16005 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
16006 else
16007 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
16008 }
16009 else
16010# endif
16011 {
16012# ifdef RT_ARCH_AMD64
16013 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
16014 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
16015 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16016 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16017# elif defined(RT_ARCH_ARM64)
16018 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
16019 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16020 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16021# else
16022# error "Port me"
16023# endif
16024 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
16025 }
16026 }
16027 else
16028 {
16029# if defined(RT_ARCH_AMD64)
16030 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
16031 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
16032# else
16033 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
16034# endif
16035 cbInstr = sizeof(paNative[0]);
16036 }
16037 offNative += cbInstr / sizeof(paNative[0]);
16038
16039# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16040 cs_insn *pInstr;
16041 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
16042 (uintptr_t)pNativeCur, 1, &pInstr);
16043 if (cInstrs > 0)
16044 {
16045 Assert(cInstrs == 1);
16046# if defined(RT_ARCH_AMD64)
16047 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
16048 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
16049# else
16050 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
16051 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
16052# endif
16053 offNative += pInstr->size / sizeof(*pNativeCur);
16054 cs_free(pInstr, cInstrs);
16055 }
16056 else
16057 {
16058# if defined(RT_ARCH_AMD64)
16059 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
16060 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
16061# else
16062 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
16063# endif
16064 offNative++;
16065 }
16066# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16067 }
16068 }
16069
16070#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
16071 /* Cleanup. */
16072 cs_close(&hDisasm);
16073#endif
16074}
16075
16076
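/*
 * A minimal usage sketch for the recompiler entry point below (hypothetical caller;
 * the real call site presumably lives in the threaded recompiler code outside this file):
 *
 *     PIEMTB pTb = ...;                      // threaded TB selected for recompilation
 *     pTb = iemNativeRecompile(pVCpu, pTb);  // always returns pTb: on failure it is
 *                                            // still a threaded TB, on success it has
 *                                            // been converted in place to a native one.
 */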
16077/**
16078 * Recompiles the given threaded TB into a native one.
16079 *
16080 * In case of failure the translation block will be returned as-is.
16081 *
16082 * @returns pTb.
16083 * @param pVCpu The cross context virtual CPU structure of the calling
16084 * thread.
16085 * @param pTb The threaded translation block to recompile to native.
16086 */
16087DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
16088{
16089 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
16090
16091 /*
16092 * The first time thru, we allocate the recompiler state; the other times
16093 * we just need to reset it before using it again.
16094 */
16095 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
16096 if (RT_LIKELY(pReNative))
16097 iemNativeReInit(pReNative, pTb);
16098 else
16099 {
16100 pReNative = iemNativeInit(pVCpu, pTb);
16101 AssertReturn(pReNative, pTb);
16102 }
16103
16104#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16105 /*
16106 * First do liveness analysis. This is done backwards.
16107 */
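    /*
     * Roughly: the last entry is initialized as all-unused, and each call's liveness
     * handler then derives entry idxCall - 1 from entry idxCall while walking backwards.
     * Calls without a dedicated handler fall back to IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL,
     * which presumably assumes the worst (a call or exception may touch anything).  The
     * resulting per-call entries are picked up again in the recompile loop below via
     * pReNative->idxCurCall.
     */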
16108 {
16109 uint32_t idxCall = pTb->Thrd.cCalls;
16110 if (idxCall <= pReNative->cLivenessEntriesAlloc)
16111 { /* likely */ }
16112 else
16113 {
16114 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
16115 while (idxCall > cAlloc)
16116 cAlloc *= 2;
16117 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
16118 AssertReturn(pvNew, pTb);
16119 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
16120 pReNative->cLivenessEntriesAlloc = cAlloc;
16121 }
16122 AssertReturn(idxCall > 0, pTb);
16123 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
16124
16125 /* The initial (final) entry. */
16126 idxCall--;
16127 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
16128
16129 /* Loop backwards thru the calls and fill in the other entries. */
16130 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
16131 while (idxCall > 0)
16132 {
16133 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
16134 if (pfnLiveness)
16135 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
16136 else
16137 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
16138 pCallEntry--;
16139 idxCall--;
16140 }
16141
16142# ifdef VBOX_WITH_STATISTICS
16143 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
16144 to 'clobbered' rather than 'input'. */
16145 /** @todo */
16146# endif
16147 }
16148#endif
16149
16150 /*
16151 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
16152 * for aborting if an error happens.
16153 */
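    /*
     * In practice this means any emitter that fails deep down in the call graph (e.g.
     * when it cannot grow a buffer) longjmps/throws back to the IEMNATIVE_CATCH_LONGJMP
     * block below with a VBox status code; the failure is logged and the unmodified
     * threaded pTb is returned so the caller can keep using the threaded version.
     */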
16154 uint32_t cCallsLeft = pTb->Thrd.cCalls;
16155#ifdef LOG_ENABLED
16156 uint32_t const cCallsOrg = cCallsLeft;
16157#endif
16158 uint32_t off = 0;
16159 int rc = VINF_SUCCESS;
16160 IEMNATIVE_TRY_SETJMP(pReNative, rc)
16161 {
16162 /*
16163 * Emit prolog code (fixed).
16164 */
16165 off = iemNativeEmitProlog(pReNative, off);
16166
16167 /*
16168 * Convert the calls to native code.
16169 */
16170#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16171 int32_t iGstInstr = -1;
16172#endif
16173#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
16174 uint32_t cThreadedCalls = 0;
16175 uint32_t cRecompiledCalls = 0;
16176#endif
16177#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16178 uint32_t idxCurCall = 0;
16179#endif
16180 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
16181 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
16182 while (cCallsLeft-- > 0)
16183 {
16184 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
16185#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16186 pReNative->idxCurCall = idxCurCall;
16187#endif
16188
16189 /*
16190 * Debug info, assembly markup and statistics.
16191 */
16192#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
16193 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
16194 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
16195#endif
16196#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16197 iemNativeDbgInfoAddNativeOffset(pReNative, off);
16198 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
16199 {
16200 if (iGstInstr < (int32_t)pTb->cInstructions)
16201 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
16202 else
16203 Assert(iGstInstr == pTb->cInstructions);
16204 iGstInstr = pCallEntry->idxInstr;
16205 }
16206 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
16207#endif
16208#if defined(VBOX_STRICT)
16209 off = iemNativeEmitMarker(pReNative, off,
16210 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
16211#endif
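        /* On AMD64 this marker is the 7-byte nop decoded by the disassembler earlier in
           this file (the OP_NOP && cbInstr == 7 case): the high word holds the threaded
           function number, the low 15 bits the call index, and bit 15 whether the call
           was recompiled natively rather than emitted as a threaded call. */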
16212#if defined(VBOX_STRICT)
16213 iemNativeRegAssertSanity(pReNative);
16214#endif
16215#ifdef VBOX_WITH_STATISTICS
16216 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
16217#endif
16218
16219 /*
16220 * Actual work.
16221 */
16222 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
16223 pfnRecom ? "(recompiled)" : "(todo)"));
16224 if (pfnRecom) /** @todo stats on this. */
16225 {
16226 off = pfnRecom(pReNative, off, pCallEntry);
16227 STAM_REL_STATS({cRecompiledCalls++;});
16228 }
16229 else
16230 {
16231 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
16232 STAM_REL_STATS({cThreadedCalls++;});
16233 }
16234 Assert(off <= pReNative->cInstrBufAlloc);
16235 Assert(pReNative->cCondDepth == 0);
16236
16237#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
16238 if (LogIs2Enabled())
16239 {
16240 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
16241# ifndef IEMLIVENESS_EXTENDED_LAYOUT
16242 static const char s_achState[] = "CUXI";
16243# else
16244 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
16245# endif
16246
16247 char szGpr[17];
16248 for (unsigned i = 0; i < 16; i++)
16249 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
16250 szGpr[16] = '\0';
16251
16252 char szSegBase[X86_SREG_COUNT + 1];
16253 char szSegLimit[X86_SREG_COUNT + 1];
16254 char szSegAttrib[X86_SREG_COUNT + 1];
16255 char szSegSel[X86_SREG_COUNT + 1];
16256 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
16257 {
16258 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
16259 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
16260 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
16261 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
16262 }
16263 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
16264 = szSegSel[X86_SREG_COUNT] = '\0';
16265
16266 char szEFlags[8];
16267 for (unsigned i = 0; i < 7; i++)
16268 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
16269 szEFlags[7] = '\0';
16270
16271 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
16272 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
16273 }
16274#endif
16275
16276 /*
16277 * Advance.
16278 */
16279 pCallEntry++;
16280#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16281 idxCurCall++;
16282#endif
16283 }
16284
16285 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
16286 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
16287 if (!cThreadedCalls)
16288 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
16289
16290 /*
16291 * Emit the epilog code.
16292 */
16293 uint32_t idxReturnLabel;
16294 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
16295
16296 /*
16297 * Generate special jump labels.
16298 */
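    /*
     * Only label types actually referenced by this TB (tracked in pReNative->bmLabelTypes)
     * get their common tail code emitted; raise/return paths nothing jumped to are left out.
     */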
16299 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
16300 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
16301 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
16302 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
16303 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
16304 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
16305 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
16306 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
16307 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
16308 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
16309 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
16310 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
16311 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
16312 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
16313 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
16314 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
16315 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
16316 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
16317 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
16318 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
16319 }
16320 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
16321 {
16322 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
16323 return pTb;
16324 }
16325 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
16326 Assert(off <= pReNative->cInstrBufAlloc);
16327
16328 /*
16329 * Make sure all labels have been defined.
16330 */
16331 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
16332#ifdef VBOX_STRICT
16333 uint32_t const cLabels = pReNative->cLabels;
16334 for (uint32_t i = 0; i < cLabels; i++)
16335 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
16336#endif
16337
16338 /*
16339 * Allocate executable memory, copy over the code we've generated.
16340 */
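    /*
     * Any TBs queued for delayed freeing are flushed first, presumably to give the
     * executable-memory allocator the best chance of satisfying the
     * off * sizeof(IEMNATIVEINSTR) byte request that follows.
     */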
16341 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
16342 if (pTbAllocator->pDelayedFreeHead)
16343 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
16344
16345 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
16346 AssertReturn(paFinalInstrBuf, pTb);
16347 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
16348
16349 /*
16350 * Apply fixups.
16351 */
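    /*
     * Each fixup patches the instruction at paFixups[i].off in the final buffer so that
     * it references the resolved offset of its label.  The displacement is always
     * label.off - fixup.off + offAddend, in IEMNATIVEINSTR units; e.g. a Rel32 fixup at
     * offset 0x10 against a label at offset 0x40 with an addend of -4 stores 0x2c
     * (illustrative numbers, not taken from a real TB).
     */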
16352 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
16353 uint32_t const cFixups = pReNative->cFixups;
16354 for (uint32_t i = 0; i < cFixups; i++)
16355 {
16356 Assert(paFixups[i].off < off);
16357 Assert(paFixups[i].idxLabel < cLabels);
16358 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
16359 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
16360 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
16361 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
16362 switch (paFixups[i].enmType)
16363 {
16364#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
16365 case kIemNativeFixupType_Rel32:
16366 Assert(paFixups[i].off + 4 <= off);
16367 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16368 continue;
16369
16370#elif defined(RT_ARCH_ARM64)
16371 case kIemNativeFixupType_RelImm26At0:
16372 {
16373 Assert(paFixups[i].off < off);
16374 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16375 Assert(offDisp >= -262144 && offDisp < 262144);
16376 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
16377 continue;
16378 }
16379
16380 case kIemNativeFixupType_RelImm19At5:
16381 {
16382 Assert(paFixups[i].off < off);
16383 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16384 Assert(offDisp >= -262144 && offDisp < 262144);
16385 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
16386 continue;
16387 }
16388
16389 case kIemNativeFixupType_RelImm14At5:
16390 {
16391 Assert(paFixups[i].off < off);
16392 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16393 Assert(offDisp >= -8192 && offDisp < 8192);
16394 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
16395 continue;
16396 }
16397
16398#endif
16399 case kIemNativeFixupType_Invalid:
16400 case kIemNativeFixupType_End:
16401 break;
16402 }
16403 AssertFailed();
16404 }
16405
16406 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
16407 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
16408
16409 /*
16410 * Convert the translation block.
16411 */
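    /*
     * The threaded call table is no longer needed and is freed; the TB structure itself
     * is reused, with Native.paInstructions pointing at the freshly allocated code and
     * the type flags flipped from threaded to native.
     */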
16412 RTMemFree(pTb->Thrd.paCalls);
16413 pTb->Native.paInstructions = paFinalInstrBuf;
16414 pTb->Native.cInstructions = off;
16415 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
16416#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16417 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
16418 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
16419#endif
16420
16421 Assert(pTbAllocator->cThreadedTbs > 0);
16422 pTbAllocator->cThreadedTbs -= 1;
16423 pTbAllocator->cNativeTbs += 1;
16424 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
16425
16426#ifdef LOG_ENABLED
16427 /*
16428 * Disassemble to the log if enabled.
16429 */
16430 if (LogIs3Enabled())
16431 {
16432 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
16433 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
16434# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
16435 RTLogFlush(NULL);
16436# endif
16437 }
16438#endif
16439 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
16440
16441 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
16442 return pTb;
16443}
16444