VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerTlbLookup.h@ 106952

Last change on this file since 106952 was 106445, checked in by vboxsync, 3 months ago

VMM/IEM: Pass down a_fFlat to the IEMNATIVEEMITTLBSTATE constructor used for memory read/write/map. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 63.9 KB
Line 
1/* $Id: IEMN8veRecompilerTlbLookup.h 106445 2024-10-17 12:31:32Z vboxsync $ */
2/** @file
3 * IEM - Interpreted Execution Manager - Native Recompiler TLB Lookup Code Emitter.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerTlbLookup_h
29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerTlbLookup_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34#include "IEMN8veRecompiler.h"
35#include "IEMN8veRecompilerEmit.h"
36
37
38/** @defgroup grp_iem_n8ve_re_tlblookup Native Recompiler TLB Lookup Code Emitter
39 * @ingroup grp_iem_n8ve_re
40 * @{
41 */
42
43/*
44 * TLB Lookup config.
45 */
46#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
47# define IEMNATIVE_WITH_TLB_LOOKUP
48#endif
49#ifdef IEMNATIVE_WITH_TLB_LOOKUP
50# define IEMNATIVE_WITH_TLB_LOOKUP_FETCH
51#endif
52#ifdef IEMNATIVE_WITH_TLB_LOOKUP
53# define IEMNATIVE_WITH_TLB_LOOKUP_STORE
54#endif
55#ifdef IEMNATIVE_WITH_TLB_LOOKUP
56# define IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
57#endif
58#ifdef IEMNATIVE_WITH_TLB_LOOKUP
59# define IEMNATIVE_WITH_TLB_LOOKUP_PUSH
60#endif
61#ifdef IEMNATIVE_WITH_TLB_LOOKUP
62# define IEMNATIVE_WITH_TLB_LOOKUP_POP
63#endif
64
65
66/**
67 * This must be instantiated *before* branching off to the lookup code,
68 * so that register spilling and whatnot happens for everyone.
69 */
70typedef struct IEMNATIVEEMITTLBSTATE
71{
72 bool const fSkip; /**< True when no inline TLB lookup is to be emitted: always without IEMNATIVE_WITH_TLB_LOOKUP; in the first constructor also for immediate addresses that would run into 32-bit/64-bit wraparound. */
73 uint8_t const idxRegPtrHlp; /**< We don't support immediate variables with register assignment, so this is a tmp reg alloc. */
74 uint8_t const idxRegPtr; /**< Host register with the guest address; idxRegPtrHlp (possibly UINT8_MAX) for immediate or skipped addresses in the first constructor. */
75 uint8_t const idxRegSegBase; /**< Temporary register for the guest segment base, UINT8_MAX when not allocated (flat / no segment register / fSkip). */
76 uint8_t const idxRegSegLimit; /**< Temporary register for the guest segment limit, UINT8_MAX when not allocated (as for the base, plus 64-bit mode and code TLB lookups). */
77 uint8_t const idxRegSegAttrib; /**< Temporary register for the guest segment attributes, UINT8_MAX when not allocated (same conditions as the limit). */
78 uint8_t const idxReg1; /**< Scratch register, UINT8_MAX if fSkip. */
79 uint8_t const idxReg2; /**< Scratch register, UINT8_MAX if fSkip. */
80#if defined(RT_ARCH_ARM64)
81 uint8_t const idxReg3; /**< Additional scratch register (ARM64 only), UINT8_MAX if fSkip. */
82/** @def IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
83 * Use LDP and STP to reduce number of instructions accessing memory at the
84 * cost of using more registers. This will typically reduce the number of
85 * instructions emitted as well.
86 * @todo Profile this and ensure that it performs the same or better.
87 */
88# define IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
89# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
90 uint8_t const idxReg4; /**< Additional scratch register for the load/store pair variant, UINT8_MAX if fSkip. */
91 uint8_t const idxReg5; /**< Additional scratch register for the load/store pair variant, UINT8_MAX if fSkip. */
92# endif
93#endif
94 uint64_t const uAbsPtr; /**< The immediate address (first constructor, immediate variable and !fSkip); otherwise UINT64_MAX. */
95 /* Main constructor: the guest address comes from the a_idxVarGCPtrMem variable, which may be an immediate. Note that the initializers allocate registers, so their order matters. */
96 IEMNATIVEEMITTLBSTATE(PIEMRECOMPILERSTATE a_pReNative, uint32_t *a_poff, uint8_t a_idxVarGCPtrMem,
97 uint8_t const a_iSegReg, bool const a_fFlat, uint8_t const a_cbMem, uint8_t const a_offDisp = 0)
98#ifdef IEMNATIVE_WITH_TLB_LOOKUP
99 /* 32-bit and 64-bit wraparound will require special handling, so skip these for absolute addresses. */
100 : fSkip( a_pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(a_idxVarGCPtrMem)].enmKind
101 == kIemNativeVarKind_Immediate
102 && ( (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT
103 ? (uint64_t)(UINT32_MAX - a_cbMem - a_offDisp)
104 : (uint64_t)(UINT64_MAX - a_cbMem - a_offDisp))
105 < a_pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(a_idxVarGCPtrMem)].u.uValue)
106#else
107 : fSkip(true)
108#endif
109#if defined(RT_ARCH_AMD64) /* got good immediate encoding, otherwise we just load the address in a reg immediately. */
110 , idxRegPtrHlp(UINT8_MAX)
111#else
112 , idxRegPtrHlp( a_pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(a_idxVarGCPtrMem)].enmKind
113 != kIemNativeVarKind_Immediate
114 || fSkip
115 ? UINT8_MAX
116 : iemNativeRegAllocTmpImm(a_pReNative, a_poff,
117 a_pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(a_idxVarGCPtrMem)].u.uValue))
118#endif
119 , idxRegPtr( a_pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(a_idxVarGCPtrMem)].enmKind
120 != kIemNativeVarKind_Immediate
121 && !fSkip
122 ? iemNativeVarRegisterAcquireInitedWithPref(a_pReNative, a_idxVarGCPtrMem, a_poff,
123 IEMNATIVE_CALL_ARG2_GREG)
124 : idxRegPtrHlp)
125 , idxRegSegBase(a_fFlat || a_iSegReg == UINT8_MAX || fSkip
126 ? UINT8_MAX
127 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_BASE(a_iSegReg)))
128 , idxRegSegLimit(a_fFlat || a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT || fSkip
129 ? UINT8_MAX
130 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_LIMIT(a_iSegReg)))
131 , idxRegSegAttrib(a_fFlat || a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT || fSkip
132 ? UINT8_MAX
133 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_ATTRIB(a_iSegReg)))
134 , idxReg1(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
135 , idxReg2(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
136#if defined(RT_ARCH_ARM64)
137 , idxReg3(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
138# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
139 , idxReg4(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
140 , idxReg5(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
141# endif
142#endif
143 , uAbsPtr( a_pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(a_idxVarGCPtrMem)].enmKind
144 != kIemNativeVarKind_Immediate
145 || fSkip
146 ? UINT64_MAX
147 : a_pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(a_idxVarGCPtrMem)].u.uValue)
148
149 {
150 Assert(a_fFlat ? a_iSegReg == UINT8_MAX : a_iSegReg != UINT8_MAX);
151 RT_NOREF(a_offDisp);
152 }
153
154 /* Alternative constructor for PUSH and POP where we don't have a GCPtrMem
155 variable, only a register derived from the guest RSP. */
156 IEMNATIVEEMITTLBSTATE(PIEMRECOMPILERSTATE a_pReNative, uint8_t a_idxRegPtr, uint32_t *a_poff,
157 uint8_t a_iSegReg, uint8_t a_cbMem)
158#ifdef IEMNATIVE_WITH_TLB_LOOKUP
159 : fSkip(false)
160#else
161 : fSkip(true)
162#endif
163 , idxRegPtrHlp(UINT8_MAX)
164 , idxRegPtr(a_idxRegPtr)
165 , idxRegSegBase(a_iSegReg == UINT8_MAX || fSkip
166 ? UINT8_MAX
167 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_BASE(a_iSegReg)))
168 , idxRegSegLimit((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
169 ? UINT8_MAX
170 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_LIMIT(a_iSegReg)))
171 , idxRegSegAttrib((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
172 ? UINT8_MAX
173 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_ATTRIB(a_iSegReg)))
174 , idxReg1(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
175 , idxReg2(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
176#if defined(RT_ARCH_ARM64)
177 , idxReg3(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
178# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
179 , idxReg4(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
180 , idxReg5(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
181# endif
182#endif
183 , uAbsPtr(UINT64_MAX)
184
185 {
186 RT_NOREF_PV(a_cbMem);
187 }
188
189 /* Alternative constructor for the code TLB lookups where we implicitly use RIP:
190 there is no GCPtrMem variable, the pointer register is allocated for the guest PC. */
191 IEMNATIVEEMITTLBSTATE(PIEMRECOMPILERSTATE a_pReNative, bool a_fFlat, uint32_t *a_poff)
192#ifdef IEMNATIVE_WITH_TLB_LOOKUP
193 : fSkip(false)
194#else
195 : fSkip(true)
196#endif
197 , idxRegPtrHlp(UINT8_MAX)
198 , idxRegPtr(iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, kIemNativeGstReg_Pc))
199 , idxRegSegBase(a_fFlat || fSkip
200 ? UINT8_MAX
201 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS)))
202 , idxRegSegLimit(/*a_fFlat || fSkip
203 ? UINT8_MAX
204 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS))*/
205 UINT8_MAX)
206 , idxRegSegAttrib(UINT8_MAX)
207 , idxReg1(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
208 , idxReg2(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
209#if defined(RT_ARCH_ARM64)
210 , idxReg3(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
211# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
212 , idxReg4(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
213 , idxReg5(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
214# endif
215#endif
216 , uAbsPtr(UINT64_MAX)
217
218 {
219 }
220 /* Frees the registers allocated by the constructors. Data lookups (fIsCode=false) release the idxVarGCPtrMem variable register or free the immediate helper register; code lookups free idxRegPtr (RIP) as a temporary. */
221 void freeRegsAndReleaseVars(PIEMRECOMPILERSTATE a_pReNative, uint8_t idxVarGCPtrMem = UINT8_MAX, bool fIsCode = false) const
222 {
223 if (!fIsCode)
224 {
225 if (idxRegPtr != UINT8_MAX)
226 {
227 if (idxRegPtrHlp == UINT8_MAX)
228 {
229 if (idxVarGCPtrMem != UINT8_MAX)
230 iemNativeVarRegisterRelease(a_pReNative, idxVarGCPtrMem);
231 }
232 else
233 {
234 Assert(idxRegPtrHlp == idxRegPtr);
235 iemNativeRegFreeTmpImm(a_pReNative, idxRegPtrHlp);
236 }
237 }
238 else
239 Assert(idxRegPtrHlp == UINT8_MAX);
240 }
241 else
242 {
243 Assert(idxVarGCPtrMem == UINT8_MAX);
244 Assert(idxRegPtrHlp == UINT8_MAX);
245 iemNativeRegFreeTmp(a_pReNative, idxRegPtr); /* RIP */
246 }
247 if (idxRegSegBase != UINT8_MAX)
248 iemNativeRegFreeTmp(a_pReNative, idxRegSegBase);
249 if (idxRegSegLimit != UINT8_MAX)
250 iemNativeRegFreeTmp(a_pReNative, idxRegSegLimit);
251 if (idxRegSegAttrib != UINT8_MAX)
252 iemNativeRegFreeTmp(a_pReNative, idxRegSegAttrib);
253#if defined(RT_ARCH_ARM64)
254# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
255 iemNativeRegFreeTmp(a_pReNative, idxReg5);
256 iemNativeRegFreeTmp(a_pReNative, idxReg4);
257# endif
258 iemNativeRegFreeTmp(a_pReNative, idxReg3);
259#endif
260 iemNativeRegFreeTmp(a_pReNative, idxReg2);
261 iemNativeRegFreeTmp(a_pReNative, idxReg1);
262 /* NOTE(review): the scratch registers above are freed without a UINT8_MAX check - presumably this is only called when !fSkip; confirm against callers. */
263 }
264 /** Returns the bitmask of the scratch registers (idxReg1 and up), or zero when fSkip. */
265 uint32_t getRegsNotToSave() const
266 {
267 if (!fSkip)
268 return RT_BIT_32(idxReg1)
269 | RT_BIT_32(idxReg2)
270#if defined(RT_ARCH_ARM64)
271 | RT_BIT_32(idxReg3)
272# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
273 | RT_BIT_32(idxReg4)
274 | RT_BIT_32(idxReg5)
275# endif
276#endif
277 ;
278 return 0;
279 }
280
281 /** This is only for avoiding assertions: in strict builds (VBOX_STRICT) and when !fSkip it returns the mask of the allocated segment registers, plus idxRegPtr when fCode is set; otherwise zero. */
282 uint32_t getActiveRegsWithShadows(bool fCode = false) const
283 {
284#ifdef VBOX_STRICT
285 if (!fSkip)
286 return (idxRegSegBase != UINT8_MAX ? RT_BIT_32(idxRegSegBase) : 0)
287 | (idxRegSegLimit != UINT8_MAX ? RT_BIT_32(idxRegSegLimit) : 0)
288 | (idxRegSegAttrib != UINT8_MAX ? RT_BIT_32(idxRegSegAttrib) : 0)
289 | (fCode ? RT_BIT_32(idxRegPtr) : 0);
290#else
291 RT_NOREF_PV(fCode);
292#endif
293 return 0;
294 }
295} IEMNATIVEEMITTLBSTATE;
296
297DECLASM(void) iemNativeHlpAsmSafeWrapCheckTlbLookup(void);
298
299
300#ifdef IEMNATIVE_WITH_TLB_LOOKUP
301/**
302 *
303 * @returns New @a off value.
304 * @param pReNative .
305 * @param off .
306 * @param pTlbState .
307 * @param iSegReg .
308 * @param idxLabelTlbLookup .
309 * @param idxLabelTlbMiss .
310 * @param idxRegMemResult .
311 * @param offDisp .
312 * @tparam a_cbMem .
313 * @tparam a_fAlignMaskAndCtl The low 8-bit is the alignment mask, ie. a
314 * 128-bit aligned access passes 15. This is only
315 * applied to ring-3 code, when dictated by the
316 * control bits and for atomic accesses.
317 *
318 * The other bits are used for alignment control:
319 * - IEM_MEMMAP_F_ALIGN_GP
320 * - IEM_MEMMAP_F_ALIGN_SSE
321 * - IEM_MEMMAP_F_ALIGN_GP_OR_AC
322 * Any non-zero upper bits means we will go to
323 * tlbmiss on anything out of alignment according
324 * to the mask in the low 8 bits.
325 * @tparam a_fAccess .
326 * @tparam a_fDataTlb .
327 * @tparam a_fNoReturn .
328 */
329template<bool const a_fDataTlb, const uint8_t a_cbMem, uint32_t a_fAlignMaskAndCtl, uint32_t a_fAccess,
330 bool const a_fNoReturn = false>
331DECL_INLINE_THROW(uint32_t)
332iemNativeEmitTlbLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEEMITTLBSTATE const * const pTlbState,
333 uint8_t iSegReg, uint32_t idxLabelTlbLookup, uint32_t idxLabelTlbMiss, uint8_t idxRegMemResult,
334 uint8_t offDisp = 0)
335{
336 Assert(!pTlbState->fSkip);
337 uint32_t const offVCpuTlb = a_fDataTlb ? RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb) : RT_UOFFSETOF(VMCPUCC, iem.s.CodeTlb);
338# if defined(RT_ARCH_AMD64)
339 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 512);
340# elif defined(RT_ARCH_ARM64)
341 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 96);
342# endif
343
344 /*
345 * The expand down check isn't used all that much, so we emit it here to keep
346 * the lookup straighter.
347 */
348 /* check_expand_down: ; complicated! */
349 uint32_t const offCheckExpandDown = off;
350 uint32_t offFixupLimitDone = 0;
351 if (a_fDataTlb && iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
352 {
353 /* cmp seglim, regptr */
354 if (pTlbState->idxRegPtr != UINT8_MAX && offDisp == 0)
355 off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, pTlbState->idxRegPtr);
356 else if (pTlbState->idxRegPtr == UINT8_MAX)
357 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxRegSegLimit,
358 (uint32_t)(pTlbState->uAbsPtr + offDisp));
359 else if RT_CONSTEXPR_IF(a_cbMem == 1)
360 off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, pTlbState->idxReg2);
361 else
362 { /* use idxRegMemResult to calc the displaced address. */
363 off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxRegPtr, offDisp);
364 off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, idxRegMemResult);
365 }
366 /* ja tlbmiss */
367 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
368
369 /* reg1 = segattr & X86DESCATTR_D (0x4000) */
370 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib, X86DESCATTR_D);
371 /* xor reg1, X86DESCATTR_D */
372 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_D);
373 /* shl reg1, 2 (16 - 14) */
374 AssertCompile((X86DESCATTR_D << 2) == UINT32_C(0x10000));
375 off = iemNativeEmitShiftGpr32LeftEx(pCodeBuf, off, pTlbState->idxReg1, 2);
376 /* dec reg1 (=> 0xffff if D=0; 0xffffffff if D=1) */
377 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, 1);
378 /* cmp reg1, reg2 (64-bit) / imm (32-bit) */
379 if (pTlbState->idxRegPtr != UINT8_MAX)
380 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1,
381 a_cbMem > 1 || offDisp != 0 ? pTlbState->idxReg2 : pTlbState->idxRegPtr);
382 else
383 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1,
384 (uint32_t)(pTlbState->uAbsPtr + offDisp + a_cbMem - 1)); /* fSkip=true on overflow. */
385 /* jbe tlbmiss */
386 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
387 /* jmp limitdone */
388 offFixupLimitDone = off;
389 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, off + 256 /* force near */);
390 }
391
392 /*
393 * Snippet for checking whether misaligned accesses are within the
394 * page (see step 2).
395 *
396 * This sequence is 1 instruction longer than the strict alignment test,
397 * and since most accesses are correctly aligned it is better to do it
398 * this way. Runs of r163597 seems to indicate there was a regression
399 * when placing this code in the main code flow.
400 */
401 uint8_t const idxRegFlatPtr = iSegReg != UINT8_MAX || pTlbState->idxRegPtr == UINT8_MAX || offDisp != 0
402 ? idxRegMemResult : pTlbState->idxRegPtr; /* (not immediately ready for tlblookup use) */
403 RT_CONSTEXPR
404 uint8_t const fAlignMask = a_fDataTlb ? (uint8_t)(a_fAlignMaskAndCtl & 0xff) : 0;
405 if (a_fDataTlb)
406 {
407 AssertCompile(!(a_fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_SSE | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_GP_OR_AC)));
408 Assert(RT_IS_POWER_OF_TWO(fAlignMask + 1U));
409 Assert(a_cbMem == fAlignMask + 1U || !(a_fAccess & IEM_ACCESS_ATOMIC));
410 Assert(a_cbMem < 128); /* alignment test assumptions */
411 }
412
413 uint32_t offMisalignedAccess = UINT32_MAX;
414 uint32_t offFixupMisalignedAccessJmpBack = UINT32_MAX;
415 if ( a_fDataTlb
416 && !(a_fAlignMaskAndCtl & ~UINT32_C(0xff))
417 && !(a_fAccess & IEM_ACCESS_ATOMIC)
418 && a_cbMem > 1
419 && RT_IS_POWER_OF_TWO(a_cbMem)
420 && !(pReNative->fExec & IEM_F_X86_AC))
421 {
422 /* tlbmisaligned: */
423 offMisalignedAccess = off;
424 /* reg1 = regflat & 0xfff */
425 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,/*=*/ idxRegFlatPtr,/*&*/ GUEST_PAGE_OFFSET_MASK);
426 /* cmp reg1, GUEST_PAGE_SIZE - a_cbMem */
427 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, GUEST_PAGE_SIZE - a_cbMem);
428 /* jbe short jmpback */
429 offFixupMisalignedAccessJmpBack = off;
430 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 256 /*near*/, kIemNativeInstrCond_be);
431# ifdef IEM_WITH_TLB_STATISTICS
432 off = iemNativeEmitIncU32CounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
433 offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissCrossPage));
434# endif
435 off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
436 }
437
438 /* The ODD TLB entry is checked last when CR4.PGE=0 or when not in ring-0. */
439 bool const fEvenFirst = (pReNative->fExec & IEM_F_X86_CPL_MASK) != 0
440 || !(pReNative->pVCpu->cpum.GstCtx.cr4 & X86_CR4_PGE);
441 bool const fIncCheckAltTlbe = (pReNative->fExec & IEM_F_X86_CPL_MASK) == 0;
442
443 /*
444 * Snippet for checking the alternative TLBE entry when CR4.PGE=1 and
445 * for doing statistics.
446 *
447 * This code assists step 3c, so look down there for register assignments.
448 */
449 /* checkalttlbe_and_missedtagstats: */
450 uint32_t const offCheckAltTlbeAndMissedTagStats = off;
451 uint32_t offFixupCheckAltTlbeJmpBack = UINT32_MAX / 2;
452 if (fIncCheckAltTlbe)
453 {
454# ifdef RT_ARCH_AMD64
455 /* Update pTlbe: reg2 = fEvenFirst ? reg2 + sizeof(IEMTLBENTRY) : reg2 - sizeof(IEMTLBENTRY); */
456 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
457 pCodeBuf[off++] = 0x8d; /* LEA r64,m64 */
458 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg2, pTlbState->idxReg2,
459 fEvenFirst ? (int32_t)sizeof(IEMTLBENTRY) : -(int32_t)sizeof(IEMTLBENTRY));
460
461 /* reg1 = reg1 & ~IEMTLB_REVISION_MASK; */
462 off = iemNativeEmitShiftGprLeftEx(pCodeBuf, off, pTlbState->idxReg1, 16 + GUEST_PAGE_SHIFT);
463 off = iemNativeEmitShiftGprRightEx(pCodeBuf, off, pTlbState->idxReg1, 16 + GUEST_PAGE_SHIFT);
464 /* or reg1, [qword pVCpu->iem.s.DataTlb.uTlbRevisionGlobal/uTlbRevision] */
465 pCodeBuf[off++] = pTlbState->idxReg1 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
466 pCodeBuf[off++] = 0x0b; /* OR r64,r/m64 */
467 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, pTlbState->idxReg1,
468 fEvenFirst ? offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbRevisionGlobal)
469 : offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbRevision));
470
471 /* cmp reg1, [reg2] */
472 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
473 pCodeBuf[off++] = 0x3b;
474 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
475
476# elif defined(RT_ARCH_ARM64)
477 /* reg3 = uTlbRevision/uTlbRevisionGlobal; (We've ditched reg4 already, so have to get it via pVCpu.) */
478 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg3,
479 fEvenFirst ? offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbRevisionGlobal)
480 : offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbRevision));
481
482 /* reg1 = reg1 & ~IEMTLB_REVISION_MASK; */
483 AssertCompile(UINT64_C(0x0000000fffffffff) == ~IEMTLB_REVISION_MASK);
484 Assert(Armv8A64ConvertImmRImmS2Mask64(0x63, 0) == ~IEMTLB_REVISION_MASK);
485 pCodeBuf[off++] = Armv8A64MkInstrAndImm(pTlbState->idxReg1, pTlbState->idxReg1, 0x63, 0);
486
487 /* reg1 |= reg3 (uTlbRevision/uTlbRevisionGlobal); */
488 pCodeBuf[off++] = Armv8A64MkInstrOrr(pTlbState->idxReg1, pTlbState->idxReg1, pTlbState->idxReg3);
489
490 /* reg2 = reg2 +/- sizeof(IEMTLBENTRY); via preindexing.
491 reg3 = uTag; [pair: reg4 = fFlagsAndPhysRev;] */
492 AssertCompileMemberOffset(IEMTLBENTRY, uTag, 0);
493# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
494 AssertCompileAdjacentMembers(IEMTLBENTRY, uTag, fFlagsAndPhysRev);
495 pCodeBuf[off++] = Armv8A64MkInstrLdPairGpr(pTlbState->idxReg3, pTlbState->idxReg4, pTlbState->idxReg2,
496 fEvenFirst ? (int)sizeof(IEMTLBENTRY) / 8 : -(int)sizeof(IEMTLBENTRY) / 8,
497 kArm64InstrStLdPairType_PreIndex);
498# else
499 pCodeBuf[off++] = Armv8A64MkInstrStrLdrPreIndex9(kArmv8A64InstrLdStType_Ld_Dword, pTlbState->idxReg3, pTlbState->idxReg2,
500 fEvenFirst ? (int)sizeof(IEMTLBENTRY) / 8 : -(int)sizeof(IEMTLBENTRY) / 8);
501# endif
502 /* cmp reg1, reg3; (uRev | Hash(FlatPtr), IEMTLBENTRY::uTag)*/
503 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
504
505# else
506# error "portme"
507# endif
508 /* je near jumpback_checkalttlbe */
509 offFixupCheckAltTlbeJmpBack = off;
510 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 256, kIemNativeInstrCond_e);
511 }
512
513# ifdef IEM_WITH_TLB_STATISTICS
514 /* inc stat */
515 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
516 offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissTag));
517# endif
518# ifndef IEM_WITH_TLB_STATISTICS
519 if (fIncCheckAltTlbe)
520# endif
521 off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
522 off = iemNativeEmitBrkEx(pCodeBuf, off, 0x7679);
523
524 /*
525 * tlblookup:
526 */
527 iemNativeLabelDefine(pReNative, idxLabelTlbLookup, off);
528# if defined(RT_ARCH_ARM64) && 0
529 off = iemNativeEmitBrkEx(pCodeBuf, off, 0);
530# endif
531
532 /*
533 * 1. Segmentation.
534 *
535 * 1a. Check segment limit and attributes if non-flat 32-bit code. This is complicated.
536 *
537 * This can be skipped for code TLB lookups because limit is checked by jmp, call,
538 * ret, and iret prior to making it. It is also checked by the helpers prior to
539 * doing TLB loading.
540 */
541 if (a_fDataTlb && iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
542 {
543 /* Check that we've got a segment loaded and that it allows the access.
544 For write access this means a writable data segment.
545 For read-only accesses this means a readable code segment or any data segment. */
546 if RT_CONSTEXPR_IF((a_fAccess & IEM_ACCESS_TYPE_WRITE) != 0)
547 {
548 uint32_t const fMustBe1 = X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_WRITE;
549 uint32_t const fMustBe0 = X86DESCATTR_UNUSABLE | X86_SEL_TYPE_CODE;
550 /* reg1 = segattrs & (must1|must0) */
551 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,
552 pTlbState->idxRegSegAttrib, fMustBe1 | fMustBe0);
553 /* cmp reg1, must1 */
554 AssertCompile(fMustBe1 <= UINT16_MAX);
555 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, fMustBe1);
556 /* jne tlbmiss */
557 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
558 }
559 else
560 {
561 /* U | !P |!DT |!CD | RW |
562 16 | 8 | 4 | 3 | 1 |
563 -------------------------------
564 0 | 0 | 0 | 0 | 0 | execute-only code segment. - must be excluded
565 0 | 0 | 0 | 0 | 1 | execute-read code segment.
566 0 | 0 | 0 | 1 | 0 | read-only data segment.
567 0 | 0 | 0 | 1 | 1 | read-write data segment. - last valid combination
568 */
569 /* reg1 = segattrs & (relevant attributes) */
570 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib,
571 X86DESCATTR_UNUSABLE | X86DESCATTR_P | X86DESCATTR_DT
572 | X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE);
573 /* xor reg1, X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE ; place C=1 RW=0 at the bottom & limit the range.
574 ; EO-code=0, ER-code=2, RO-data=8, RW-data=10 */
575#ifdef RT_ARCH_ARM64
576 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_DT | X86_SEL_TYPE_CODE);
577 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_P);
578#else
579 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1,
580 X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE);
581#endif
582 /* sub reg1, X86_SEL_TYPE_WRITE ; EO-code=-2, ER-code=0, RO-data=6, RW-data=8 */
583 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_WRITE /* ER-code */);
584 /* cmp reg1, X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE */
585 AssertCompile(X86_SEL_TYPE_CODE == 8);
586 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_CODE);
587 /* ja tlbmiss */
588 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
589 }
590
591 /* If we're accessing more than one byte or if we're working with a non-zero offDisp,
592 put the last address we'll be accessing in idxReg2 (64-bit). */
593 if ((a_cbMem > 1 || offDisp != 0) && pTlbState->idxRegPtr != UINT8_MAX)
594 {
595 if (!offDisp)
596 /* reg2 = regptr + a_cbMem - 1; 64-bit result so we can fend off wraparounds/overflows. */
597 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off,
598 pTlbState->idxReg2,/*=*/ pTlbState->idxRegPtr,/*+*/ a_cbMem - 1);
599 else
600 {
601 /* reg2 = (uint32_t)(regptr + offDisp) + a_cbMem - 1;. */
602 off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off,
603 pTlbState->idxReg2,/*=*/ pTlbState->idxRegPtr,/*+*/ + offDisp);
604 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, pTlbState->idxReg2, a_cbMem - 1);
605 }
606 }
607
608 /*
609 * Check the limit. If this is a write access, we know that it's a
610 * data segment and includes the expand_down bit. For read-only accesses
611 * we need to check that code/data=0 and expanddown=1 before continuing.
612 */
613 if RT_CONSTEXPR_IF((a_fAccess & IEM_ACCESS_TYPE_WRITE) != 0)
614 {
615 /* test segattrs, X86_SEL_TYPE_DOWN */
616 AssertCompile(X86_SEL_TYPE_DOWN < 128);
617 off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, pTlbState->idxRegSegAttrib, X86_SEL_TYPE_DOWN);
618 /* jnz check_expand_down */
619 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offCheckExpandDown, kIemNativeInstrCond_ne);
620 }
621 else
622 {
623 /* reg1 = segattr & (code | down) */
624 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,
625 pTlbState->idxRegSegAttrib, X86_SEL_TYPE_CODE | X86_SEL_TYPE_DOWN);
626 /* cmp reg1, down */
627 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_DOWN);
628 /* je check_expand_down */
629 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offCheckExpandDown, kIemNativeInstrCond_e);
630 }
631
632 /* expand_up:
633 cmp seglim, regptr/reg2/imm */
634 if (pTlbState->idxRegPtr != UINT8_MAX)
635 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit,
636 a_cbMem > 1 || offDisp != 0 ? pTlbState->idxReg2 : pTlbState->idxRegPtr);
637 else
638 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxRegSegLimit,
639 (uint32_t)pTlbState->uAbsPtr + offDisp + a_cbMem - 1U); /* fSkip=true on overflow. */
640 /* jbe tlbmiss */
641 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
642
643 /* limitdone: */
644 iemNativeFixupFixedJump(pReNative, offFixupLimitDone, off);
645 }
646
647 /* 1b. Add the segment base. We use idxRegMemResult for the ptr register if
648 this step is required or if the address is a constant (simplicity) or
649 if offDisp is non-zero. */
650 if (iSegReg != UINT8_MAX)
651 {
652 Assert(idxRegFlatPtr != pTlbState->idxRegPtr);
653 /* regflat = segbase + regptr/imm */
654 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
655 {
656 Assert(iSegReg >= X86_SREG_FS);
657 if (pTlbState->idxRegPtr != UINT8_MAX)
658 {
659 off = iemNativeEmitGprEqGprPlusGprEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase, pTlbState->idxRegPtr);
660 if (offDisp != 0)
661 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxRegFlatPtr, offDisp);
662 }
663 else
664 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase,
665 pTlbState->uAbsPtr + offDisp);
666 }
667 else if (pTlbState->idxRegPtr != UINT8_MAX)
668 {
669 off = iemNativeEmitGpr32EqGprPlusGprEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase, pTlbState->idxRegPtr);
670 if (offDisp != 0)
671 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegFlatPtr, offDisp);
672 }
673 else
674 off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr,
675 pTlbState->idxRegSegBase, (uint32_t)pTlbState->uAbsPtr + offDisp);
676 }
677 else if (pTlbState->idxRegPtr == UINT8_MAX)
678 {
679 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
680 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->uAbsPtr + offDisp);
681 else
682 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxRegFlatPtr, (uint32_t)pTlbState->uAbsPtr + offDisp);
683 }
684 else if (offDisp != 0)
685 {
686 Assert(idxRegFlatPtr != pTlbState->idxRegPtr);
687 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
688 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegPtr, offDisp);
689 else
690 off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegPtr, offDisp);
691 }
692 else
693 Assert(idxRegFlatPtr == pTlbState->idxRegPtr);
694
695 /*
696 * 2. Check that the address doesn't cross a page boundary and doesn't
697 * have alignment issues (not applicable to code).
698 *
699 * For regular accesses (non-SSE/AVX & atomic stuff) we only need to
700 * check for #AC in ring-3 code. To simplify this, the need for AC
701 * checking is indicated by IEM_F_X86_AC in IEMCPU::fExec.
702 *
703 * The caller informs us about SSE/AVX aligned accesses via the
704 * upper bits of a_fAlignMaskAndCtl and atomic accesses via a_fAccess.
705 */
706 if (a_fDataTlb)
707 {
708 if (offMisalignedAccess != UINT32_MAX)
709 {
710#ifdef RT_ARCH_ARM64
711 if RT_CONSTEXPR_IF(a_cbMem == 2)
712 {
713 /* tbnz regflatptr, #0, tlbmiss */
714 pCodeBuf[off++] = Armv8A64MkInstrTbnz((int32_t)offMisalignedAccess - (int32_t)off, idxRegFlatPtr, 0);
715 }
716 else
717#endif
718 {
719 /* test regflat, fAlignMask */
720 off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, idxRegFlatPtr, a_cbMem - 1);
721 /* jnz tlbmiss */
722 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offMisalignedAccess, kIemNativeInstrCond_ne);
723 }
724 /** @todo ARM64: two byte access checks can be reduced to single instruction */
725 iemNativeFixupFixedJump(pReNative, offFixupMisalignedAccessJmpBack, off);
726 }
727 else
728 {
729 /*
730 * 2a. Strict alignment check using fAlignMask for atomic, strictly
731 * aligned stuff (SSE & AVX) and AC=1 (ring-3).
732 */
733 bool const fStrictAlignmentCheck = fAlignMask
734 && ( (a_fAlignMaskAndCtl & ~UINT32_C(0xff))
735 || (a_fAccess & IEM_ACCESS_ATOMIC)
736 || (pReNative->fExec & IEM_F_X86_AC) );
737 if (fStrictAlignmentCheck)
738 {
739 /* test regflat, fAlignMask */
740 off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, idxRegFlatPtr, fAlignMask);
741
742#ifndef IEM_WITH_TLB_STATISTICS
743 /* jnz tlbmiss */
744 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
745#else
746 /* jz 1F; inc stat; jmp tlbmiss */
747 uint32_t const offFixup1 = off;
748 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 16, kIemNativeInstrCond_e);
749 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
750 offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissAlignment));
751 off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
752 iemNativeFixupFixedJump(pReNative, offFixup1, off);
753#endif
754 }
755
756 /*
757 * 2b. Check that it's not crossing a page boundary if the access is
758 * larger than the alignment mask or if we didn't do the strict
759 * alignment check above.
760 */
761 if ( a_cbMem > 1
762 && ( !fStrictAlignmentCheck
763 || a_cbMem > fAlignMask + 1U))
764 {
765 /* reg1 = regflat & 0xfff */
766 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,/*=*/ idxRegFlatPtr,/*&*/ GUEST_PAGE_OFFSET_MASK);
767 /* cmp reg1, GUEST_PAGE_SIZE - a_cbMem */
768 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, GUEST_PAGE_SIZE - a_cbMem);
769#ifndef IEM_WITH_TLB_STATISTICS
770 /* ja tlbmiss */
771 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
772#else
773 /* jbe 1F; inc stat; jmp tlbmiss */
774 uint32_t const offFixup1 = off;
775 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 16, kIemNativeInstrCond_be);
776 off = iemNativeEmitIncU32CounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
777 offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissCrossPage));
778 off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
779 iemNativeFixupFixedJump(pReNative, offFixup1, off);
780#endif
781 }
782 }
783 }
784 else
785 Assert(a_fAlignMaskAndCtl == 0);
786
787 /*
788 * 3. TLB lookup.
789 *
790 * 3a. Calculate the TLB tag value (IEMTLB_CALC_TAG_NO_REV).
791 * In 64-bit mode we will also check for non-canonical addresses here.
792 */
793 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
794 {
795# if defined(RT_ARCH_AMD64)
796 /* mov reg1, regflat */
797 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr);
798 /* rol reg1, 16 */
799 off = iemNativeEmitRotateGprLeftEx(pCodeBuf, off, pTlbState->idxReg1, 16);
800 /** @todo Would 'movsx reg2, word reg1' and working on reg2 in dwords be faster? */
801 /* inc word reg1 */
802 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
803 if (pTlbState->idxReg1 >= 8)
804 pCodeBuf[off++] = X86_OP_REX_B;
805 pCodeBuf[off++] = 0xff;
806 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, pTlbState->idxReg1 & 7);
807 /* cmp word reg1, 1 */
808 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
809 if (pTlbState->idxReg1 >= 8)
810 pCodeBuf[off++] = X86_OP_REX_B;
811 pCodeBuf[off++] = 0x83;
812 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, pTlbState->idxReg1 & 7);
813 pCodeBuf[off++] = 1;
814# ifndef IEM_WITH_TLB_STATISTICS
815 /* ja tlbmiss */
816 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
817# else
818 /* jbe 1F; inc stat; jmp tlbmiss */
819 uint32_t const offFixup1 = off;
820 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 16, kIemNativeInstrCond_be);
821 off = iemNativeEmitIncU32CounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
822 offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissNonCanonical));
823 off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
824 iemNativeFixupFixedJump(pReNative, offFixup1, off);
825# endif
826 /* shr reg1, 16 + GUEST_PAGE_SHIFT */
827 off = iemNativeEmitShiftGprRightEx(pCodeBuf, off, pTlbState->idxReg1, 16 + GUEST_PAGE_SHIFT);
828
829# elif defined(RT_ARCH_ARM64)
830 /* lsr reg1, regflat, #48 */
831 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(pTlbState->idxReg1, idxRegFlatPtr, 48);
832 /* add reg1, reg1, #1 */
833 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(pTlbState->idxReg1, pTlbState->idxReg1, 1, false /*f64Bit*/);
834 /* tst reg1, #0xfffe */
835 Assert(Armv8A64ConvertImmRImmS2Mask32(14, 31) == 0xfffe);
836 pCodeBuf[off++] = Armv8A64MkInstrTstImm(pTlbState->idxReg1, 14, 31, false /*f64Bit*/);
837# ifndef IEM_WITH_TLB_STATISTICS
838 /* b.ne tlbmiss */
839 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
840# else
841 /* b.eq 1F; inc stat; jmp tlbmiss */
842 uint32_t const offFixup1 = off;
843 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 16, kIemNativeInstrCond_e);
844 off = iemNativeEmitIncU32CounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
845 offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissNonCanonical));
846 off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
847 iemNativeFixupFixedJump(pReNative, offFixup1, off);
848# endif
849
850 /* ubfx reg1, regflat, #12, #36 */
851 pCodeBuf[off++] = Armv8A64MkInstrUbfx(pTlbState->idxReg1, idxRegFlatPtr, GUEST_PAGE_SHIFT, 48 - GUEST_PAGE_SHIFT);
852# else
853# error "Port me"
854# endif
855 }
856 else
857 {
858 /* reg1 = (uint32_t)(regflat >> 12) */
859 off = iemNativeEmitGpr32EqGprShiftRightImmEx(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr, GUEST_PAGE_SHIFT);
860 }
861
862 /* or reg1, [qword pVCpu->iem.s.DataTlb.uTlbRevision] */
863# if defined(RT_ARCH_AMD64)
864 pCodeBuf[off++] = pTlbState->idxReg1 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
865 pCodeBuf[off++] = 0x0b; /* OR r64,r/m64 */
866 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, pTlbState->idxReg1,
867 fEvenFirst ? offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbRevision)
868 : offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbRevisionGlobal));
869# else
870# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
871 /* Load uTlbRevision[Global] into reg3 and uTlbPhysRev into reg5.
872 We load the pointer for IEMTLB::aEntries[!fEvenFirst] into reg4 and use
873 it for addressing here and later when calculating pTble (saves one
874 instruction, simplifies odd-first). */
875 AssertCompileMemberAlignment(IEMTLB, uTlbRevision, 16); /* It is said that misaligned pair loads doesn't perform well. */
876 AssertCompileAdjacentMembers(IEMTLB, uTlbRevision, uTlbPhysRev);
877 AssertCompileAdjacentMembers(IEMTLB, uTlbPhysRev, uTlbRevisionGlobal);
878 AssertCompile(RTASSERT_OFFSET_OF(IEMTLB, uTlbPhysRev) < RTASSERT_OFFSET_OF(IEMTLB, aEntries));
879 AssertCompile(RTASSERT_OFFSET_OF(VMCPUCC, iem.s.DataTlb.aEntries) < _64K);
880 uint32_t const offEntries = offVCpuTlb + RT_UOFFSETOF(IEMTLB, aEntries) + (fEvenFirst ? 0 : sizeof(IEMTLBENTRY));
881 if (offEntries < _64K)
882 {
883 pCodeBuf[off++] = Armv8A64MkInstrMovZ(pTlbState->idxReg4, offEntries);
884 pCodeBuf[off++] = Armv8A64MkInstrAddReg(pTlbState->idxReg4, IEMNATIVE_REG_FIXED_PVMCPU, pTlbState->idxReg4);
885 }
886 else
887 {
888 AssertCompileMemberAlignment(VMCPUCC, iem.s.CodeTlb.aEntries, 32);
889 AssertCompileMemberAlignment(IEMTLB, aEntries, 32);
890 AssertCompileSizeAlignment(IEMTLBENTRY, 32);
891# if IEMTLB_ENTRY_COUNT <= 16384 /*?*/
892 AssertCompile(RTASSERT_OFFSET_OF(VMCPUCC, iem.s.CodeTlb.aEntries) < _64K*32U);
893 pCodeBuf[off++] = Armv8A64MkInstrMovZ(pTlbState->idxReg4, offEntries >> 5);
894 pCodeBuf[off++] = Armv8A64MkInstrAddReg(pTlbState->idxReg4, IEMNATIVE_REG_FIXED_PVMCPU, pTlbState->idxReg4,
895 true /*64Bit*/, false /*fSetFlags*/, 5 /*cShift*/, kArmv8A64InstrShift_Lsl);
896# else
897 AssertCompile(RTASSERT_OFFSET_OF(VMCPUCC, iem.s.CodeTlb.aEntries) >= _64K*32U);
898 pCodeBuf[off++] = Armv8A64MkInstrMovZ(pTlbState->idxReg4, offEntries & UINT16_MAX);
899 pCodeBuf[off++] = Armv8A64MkInstrMovK(pTlbState->idxReg4, offEntries >> 16, 1);
900 pCodeBuf[off++] = Armv8A64MkInstrAddReg(pTlbState->idxReg4, IEMNATIVE_REG_FIXED_PVMCPU, pTlbState->idxReg4);
901# endif
902 }
903 AssertCompile(RTASSERT_OFFSET_OF(IEMTLB, aEntries) < 64U*8U - sizeof(IEMTLBENTRY));
904 if (fEvenFirst)
905 pCodeBuf[off++] = Armv8A64MkInstrLdPairGpr(pTlbState->idxReg3, pTlbState->idxReg5, pTlbState->idxReg4,
906 (RT_OFFSETOF(IEMTLB, uTlbRevision) - RT_OFFSETOF(IEMTLB, aEntries)) / 8);
907 else /* This isn't 128-bit aligned, hope that doesn't hurt too much... */
908 pCodeBuf[off++] = Armv8A64MkInstrLdPairGpr(pTlbState->idxReg5, pTlbState->idxReg3, pTlbState->idxReg4,
909 ( RT_OFFSETOF(IEMTLB, uTlbPhysRev) - RT_OFFSETOF(IEMTLB, aEntries)
910 - (int)sizeof(IEMTLBENTRY)) / 8);
911# else
912 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg3,
913 fEvenFirst ? offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbRevision)
914 : offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbRevisionGlobal));
915# endif
916 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
917# endif
918
919 /*
920 * 3b. Calc pTlbe.
921 */
922# if !defined(RT_ARCH_ARM64) || !defined(IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR)
923 uint32_t const offTlbEntriesAdjusted = offVCpuTlb + RT_UOFFSETOF(IEMTLB, aEntries) + (fEvenFirst ? 0 : sizeof(IEMTLBENTRY));
924# endif
925# if defined(RT_ARCH_AMD64)
926# if IEMTLB_ENTRY_COUNT == 256
927 /* movzx reg2, byte reg1 */
928 off = iemNativeEmitLoadGprFromGpr8Ex(pCodeBuf, off, pTlbState->idxReg2, pTlbState->idxReg1);
929# else
930 /* mov reg2, reg1 */
931 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, pTlbState->idxReg2, pTlbState->idxReg1);
932 /* and reg2, IEMTLB_ENTRY_COUNT - 1U */
933 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg2, IEMTLB_ENTRY_COUNT - 1U);
934# endif
935 /* shl reg2, 6 ; reg2 *= sizeof(IEMTLBENTRY) * 2 */
936 AssertCompileSize(IEMTLBENTRY, 32);
937 off = iemNativeEmitShiftGprLeftEx(pCodeBuf, off, pTlbState->idxReg2, 6);
938 /* lea reg2, [&pVCpu->iem.s.DataTlb.aEntries[!fEvenFirst] + reg2] */
939 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU < 8);
940 pCodeBuf[off++] = pTlbState->idxReg2 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_X | X86_OP_REX_R;
941 pCodeBuf[off++] = 0x8d;
942 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, pTlbState->idxReg2 & 7, 4 /*SIB*/);
943 pCodeBuf[off++] = X86_SIB_MAKE(IEMNATIVE_REG_FIXED_PVMCPU & 7, pTlbState->idxReg2 & 7, 0);
944 pCodeBuf[off++] = RT_BYTE1(offTlbEntriesAdjusted);
945 pCodeBuf[off++] = RT_BYTE2(offTlbEntriesAdjusted);
946 pCodeBuf[off++] = RT_BYTE3(offTlbEntriesAdjusted);
947 pCodeBuf[off++] = RT_BYTE4(offTlbEntriesAdjusted);
948
949# elif defined(RT_ARCH_ARM64)
950 /* reg2 = (reg1 & tlbmask) << 6 */
951 AssertCompileSize(IEMTLBENTRY, 32);
952 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(pTlbState->idxReg2, pTlbState->idxReg1, 6, IEMTLB_ENTRY_COUNT_AS_POWER_OF_TWO);
953# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
954 /* reg2 += &pVCpu->iem.s.[Data|Code]Tlb.aEntries[!fEvenFirst] */
955 pCodeBuf[off++] = Armv8A64MkInstrAddReg(pTlbState->idxReg2, pTlbState->idxReg2, pTlbState->idxReg4);
956# else
957 /* reg2 += offsetof(VMCPUCC, iem.s.DataTlb.aEntries[!fEvenFirst]) */
958 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, pTlbState->idxReg2, offTlbEntriesAdjusted, pTlbState->idxReg3 /*iGprTmp*/);
959 /* reg2 += pVCpu */
960 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, pTlbState->idxReg2, IEMNATIVE_REG_FIXED_PVMCPU);
961# endif
962# else
963# error "Port me"
964# endif
965
966 /*
967 * 3c. Compare the TLBE.uTag with the one from 3a (reg1).
968 */
969# if defined(RT_ARCH_AMD64)
970 /* cmp reg1, [reg2] */
971 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
972 pCodeBuf[off++] = 0x3b;
973 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
974# elif defined(RT_ARCH_ARM64)
975 /* reg3 = uTag; [pair: reg4 = fFlagsAndPhysRev;] */
976# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
977 AssertCompileMemberAlignment(IEMTLBENTRY, uTag, 16); /* It is said that misaligned pair loads doesn't perform well. */
978 AssertCompile(RT_UOFFSETOF(IEMTLBENTRY, uTag) + sizeof(uint64_t) == RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
979 pCodeBuf[off++] = Armv8A64MkInstrLdPairGpr(pTlbState->idxReg3, pTlbState->idxReg4,
980 pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag) / 8);
981# else
982 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg3, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
983# endif
984 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
985# else
986# error "Port me"
987# endif
988 /* jne checkalttlbe_and_missedtagstats */
989# ifndef IEM_WITH_TLB_STATISTICS
990 if (!fIncCheckAltTlbe)
991 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
992 else
993# endif
994 {
995 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offCheckAltTlbeAndMissedTagStats, kIemNativeInstrCond_ne);
996 if (fIncCheckAltTlbe)
997 iemNativeFixupFixedJump(pReNative, offFixupCheckAltTlbeJmpBack, off);
998 }
999
1000 /*
1001 * 4. Check TLB page table level access flags and physical page revision #.
1002 */
1003 /* mov reg1, mask */
1004 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
1005 uint64_t const fNoUser = (((pReNative->fExec >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK) + 1) & IEMTLBE_F_PT_NO_USER;
1006 uint64_t fTlbe = IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3 | IEMTLBE_F_PG_UNASSIGNED | IEMTLBE_F_PT_NO_ACCESSED
1007 | fNoUser;
1008 if RT_CONSTEXPR_IF((a_fAccess & IEM_ACCESS_TYPE_EXEC) != 0)
1009 fTlbe |= IEMTLBE_F_PT_NO_EXEC /*| IEMTLBE_F_PG_NO_READ?*/;
1010 if RT_CONSTEXPR_IF((a_fAccess & IEM_ACCESS_TYPE_READ) != 0)
1011 fTlbe |= IEMTLBE_F_PG_NO_READ;
1012 if RT_CONSTEXPR_IF((a_fAccess & IEM_ACCESS_TYPE_WRITE) != 0)
1013 fTlbe |= IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PG_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY;
1014 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg1, fTlbe);
1015# if defined(RT_ARCH_AMD64)
1016 /* and reg1, [reg2->fFlagsAndPhysRev] */
1017 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
1018 pCodeBuf[off++] = 0x23;
1019 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1,
1020 pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
1021
1022 /* cmp reg1, [pVCpu->iem.s.DataTlb.uTlbPhysRev] */
1023 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R);
1024 pCodeBuf[off++] = 0x3b;
1025 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, IEMNATIVE_REG_FIXED_PVMCPU,
1026 offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbPhysRev));
1027# elif defined(RT_ARCH_ARM64)
1028# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
1029 pCodeBuf[off++] = Armv8A64MkInstrAnd(pTlbState->idxReg1, pTlbState->idxReg1, pTlbState->idxReg4);
1030 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg5);
1031# else
1032 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg3,
1033 pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
1034 pCodeBuf[off++] = Armv8A64MkInstrAnd(pTlbState->idxReg1, pTlbState->idxReg1, pTlbState->idxReg3);
1035 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg3, offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbPhysRev));
1036 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
1037# endif
1038# else
1039# error "Port me"
1040# endif
1041# ifndef IEM_WITH_TLB_STATISTICS
1042 /* jne tlbmiss */
1043 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
1044# else
1045 /* je 2F; inc stat; jmp tlbmiss */
1046 uint32_t const offFixup2 = off;
1047 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 16, kIemNativeInstrCond_e);
1048 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
1049 offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissFlagsAndPhysRev));
1050 off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
1051 iemNativeFixupFixedJump(pReNative, offFixup2, off);
1052# endif
1053
1054 /*
1055 * 5. Check that pbMappingR3 isn't NULL (paranoia) and calculate the
1056 * resulting pointer.
1057 *
1058 * For code TLB lookups we have some more work to do here to set various
1059 * IEMCPU members and we return a GCPhys address rather than a host pointer.
1060 */
1061# if defined(RT_ARCH_ARM64)
1062 uint8_t const idxRegMappingPtr = a_fDataTlb && idxRegFlatPtr != idxRegMemResult /* See step 1b. */
1063 ? idxRegMemResult /* saves one instruction */ : pTlbState->idxReg1;
1064# else
1065 uint8_t const idxRegMappingPtr = pTlbState->idxReg1; /** @todo optimize the AMD64 case as well. */
1066# endif
1067# if defined(RT_ARCH_ARM64) && defined(IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR)
1068 if (!a_fDataTlb)
1069 {
1070 /* ldp reg4, reg1, [reg2->GCPhys+pbMappingR3] */
1071 AssertCompileMemberAlignment(IEMTLBENTRY, GCPhys, 16);
1072 AssertCompileAdjacentMembers(IEMTLBENTRY, GCPhys, pbMappingR3);
1073 pCodeBuf[off++] = Armv8A64MkInstrLdPairGpr(pTlbState->idxReg4, idxRegMappingPtr,
1074 pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, GCPhys) / 8);
1075 }
1076 else
1077# endif
1078 {
1079 /* mov reg1, [reg2->pbMappingR3] */
1080 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMappingPtr, pTlbState->idxReg2,
1081 RT_UOFFSETOF(IEMTLBENTRY, pbMappingR3));
1082 }
1083
1084 if (a_fDataTlb)
1085 {
1086 if (idxRegFlatPtr == idxRegMemResult) /* See step 1b. */
1087 {
1088 Assert(idxRegMappingPtr == pTlbState->idxReg1);
1089 /* and result, 0xfff */
1090 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegMemResult, GUEST_PAGE_OFFSET_MASK);
1091 /* add result, reg1 */
1092 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, idxRegMemResult, idxRegMappingPtr);
1093 }
1094 else
1095 {
1096 Assert(idxRegFlatPtr == pTlbState->idxRegPtr);
1097# if defined(RT_ARCH_ARM64)
1098 Assert(idxRegMappingPtr == idxRegMemResult);
1099 AssertCompile(GUEST_PAGE_SIZE <= HOST_PAGE_SIZE);
1100 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxRegMemResult, idxRegFlatPtr, 0, GUEST_PAGE_SHIFT);
1101# else
1102 Assert(idxRegMappingPtr == pTlbState->idxReg1);
1103 /* result = regflat & 0xfff */
1104 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, idxRegMemResult, idxRegFlatPtr, GUEST_PAGE_OFFSET_MASK);
1105 /* add result, reg1 */
1106 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxReg1);
1107# endif
1108 }
1109 }
1110 else
1111 {
1112 /*
1113 * Code TLB use a la iemOpcodeFetchBytesJmp - keep reg2 pointing to the TLBE.
1114 *
1115 * Note. We do not need to set offCurInstrStart or offInstrNextByte.
1116 */
1117# if !defined(RT_ARCH_ARM64) || !defined(IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR)
1118# ifdef RT_ARCH_AMD64
1119 uint8_t const idxReg3 = UINT8_MAX;
1120# else
1121 uint8_t const idxReg3 = pTlbState->idxReg3;
1122# endif
1123 /* Set pbInstrBuf first since we've got it loaded already. */
1124 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg1,
1125 RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf), idxReg3);
1126 /* Set uInstrBufPc to (FlatPC & ~GUEST_PAGE_OFFSET_MASK). */
1127 off = iemNativeEmitGprEqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr, ~(RTGCPTR)GUEST_PAGE_OFFSET_MASK);
1128 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg1,
1129 RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc), idxReg3);
1130 /* Set cbInstrBufTotal to GUEST_PAGE_SIZE. */ /** @todo this is a simplification. Calc right size using CS.LIM and EIP? */
1131 off = iemNativeEmitStoreImmToVCpuU16Ex(pCodeBuf, off, GUEST_PAGE_SIZE, RT_UOFFSETOF(VMCPUCC, iem.s.cbInstrBufTotal),
1132 pTlbState->idxReg1, idxReg3);
1133 /* Now set GCPhysInstrBuf last as we'll be returning it in idxRegMemResult. */
1134# if defined(RT_ARCH_ARM64) && defined(IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR)
1135 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg4,
1136 RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf), idxReg3);
1137# else
1138 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg1,
1139 pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, GCPhys));
1140 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg1,
1141 RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf), idxReg3);
1142# endif
1143# else
1144 /* ARM64: Same as above but using STP. This ASSUMES that we can trash
1145 the 6 bytes following iem.s.cbInstrBufTotal! */
1146 AssertCompileMemberAlignment(VMCPUCC, iem.s.pbInstrBuf, 16);
1147 AssertCompileAdjacentMembers(VMCPUCC, iem.s.pbInstrBuf, iem.s.uInstrBufPc);
1148 AssertCompile(RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf) < 512);
1149 /* idxReg1 = reg2->pbMappingR3 (see previous LDP) */
1150 /* idxReg3 = FlatPC & ~GUEST_PAGE_OFFSET_MASK. */
1151 off = iemNativeEmitGprEqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg3, idxRegFlatPtr, ~(RTGCPTR)GUEST_PAGE_OFFSET_MASK);
1152 pCodeBuf[off++] = Armv8A64MkInstrStPairGpr(pTlbState->idxReg1, pTlbState->idxReg3,
1153 IEMNATIVE_REG_FIXED_PVMCPU, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf) / 8);
1154
1155 AssertCompileMemberAlignment(VMCPUCC, iem.s.GCPhysInstrBuf, 16);
1156 AssertCompileAdjacentMembers(VMCPUCC, iem.s.GCPhysInstrBuf, iem.s.cbInstrBufTotal);
1157 AssertCompile(RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf) < 512);
1158# ifndef IEM_WITH_OPAQUE_DECODER_STATE
1159 AssertCompileAdjacentMembers(VMCPUCC, iem.s.cbInstrBufTotal, iem.s.offCurInstrStart);
1160 AssertCompileAdjacentMembers(VMCPUCC, iem.s.offCurInstrStart, iem.s.fPrefixes); /* these two will be set to ~0. */
1161# endif
1162 /* idxReg4 = reg2->GCPhys (see previous LDP) */
1163 /* idxReg3 = GUEST_PAGE_SIZE | UINT64_C(0xffffffffffff0000) */
1164 pCodeBuf[off++] = Armv8A64MkInstrMovN(pTlbState->idxReg3, ~GUEST_PAGE_SIZE & 0xffff);
1165 pCodeBuf[off++] = Armv8A64MkInstrStPairGpr(pTlbState->idxReg4, pTlbState->idxReg3,
1166 IEMNATIVE_REG_FIXED_PVMCPU, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf) / 8);
1167# endif
1168 if (!a_fNoReturn) /* (We skip this for iemNativeEmitBltLoadTlbAfterBranch.) */
1169 {
1170 /* Set idxRegMemResult. */
1171 if (idxRegFlatPtr == idxRegMemResult) /* See step 1b. */
1172 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegMemResult, GUEST_PAGE_OFFSET_MASK);
1173 else
1174 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, idxRegMemResult, idxRegFlatPtr, GUEST_PAGE_OFFSET_MASK);
1175# if defined(RT_ARCH_ARM64) && defined(IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR)
1176 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxReg4);
1177# else
1178 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxReg1);
1179# endif
1180 }
1181 }
1182
1183# if 0
1184 /*
1185 * To verify the result we call iemNativeHlpCheckTlbLookup via a wrapper.
1186 *
1187 * It's like the state logging, so parameters are passed on the stack.
1188 * iemNativeHlpAsmSafeWrapCheckTlbLookup(pVCpu, result, addr, seg | (a_cbMem << 8) | (a_fAccess << 16))
1189 */
1190 if (a_fDataTlb)
1191 {
1192# ifdef RT_ARCH_AMD64
1193 if (!offDisp && !(a_fAccess & 0x8000))
1194 {
1195 /* push seg | (a_cbMem << 8) | (a_fAccess << 16) */
1196 pCodeBuf[off++] = 0x68;
1197 pCodeBuf[off++] = iSegReg;
1198 pCodeBuf[off++] = a_cbMem;
1199 pCodeBuf[off++] = RT_BYTE1(a_fAccess);
1200 pCodeBuf[off++] = RT_BYTE2(a_fAccess);
1201 }
1202 else
1203 {
1204 /* mov reg1, seg | (a_cbMem << 8) | (a_fAccess << 16) | (offDisp << 32) */
1205 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg1,
1206 iSegReg | ((uint32_t)a_cbMem << 8) | (a_fAccess << 16) | ((uint64_t)offDisp << 32));
1207 /* push reg1 */
1208 if (pTlbState->idxReg1 >= 8)
1209 pCodeBuf[off++] = X86_OP_REX_B;
1210 pCodeBuf[off++] = 0x50 + (pTlbState->idxReg1 & 7);
1211 }
1212 /* push pTlbState->idxRegPtr / immediate address. */
1213 if (pTlbState->idxRegPtr != UINT8_MAX)
1214 {
1215 if (pTlbState->idxRegPtr >= 8)
1216 pCodeBuf[off++] = X86_OP_REX_B;
1217 pCodeBuf[off++] = 0x50 + (pTlbState->idxRegPtr & 7);
1218 }
1219 else
1220 {
1221 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->uAbsPtr);
1222 if (pTlbState->idxReg1 >= 8)
1223 pCodeBuf[off++] = X86_OP_REX_B;
1224 pCodeBuf[off++] = 0x50 + (pTlbState->idxReg1 & 7);
1225 }
1226 /* push idxRegMemResult */
1227 if (idxRegMemResult >= 8)
1228 pCodeBuf[off++] = X86_OP_REX_B;
1229 pCodeBuf[off++] = 0x50 + (idxRegMemResult & 7);
1230 /* push pVCpu */
1231 pCodeBuf[off++] = 0x50 + IEMNATIVE_REG_FIXED_PVMCPU;
1232 /* reg1 = helper; call reg1 */
1233 off = iemNativeEmitCallImmEx(pCodeBuf, off, (uintptr_t)iemNativeHlpAsmSafeWrapCheckTlbLookup, pTlbState->idxReg1);
1234 /* The stack is cleaned up by the helper function. */
1235
1236# elif defined(RT_ARCH_ARM64)
1237 /* Use the temporary registers for setting up the "call frame" and making the call. */
1238 /* reg1 = seg | (a_cbMem << 8) | (a_fAccess << 16) */
1239 pCodeBuf[off++] = Armv8A64MkInstrMovZ(pTlbState->idxReg1, RT_MAKE_U16(iSegReg, a_cbMem));
1240 pCodeBuf[off++] = Armv8A64MkInstrMovK(pTlbState->idxReg1, RT_LO_U16(a_fAccess), 1);
1241 if (offDisp)
1242 pCodeBuf[off++] = Armv8A64MkInstrMovK(pTlbState->idxReg1, offDisp, 2);
1243 if (pTlbState->idxRegPtr != UINT8_MAX)
1244 {
1245 /* stp idxRegPtr, reg1, [sp, #-16]! */
1246 pCodeBuf[off++] = Armv8A64MkInstrStPairGpr(pTlbState->idxRegPtr, pTlbState->idxReg1,
1247 ARMV8_A64_REG_SP, -2, kArm64InstrStLdPairType_PreIndex);
1248 }
1249 else
1250 {
1251 /* reg2 = immediate address */
1252 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg2, pTlbState->uAbsPtr);
1253 /* stp reg2, reg1, [sp, #-16]! */
1254 pCodeBuf[off++] = Armv8A64MkInstrStPairGpr(pTlbState->idxReg2, pTlbState->idxReg1,
1255 ARMV8_A64_REG_SP, -2, kArm64InstrStLdPairType_PreIndex);
1256 }
1257 /* stp pVCpu, idxRegMemResult, [sp, #-16]! (we don't need pVCpu, but push it for stack alignment) */
1258 pCodeBuf[off++] = Armv8A64MkInstrStPairGpr(IEMNATIVE_REG_FIXED_PVMCPU, idxRegMemResult,
1259 ARMV8_A64_REG_SP, -2, kArm64InstrStLdPairType_PreIndex);
1260 /* reg1 = helper; blr reg1 */
1261 off = iemNativeEmitCallImmEx(pCodeBuf, off, (uintptr_t)iemNativeHlpAsmSafeWrapCheckTlbLookup, pTlbState->idxReg1);
1262 /* The stack is cleaned up by the helper function. */
1263
1264# else
1265# error "Port me"
1266# endif
1267 }
1268
1269# endif
1270
1271 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1272
1273 return off;
1274}
1275#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1276
1277
1278/** @} */
1279
1280#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerTlbLookup_h */
1281
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette