VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompBltIn.cpp@ 102850

Last change on this file since 102850 was 102850, checked in by vboxsync, 15 months ago

VMM/IEM: Implemented the first of two code TLB lookups. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
1/* $Id: IEMAllN8veRecompBltIn.cpp 102850 2024-01-12 00:47:47Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, Emitters for Built-In Threaded Functions.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33//#define IEM_WITH_OPAQUE_DECODER_STATE - need offCurInstrStart access for iemNativeHlpMemCodeNewPageTlbMiss and friends.
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#include <VBox/vmm/iem.h>
37#include <VBox/vmm/cpum.h>
38#include <VBox/vmm/dbgf.h>
39#include "IEMInternal.h"
40#include <VBox/vmm/vmcc.h>
41#include <VBox/log.h>
42#include <VBox/err.h>
43#include <VBox/param.h>
44#include <iprt/assert.h>
45#include <iprt/string.h>
46#if defined(RT_ARCH_AMD64)
47# include <iprt/x86.h>
48#elif defined(RT_ARCH_ARM64)
49# include <iprt/armv8.h>
50#endif
51
52
53#include "IEMInline.h"
54#include "IEMThreadedFunctions.h"
55#include "IEMN8veRecompiler.h"
56#include "IEMN8veRecompilerEmit.h"
57#include "IEMN8veRecompilerTlbLookup.h"
58
59
60
61/*********************************************************************************************************************************
62* TB Helper Functions *
63*********************************************************************************************************************************/
64#ifdef RT_ARCH_AMD64
65DECLASM(void) iemNativeHlpAsmSafeWrapLogCpuState(void);
66#endif
67
68
69/**
70 * Used by TB code to deal with a TLB miss for a new page.
71 */
72IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCodeNewPageTlbMiss,(PVMCPUCC pVCpu))
73{
74 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeCodeTlbMissesNewPage);
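    /* Reset the instruction buffer state so that the iemOpcodeFetchBytesJmp call
       below translates and maps the start of the following guest page. */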
75 pVCpu->iem.s.pbInstrBuf = NULL;
76 pVCpu->iem.s.offCurInstrStart = GUEST_PAGE_SIZE;
77 pVCpu->iem.s.offInstrNextByte = GUEST_PAGE_SIZE;
78 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
79 if (pVCpu->iem.s.pbInstrBuf)
80 { /* likely */ }
81 else
82 {
83 IEM_DO_LONGJMP(pVCpu, VINF_IEM_REEXEC_BREAK);
84 }
85}
86
87
88/**
89 * Used by TB code to deal with a TLB miss for a new page, variant with an
 *  instruction offset.  Returns the physical address of the new page, or
90 *  NIL_RTGCPHYS on failure.
 */
91IEM_DECL_NATIVE_HLP_DEF(RTGCPHYS, iemNativeHlpMemCodeNewPageTlbMissWithOff,(PVMCPUCC pVCpu, uint8_t offInstr))
92{
93 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeCodeTlbMissesNewPageWithOffset);
94 pVCpu->iem.s.pbInstrBuf = NULL;
95 pVCpu->iem.s.offCurInstrStart = GUEST_PAGE_SIZE - offInstr;
96 pVCpu->iem.s.offInstrNextByte = GUEST_PAGE_SIZE;
97 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
98 return pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf : NIL_RTGCPHYS;
99}
100
101
102/*********************************************************************************************************************************
103* Builtin functions *
104*********************************************************************************************************************************/
105
106/**
107 * Built-in function that does nothing.
108 *
109 * Whether this is called or not can be controlled by the entry in the
110 * IEMThreadedGenerator.katBltIns table. This can be useful to determine
111 * why behaviour changes when enabling the LogCpuState builtins, i.e.
112 * whether it's the reduced call count in the TBs or the threaded calls flushing
113 * register state.
114 */
115IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_Nop)
116{
117 RT_NOREF(pReNative, pCallEntry);
118 return off;
119}
120
121
122/**
123 * Emits code for LogCpuState.
124 *
125 * This shouldn't have any relevant impact on the recompiler state.
126 */
127IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_LogCpuState)
128{
129#ifdef RT_ARCH_AMD64
130 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
131 /* push rax */
132 pbCodeBuf[off++] = 0x50 + X86_GREG_xAX;
133 /* push imm32 */
134 pbCodeBuf[off++] = 0x68;
135 pbCodeBuf[off++] = RT_BYTE1(pCallEntry->auParams[0]);
136 pbCodeBuf[off++] = RT_BYTE2(pCallEntry->auParams[0]);
137 pbCodeBuf[off++] = RT_BYTE3(pCallEntry->auParams[0]);
138 pbCodeBuf[off++] = RT_BYTE4(pCallEntry->auParams[0]);
139 /* mov rax, iemNativeHlpAsmSafeWrapLogCpuState */
140 pbCodeBuf[off++] = X86_OP_REX_W;
141 pbCodeBuf[off++] = 0xb8 + X86_GREG_xAX;
142 *(uint64_t *)&pbCodeBuf[off] = (uintptr_t)iemNativeHlpAsmSafeWrapLogCpuState;
143 off += sizeof(uint64_t);
144 /* call rax */
145 pbCodeBuf[off++] = 0xff;
146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
147 /* pop rax */
148 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
149 /* pop rax */
150 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
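    /* Note: the two pops drop the pushed parameter and then restore the original RAX. */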
151 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
152
153#else
154 /** @todo Implement this */
155 AssertFailed();
156 RT_NOREF(pReNative, pCallEntry);
157#endif
158 return off;
159}
160
161
162/**
163 * Built-in function that calls a C-implementation function taking zero arguments.
164 */
165IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
166{
167 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
168 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
169 uint64_t const fGstShwFlush = pCallEntry->auParams[2];
170 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
171}
172
173
174/**
175 * Built-in function that checks for pending interrupts that can be delivered or
176 * forced action flags.
177 *
178 * This triggers after the completion of an instruction, so EIP is already at
179 * the next instruction. If an IRQ or important FF is pending, this will return
180 * a non-zero status that stops TB execution.
181 */
182IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
183{
184 RT_NOREF(pCallEntry);
185
186 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
187 and I'm too lazy to create a 'Fixed' version of that one. */
188 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
189 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
190
191 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
192
193 /* Again, we need to load the extended EFLAGS before we actually need them
194 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
195 loaded them inside the check, as the shadow state would not be correct
196 when the code branches before the load. Ditto PC. */
197 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
198 kIemNativeGstRegUse_ReadOnly);
199
200 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
201
202 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
203
204 /*
205 * Start by checking the local forced actions of the EMT we're on for IRQs
206 * and other FFs that need servicing.
207 */
208 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
209 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
210 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
211 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
212 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
213 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
214 | VMCPU_FF_TLB_FLUSH
215 | VMCPU_FF_UNHALT ),
216 true /*fSetFlags*/);
217 /* If we end up with ZERO in idxTmpReg there is nothing to do.*/
218 uint32_t const offFixupJumpToVmCheck1 = off;
219 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
220
221 /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
222 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow.
223 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
224 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
225 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
226 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
227
228 /* So, it's only interrupt related FFs and we need to see if IRQs are being
229 suppressed by the CPU or not. */
230 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
231 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
232 idxLabelReturnBreak);
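    /* At this point in the emitted code EFLAGS.IF is known to be set: with no
       shadow bits flagged the IRQ is deliverable and the jump above breaks out
       of the TB; otherwise we verify below that the shadow applies to the
       current PC. */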
233
234 /* We've got shadow flags set, so we must check that the PC they are valid
235 for matches our current PC value. */
236 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
237 * a register. */
238 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
239 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
240
241 /*
242 * Now check the force flags of the VM.
243 */
244 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
245 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
246 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
247 off = iemNativeEmitLoadGprByGprU32(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
248 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
249 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
250
251 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
252
253 /*
254 * We're good, no IRQs or FFs pending.
255 */
256 iemNativeRegFreeTmp(pReNative, idxTmpReg);
257 iemNativeRegFreeTmp(pReNative, idxEflReg);
258 iemNativeRegFreeTmp(pReNative, idxPcReg);
259
260 return off;
261}
262
263
264/**
265 * Built-in function that checks if IEMCPU::fExec has the expected value.
266 */
267IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
268{
269 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
270 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
271
272 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
273 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
274 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
275 kIemNativeLabelType_ReturnBreak);
276 iemNativeRegFreeTmp(pReNative, idxTmpReg);
277 return off;
278}
279
280
281/**
282 * Sets idxTbCurInstr in preparation of raising an exception or aborting the TB.
283 */
284/** @todo Optimize this, so we don't set the same value more than once. Just
285 * needs some tracking. */
286#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
287# define BODY_SET_CUR_INSTR() \
288 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr))
289#else
290# define BODY_SET_CUR_INSTR() ((void)0)
291#endif
292
293/**
294 * Flushes pending writes in preparation of raising an exception or aborting the TB.
295 */
296#define BODY_FLUSH_PENDING_WRITES() \
297 off = iemNativeRegFlushPendingWrites(pReNative, off);
298
299
300/**
301 * Macro that emits the 16/32-bit CS.LIM check.
302 */
303#define BODY_CHECK_CS_LIM(a_cbInstr) \
304 off = iemNativeEmitBltInCheckCsLim(pReNative, off, (a_cbInstr))
305
306DECL_FORCE_INLINE(uint32_t)
307iemNativeEmitBltInCheckCsLim(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
308{
309 Assert(cbInstr > 0);
310 Assert(cbInstr < 16);
311#ifdef VBOX_STRICT
312 off = iemNativeEmitMarker(pReNative, off, 0x80000001);
313#endif
314
315 /*
316 * We need CS.LIM and RIP here. When cbInstr is larger than 1, we also need
317 * a temporary register for calculating the last address of the instruction.
318 *
319 * The calculation and comparisons are 32-bit. We ASSUME that the incoming
320 * RIP isn't totally invalid, i.e. that any jump/call/ret/iret instruction
321 * that last updated EIP here checked it already, and that we're therefore
322 * safe in the 32-bit wrap-around scenario to only check that the last byte
323 * is within CS.LIM. In the case of instruction-by-instruction advancing
324 * up to an EIP wrap-around, we know that CS.LIM is 4G-1 because the limit
325 * must be using 4KB granularity and the previous instruction was fine.
326 */
327 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
328 kIemNativeGstRegUse_ReadOnly);
329 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
330 kIemNativeGstRegUse_ReadOnly);
331#ifdef RT_ARCH_AMD64
332 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
333#elif defined(RT_ARCH_ARM64)
334 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
335#else
336# error "Port me"
337#endif
338
339 if (cbInstr != 1)
340 {
341 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
342
343 /*
344 * 1. idxRegTmp = idxRegPc + cbInstr - 1 (the address of the last instruction byte);
345 * 2. if idxRegTmp > idxRegCsLim then raise #GP(0).
346 */
347#ifdef RT_ARCH_AMD64
348 /* 1. lea tmp32, [Pc + cbInstr - 1] */
349 if (idxRegTmp >= 8 || idxRegPc >= 8)
350 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegPc < 8 ? 0 : X86_OP_REX_B);
351 pbCodeBuf[off++] = 0x8d;
352 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, idxRegTmp & 7, idxRegPc & 7);
353 if ((idxRegPc & 7) == X86_GREG_xSP)
354 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegPc & 7, 4 /*no index*/, 0);
355 pbCodeBuf[off++] = cbInstr - 1;
356
357 /* 2. cmp tmp32(r), CsLim(r/m). */
358 if (idxRegTmp >= 8 || idxRegCsLim >= 8)
359 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
360 pbCodeBuf[off++] = 0x3b;
361 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegTmp & 7, idxRegCsLim & 7);
362
363#elif defined(RT_ARCH_ARM64)
364 /* 1. add tmp32, Pc, #cbInstr-1 */
365 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegTmp, idxRegPc, cbInstr - 1, false /*f64Bit*/);
366 /* 2. cmp tmp32, CsLim */
367 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegTmp, idxRegCsLim,
368 false /*f64Bit*/, true /*fSetFlags*/);
369
370#endif
371 iemNativeRegFreeTmp(pReNative, idxRegTmp);
372 }
373 else
374 {
375 /*
376 * Here we can skip step 1 and compare PC and CS.LIM directly.
377 */
378#ifdef RT_ARCH_AMD64
379 /* 2. cmp eip(r), CsLim(r/m). */
380 if (idxRegPc >= 8 || idxRegCsLim >= 8)
381 pbCodeBuf[off++] = (idxRegPc < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
382 pbCodeBuf[off++] = 0x3b;
383 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegPc & 7, idxRegCsLim & 7);
384
385#elif defined(RT_ARCH_ARM64)
386 /* 2. cmp Pc, CsLim */
387 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegPc, idxRegCsLim,
388 false /*f64Bit*/, true /*fSetFlags*/);
389
390#endif
391 }
392
393 /* 3. Jump if greater. */
394 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
395
396 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
397 iemNativeRegFreeTmp(pReNative, idxRegPc);
398 return off;
399}
400
401
402/**
403 * Macro that considers whether we need CS.LIM checking after a branch or
404 * crossing over to a new page.
405 */
406#define BODY_CONSIDER_CS_LIM_CHECKING(a_pTb, a_cbInstr) \
407 RT_NOREF(a_cbInstr); \
408 off = iemNativeEmitBltInConsiderLimChecking(pReNative, off)
409
410DECL_FORCE_INLINE(uint32_t)
411iemNativeEmitBltInConsiderLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off)
412{
413#ifdef VBOX_STRICT
414 off = iemNativeEmitMarker(pReNative, off, 0x80000002);
415#endif
416
417 /*
418 * This check must match the one in iemGetTbFlagsForCurrentPc
419 * exactly:
420 *
421 * int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
422 * if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
423 * return fRet;
424 * return fRet | IEMTB_F_CS_LIM_CHECKS;
425 *
426 *
427 * We need EIP, CS.LIM and CS.BASE here.
428 */
429
430 /* Calculate the offFromLim first: */
431 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
432 kIemNativeGstRegUse_ReadOnly);
433 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
434 kIemNativeGstRegUse_ReadOnly);
435 uint8_t const idxRegLeft = iemNativeRegAllocTmp(pReNative, &off);
436
437#ifdef RT_ARCH_ARM64
438 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
439 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegLeft, idxRegCsLim, idxRegPc);
440 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
441#else
442 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegLeft, idxRegCsLim);
443 off = iemNativeEmitSubTwoGprs(pReNative, off, idxRegLeft, idxRegPc);
444#endif
445
446 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
447 iemNativeRegFreeTmp(pReNative, idxRegPc);
448
449 /* Calculate the threshold level (right side). */
450 uint8_t const idxRegCsBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
451 kIemNativeGstRegUse_ReadOnly);
452 uint8_t const idxRegRight = iemNativeRegAllocTmp(pReNative, &off);
453
454#ifdef RT_ARCH_ARM64
455 pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
456 Assert(Armv8A64ConvertImmRImmS2Mask32(11, 0) == GUEST_PAGE_OFFSET_MASK);
457 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegRight, idxRegCsBase, 11, 0, false /*f64Bit*/);
458 pu32CodeBuf[off++] = Armv8A64MkInstrNeg(idxRegRight);
459 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
460 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
461 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
462
463#else
464 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegRight, GUEST_PAGE_OFFSET_MASK);
465 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegRight, idxRegCsBase);
466 off = iemNativeEmitNegGpr(pReNative, off, idxRegRight);
467 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRight, X86_PAGE_SIZE + 16);
468#endif
469
470 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
471
472 /* Compare the two and jump out if we're too close to the limit. */
473 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegLeft, idxRegRight);
474 off = iemNativeEmitJlToNewLabel(pReNative, off, kIemNativeLabelType_NeedCsLimChecking);
475
476 iemNativeRegFreeTmp(pReNative, idxRegRight);
477 iemNativeRegFreeTmp(pReNative, idxRegLeft);
478 return off;
479}
480
481
482
483/**
484 * Macro that implements opcode (re-)checking.
485 */
486#define BODY_CHECK_OPCODES(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
487 RT_NOREF(a_cbInstr); \
488 off = iemNativeEmitBltInCheckOpcodes(pReNative, off, (a_pTb), (a_idxRange), (a_offRange))
489
490#if 0 /* debugging aid */
491bool g_fBpOnObsoletion = false;
492# define BP_ON_OBSOLETION g_fBpOnObsoletion
493#else
494# define BP_ON_OBSOLETION 0
495#endif
496
497DECL_FORCE_INLINE(uint32_t)
498iemNativeEmitBltInCheckOpcodes(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint16_t offRange)
499{
500 Assert(idxRange < pTb->cRanges && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges));
501 Assert(offRange < pTb->aRanges[idxRange].cbOpcodes);
502#ifdef VBOX_STRICT
503 off = iemNativeEmitMarker(pReNative, off, 0x80000003);
504#endif
505
506 uint32_t const idxLabelObsoleteTb = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ObsoleteTb);
507
508 /*
509 * Where to start and how much to compare.
510 *
511 * Looking at the ranges produced when r160746 was running a DOS VM with TB
512 * logging, the ranges can be anything from 1 byte to at least 0x197 bytes,
513 * with the 6, 5, 4, 7, 8, 40, 3, 2, 9 and 10 being the top 10 in the sample.
514 *
515 * The top 10 for the early boot phase of a 64-bit debian 9.4 VM: 5, 9, 8,
516 * 12, 10, 11, 6, 13, 15 and 16. Max 0x359 bytes. Same revision as above.
517 */
518 uint16_t offPage = pTb->aRanges[idxRange].offPhysPage + offRange;
519 uint16_t cbLeft = pTb->aRanges[idxRange].cbOpcodes - offRange;
520 Assert(cbLeft > 0);
521 uint8_t const *pbOpcodes = &pTb->pabOpcodes[pTb->aRanges[idxRange].offOpcodes + offRange];
522 uint32_t offConsolidatedJump = UINT32_MAX;
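    /* offConsolidatedJump records the position of the shared jmp rel32 to the
       obsolete-TB label once it has been emitted, so later miscompares can reach
       it with a cheap 2-byte jnz (see CHECK_OPCODES_CMP_JMP below). */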
523
524#ifdef RT_ARCH_AMD64
525 /* AMD64/x86 offers a bunch of options. Smaller stuff can be
526 completely inlined; for larger we use REPE CMPS. */
527# define CHECK_OPCODES_CMP_IMMXX(a_idxReg, a_bOpcode) /* cost: 3 bytes */ do { \
528 pbCodeBuf[off++] = a_bOpcode; \
529 Assert(offPage < 127); \
530 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 7, a_idxReg); \
531 pbCodeBuf[off++] = RT_BYTE1(offPage); \
532 } while (0)
533
534# define CHECK_OPCODES_CMP_JMP() /* cost: 7 bytes first time, then 2 bytes */ do { \
535 if (offConsolidatedJump != UINT32_MAX) \
536 { \
537 int32_t const offDisp = (int32_t)offConsolidatedJump - (int32_t)(off + 2); \
538 Assert(offDisp >= -128); \
539 pbCodeBuf[off++] = 0x75; /* jnz near */ \
540 pbCodeBuf[off++] = (uint8_t)offDisp; \
541 } \
542 else \
543 { \
544 pbCodeBuf[off++] = 0x74; /* jz near +5 */ \
545 pbCodeBuf[off++] = 0x05 + BP_ON_OBSOLETION; \
546 offConsolidatedJump = off; \
547 if (BP_ON_OBSOLETION) pbCodeBuf[off++] = 0xcc; \
548 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */ \
549 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_Rel32, -4); \
550 pbCodeBuf[off++] = 0x00; \
551 pbCodeBuf[off++] = 0x00; \
552 pbCodeBuf[off++] = 0x00; \
553 pbCodeBuf[off++] = 0x00; \
554 } \
555 } while (0)
556
557# define CHECK_OPCODES_CMP_IMM32(a_idxReg) /* cost: 3+4+2 = 9 */ do { \
558 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
559 pbCodeBuf[off++] = *pbOpcodes++; \
560 pbCodeBuf[off++] = *pbOpcodes++; \
561 pbCodeBuf[off++] = *pbOpcodes++; \
562 pbCodeBuf[off++] = *pbOpcodes++; \
563 cbLeft -= 4; \
564 offPage += 4; \
565 CHECK_OPCODES_CMP_JMP(); \
566 } while (0)
567
568# define CHECK_OPCODES_CMP_IMM16(a_idxReg) /* cost: 1+3+2+2 = 8 */ do { \
569 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; \
570 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
571 pbCodeBuf[off++] = *pbOpcodes++; \
572 pbCodeBuf[off++] = *pbOpcodes++; \
573 cbLeft -= 2; \
574 offPage += 2; \
575 CHECK_OPCODES_CMP_JMP(); \
576 } while (0)
577
578# define CHECK_OPCODES_CMP_IMM8(a_idxReg) /* cost: 3+1+2 = 6 */ do { \
579 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x80); \
580 pbCodeBuf[off++] = *pbOpcodes++; \
581 cbLeft -= 1; \
582 offPage += 1; \
583 CHECK_OPCODES_CMP_JMP(); \
584 } while (0)
585
586# define CHECK_OPCODES_CMPSX(a_bOpcode, a_cbToSubtract, a_bPrefix) /* cost: 2+2 = 4 */ do { \
587 if (a_bPrefix) \
588 pbCodeBuf[off++] = (a_bPrefix); \
589 pbCodeBuf[off++] = (a_bOpcode); \
590 CHECK_OPCODES_CMP_JMP(); \
591 cbLeft -= (a_cbToSubtract); \
592 } while (0)
593
594# define CHECK_OPCODES_ECX_IMM(a_uValue) /* cost: 5 */ do { \
595 pbCodeBuf[off++] = 0xb8 + X86_GREG_xCX; \
596 pbCodeBuf[off++] = RT_BYTE1(a_uValue); \
597 pbCodeBuf[off++] = RT_BYTE2(a_uValue); \
598 pbCodeBuf[off++] = RT_BYTE3(a_uValue); \
599 pbCodeBuf[off++] = RT_BYTE4(a_uValue); \
600 } while (0)
601
602 if (cbLeft <= 24)
603 {
604 uint8_t const idxRegTmp = iemNativeRegAllocTmpEx(pReNative, &off,
605 ( RT_BIT_32(X86_GREG_xAX)
606 | RT_BIT_32(X86_GREG_xCX)
607 | RT_BIT_32(X86_GREG_xDX)
608 | RT_BIT_32(X86_GREG_xBX)
609 | RT_BIT_32(X86_GREG_xSI)
610 | RT_BIT_32(X86_GREG_xDI))
611 & ~IEMNATIVE_REG_FIXED_MASK); /* pick reg not requiring rex prefix */
612 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf));
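        /* Keep the displacement within the disp8 form required by the
           CHECK_OPCODES_CMP_IMMXX macro (offPage < 127) by folding most of
           offPage into the base register and keeping only the low two bits. */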
613 if (offPage >= 128 - cbLeft)
614 {
615 off = iemNativeEmitAddGprImm(pReNative, off, idxRegTmp, offPage & ~(uint16_t)3);
616 offPage &= 3;
617 }
618
619 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 14 + 54 + 8 + 6 + BP_ON_OBSOLETION /* = 87 */);
620
621 if (cbLeft > 8)
622 switch (offPage & 3)
623 {
624 case 0:
625 break;
626 case 1: /* cost: 6 + 8 = 14 */
627 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
628 RT_FALL_THRU();
629 case 2: /* cost: 8 */
630 CHECK_OPCODES_CMP_IMM16(idxRegTmp);
631 break;
632 case 3: /* cost: 6 */
633 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
634 break;
635 }
636
637 while (cbLeft >= 4)
638 CHECK_OPCODES_CMP_IMM32(idxRegTmp); /* max iteration: 24/4 = 6; --> cost: 6 * 9 = 54 */
639
640 if (cbLeft >= 2)
641 CHECK_OPCODES_CMP_IMM16(idxRegTmp); /* cost: 8 */
642 if (cbLeft)
643 CHECK_OPCODES_CMP_IMM8(idxRegTmp); /* cost: 6 */
644
645 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
646 iemNativeRegFreeTmp(pReNative, idxRegTmp);
647 }
648 else
649 {
650 /* RDI = &pbInstrBuf[offPage] */
651 uint8_t const idxRegDi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xDI));
652 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegDi, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
653 if (offPage != 0)
654 off = iemNativeEmitAddGprImm(pReNative, off, idxRegDi, offPage);
655
656 /* RSI = pbOpcodes */
657 uint8_t const idxRegSi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xSI));
658 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegSi, (uintptr_t)pbOpcodes);
659
660 /* RCX = counts. */
661 uint8_t const idxRegCx = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xCX));
662
663 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 10 + 5 + 5 + 3 + 4 + 3 + BP_ON_OBSOLETION /*= 35*/);
664
665 /** @todo profile and optimize this further. Maybe an idea to align by
666 * offPage if the two cannot be reconciled. */
667 /* Align by the page offset, so that at least one of the accesses is naturally aligned. */
668 switch (offPage & 7) /* max cost: 10 */
669 {
670 case 0:
671 break;
672 case 1: /* cost: 3+4+3 = 10 */
673 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
674 RT_FALL_THRU();
675 case 2: /* cost: 4+3 = 7 */
676 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
677 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
678 break;
679 case 3: /* cost: 3+3 = 6 */
680 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
681 RT_FALL_THRU();
682 case 4: /* cost: 3 */
683 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
684 break;
685 case 5: /* cost: 3+4 = 7 */
686 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
687 RT_FALL_THRU();
688 case 6: /* cost: 4 */
689 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
690 break;
691 case 7: /* cost: 3 */
692 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
693 break;
694 }
695
696 /* Compare qwords: */
697 uint32_t const cQWords = cbLeft >> 3;
698 CHECK_OPCODES_ECX_IMM(cQWords); /* cost: 5 */
699
700 pbCodeBuf[off++] = X86_OP_PRF_REPZ; /* cost: 5 */
701 CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_REX_W);
702 cbLeft &= 7;
703
704 if (cbLeft & 4)
705 CHECK_OPCODES_CMPSX(0xa7, 4, 0); /* cost: 3 */
706 if (cbLeft & 2)
707 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP); /* cost: 4 */
708 if (cbLeft & 1)
709 CHECK_OPCODES_CMPSX(0xa6, 1, 0); /* cost: 3 */
710
711 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
712 iemNativeRegFreeTmp(pReNative, idxRegCx);
713 iemNativeRegFreeTmp(pReNative, idxRegSi);
714 iemNativeRegFreeTmp(pReNative, idxRegDi);
715 }
716
717#elif defined(RT_ARCH_ARM64)
718 /* We need pbInstrBuf in a register, whatever we do. */
719 uint8_t const idxRegSrc1Ptr = iemNativeRegAllocTmp(pReNative, &off);
720 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegSrc1Ptr, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
721
722 /* We also need at least one more register for holding bytes & words we
723 load via pbInstrBuf. */
724 uint8_t const idxRegSrc1Val = iemNativeRegAllocTmp(pReNative, &off);
725
726 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
727
728 /* One byte compare can be done with the opcode byte as an immediate. We'll
729 do this to uint16_t align src1. */
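 /* fPendingJmp tracks whether the condition flags currently hold an opcode
    comparison result that still needs the final conditional branch to the
    obsolete-TB label; follow-up comparisons are chained via CCMP so a single
    branch at the end covers them all. */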
730 bool fPendingJmp = RT_BOOL(offPage & 1);
731 if (fPendingJmp)
732 {
733 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
734 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, *pbOpcodes++, false /*f64Bit*/);
735 offPage += 1;
736 cbLeft -= 1;
737 }
738
739 if (cbLeft > 0)
740 {
741 /* We need a register for holding the opcode bytes we're comparing with,
742 as CCMP only has a 5-bit immediate form and thus cannot hold bytes. */
743 uint8_t const idxRegSrc2Val = iemNativeRegAllocTmp(pReNative, &off);
744
745 /* Word (uint32_t) aligning the src1 pointer is best done using a 16-bit constant load. */
746 if ((offPage & 3) && cbLeft >= 2)
747 {
748 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 2);
749 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
750 if (fPendingJmp)
751 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
752 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
753 else
754 {
755 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
756 fPendingJmp = true;
757 }
758 pbOpcodes += 2;
759 offPage += 2;
760 cbLeft -= 2;
761 }
762
763 /* DWord (uint64_t) aligning the src1 pointer. We use a 32-bit constant here for simplicity. */
764 if ((offPage & 7) && cbLeft >= 4)
765 {
766 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 4);
767 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
768 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
769 if (fPendingJmp)
770 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
771 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
772 else
773 {
774 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
775 fPendingJmp = true;
776 }
777 pbOpcodes += 4;
778 offPage += 4;
779 cbLeft -= 4;
780 }
781
782 /*
783 * If we've got 16 bytes or more left, switch to memcmp-style.
784 */
785 if (cbLeft >= 16)
786 {
787 /* We need a pointer to the copy of the original opcode bytes. */
788 uint8_t const idxRegSrc2Ptr = iemNativeRegAllocTmp(pReNative, &off);
789 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Ptr, (uintptr_t)pbOpcodes);
790
791 /* If there are 64 bytes or more to compare we create a loop that handles
792 32 bytes per iteration, for which we'll need a loop register. */
793 if (cbLeft >= 64)
794 {
795 if (fPendingJmp)
796 {
797 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
798 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
799 fPendingJmp = false;
800 }
801
802 uint8_t const idxRegLoop = iemNativeRegAllocTmp(pReNative, &off);
803 uint16_t const cLoops = cbLeft / 32;
804 cbLeft = cbLeft % 32;
805 pbOpcodes += cLoops * 32;
806 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegLoop, cLoops);
807
808 if (offPage != 0) /** @todo optimize out this instruction. */
809 {
810 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, offPage);
811 offPage = 0;
812 }
813
814 uint32_t const offLoopStart = off;
815 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 0);
816 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 0);
817 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
818
819 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 1);
820 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 1);
821 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
822 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
823
824 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 2);
825 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 2);
826 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
827 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
828
829 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 3);
830 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 3);
831 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
832 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
833
834 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
835 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
836
837 /* Advance and loop. */
838 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, 0x20);
839 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc2Ptr, idxRegSrc2Ptr, 0x20);
840 pu32CodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegLoop, idxRegLoop, 1, false /*f64Bit*/, true /*fSetFlags*/);
841 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, (int32_t)offLoopStart - (int32_t)off);
842
843 iemNativeRegFreeTmp(pReNative, idxRegLoop);
844 }
845
846 /* Deal with any remaining dwords (uint64_t). There can be up to
847 three if we looped and four if we didn't. */
848 uint32_t offSrc2 = 0;
849 while (cbLeft >= 8)
850 {
851 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val,
852 idxRegSrc1Ptr, offPage / 8);
853 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val,
854 idxRegSrc2Ptr, offSrc2 / 8);
855 if (fPendingJmp)
856 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
857 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
858 else
859 {
860 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
861 fPendingJmp = true;
862 }
863 pbOpcodes += 8;
864 offPage += 8;
865 offSrc2 += 8;
866 cbLeft -= 8;
867 }
868
869 iemNativeRegFreeTmp(pReNative, idxRegSrc2Ptr);
870 /* max cost thus far: memcmp-loop=43 vs memcmp-no-loop=30 */
871 }
872 /*
873 * Otherwise, we compare with constants and merge with the general mop-up.
874 */
875 else
876 {
877 while (cbLeft >= 8)
878 {
879 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr,
880 offPage / 8);
881 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Val,
882 RT_MAKE_U64_FROM_MSB_U8(pbOpcodes[7], pbOpcodes[6], pbOpcodes[5], pbOpcodes[4],
883 pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
884 if (fPendingJmp)
885 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
886 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, true /*f64Bit*/);
887 else
888 {
889 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, true /*f64Bit*/);
890 fPendingJmp = true;
891 }
892 pbOpcodes += 8;
893 offPage += 8;
894 cbLeft -= 8;
895 }
896 /* max cost thus far: 21 */
897 }
898
899 /* Deal with any remaining bytes (7 or less). */
900 Assert(cbLeft < 8);
901 if (cbLeft >= 4)
902 {
903 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr,
904 offPage / 4);
905 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
906 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
907 if (fPendingJmp)
908 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
909 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
910 else
911 {
912 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
913 fPendingJmp = true;
914 }
915 pbOpcodes += 4;
916 offPage += 4;
917 cbLeft -= 4;
918
919 }
920
921 if (cbLeft >= 2)
922 {
923 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr,
924 offPage / 2);
925 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
926 if (fPendingJmp)
927 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
928 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
929 else
930 {
931 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
932 fPendingJmp = true;
933 }
934 pbOpcodes += 2;
935 offPage += 2;
936 cbLeft -= 2;
937 }
938
939 if (cbLeft > 0)
940 {
941 Assert(cbLeft == 1);
942 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
943 if (fPendingJmp)
944 {
945 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, pbOpcodes[0]);
946 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
947 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
948 }
949 else
950 {
951 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, pbOpcodes[0], false /*f64Bit*/);
952 fPendingJmp = true;
953 }
954 pbOpcodes += 1;
955 offPage += 1;
956 cbLeft -= 1;
957 }
958
959 iemNativeRegFreeTmp(pReNative, idxRegSrc2Val);
960 }
961 Assert(cbLeft == 0);
962
963 /*
964 * Finally, the branch on difference.
965 */
966 if (fPendingJmp)
967 {
968 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
969 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
970 }
971 RT_NOREF(pu32CodeBuf, cbLeft, offPage, pbOpcodes, offConsolidatedJump, idxLabelObsoleteTb);
972
973 /* max costs: memcmp-loop=54; memcmp-no-loop=41; only-src1-ptr=32 */
974 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
975 iemNativeRegFreeTmp(pReNative, idxRegSrc1Val);
976 iemNativeRegFreeTmp(pReNative, idxRegSrc1Ptr);
977
978#else
979# error "Port me"
980#endif
981 return off;
982}
983
984
985/** Duplicated in IEMAllThrdFuncsBltIn.cpp. */
986DECL_FORCE_INLINE(RTGCPHYS) iemTbGetRangePhysPageAddr(PCIEMTB pTb, uint8_t idxRange)
987{
988 Assert(idxRange < RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges)));
989 uint8_t const idxPage = pTb->aRanges[idxRange].idxPhysPage;
990 Assert(idxPage <= RT_ELEMENTS(pTb->aGCPhysPages));
991 if (idxPage == 0)
992 return pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
993 Assert(!(pTb->aGCPhysPages[idxPage - 1] & GUEST_PAGE_OFFSET_MASK));
994 return pTb->aGCPhysPages[idxPage - 1];
995}
996
997
998/**
999 * Macro that implements PC check after a conditional branch.
1000 */
1001#define BODY_CHECK_PC_AFTER_BRANCH(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
1002 RT_NOREF(a_cbInstr); \
1003 off = iemNativeEmitBltInCheckPcAfterBranch(pReNative, off, a_pTb, a_idxRange, a_offRange)
1004
1005DECL_FORCE_INLINE(uint32_t)
1006iemNativeEmitBltInCheckPcAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb,
1007 uint8_t idxRange, uint16_t offRange)
1008{
1009#ifdef VBOX_STRICT
1010 off = iemNativeEmitMarker(pReNative, off, 0x80000004);
1011#endif
1012
1013 /*
1014 * The GCPhysRangePageWithOffset value in the threaded function is a fixed
1015 * constant for us here.
1016 *
1017 * We can pretend that iem.s.cbInstrBufTotal is X86_PAGE_SIZE here, because
1018 * it serves no purpose as a CS.LIM here; if that check is needed we've just
1019 * performed it.  And as long as we don't implement code TLB reloading here,
1020 * there is no point in checking that the TLB data we're using is still valid.
1021 *
1022 * What we do is:
1023 * 1. Calculate the FLAT PC (RIP + CS.BASE).
1024 * 2. Subtract iem.s.uInstrBufPc from it to get 'off'.
1025 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal or
1026 * we're in the wrong spot and need to find a new TB.
1027 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
1028 * GCPhysRangePageWithOffset constant mentioned above.
1029 *
1030 * The adding of CS.BASE to RIP can be skipped in the first step if we're
1031 * in 64-bit code or flat 32-bit.
1032 */
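    /* Illustrative C sketch of the check emitted below (the local names uFlatPc
       and offBuf are made up for this sketch, they are not actual fields):
           uint64_t const uFlatPc = rip + (IEM_F_MODE_X86_IS_FLAT(fExec) ? 0 : cs.u64Base);
           uint64_t const offBuf  = uFlatPc - pVCpu->iem.s.uInstrBufPc;
           if (   offBuf >= X86_PAGE_SIZE
               || pVCpu->iem.s.GCPhysInstrBuf + offBuf != GCPhysRangePageWithOffset)
               goto CheckBranchMiss;
     */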
1033
1034 /* Allocate registers for step 1. Get the shadowed stuff before allocating
1035 the temp register, so we don't accidentally clobber something we'll be
1036 needing again immediately. This is why we get idxRegCsBase here. */
1037 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1038 kIemNativeGstRegUse_ReadOnly);
1039 uint8_t const idxRegCsBase = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX
1040 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
1041 kIemNativeGstRegUse_ReadOnly);
1042
1043 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1044
1045#ifdef VBOX_STRICT
1046 /* Do assertions before idxRegTmp contains anything. */
1047 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1048# ifdef RT_ARCH_AMD64
1049 {
1050 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1051 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1052 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1053 {
1054 /* cmp r/m64, imm8 */
1055 pbCodeBuf[off++] = X86_OP_REX_W;
1056 pbCodeBuf[off++] = 0x83;
1057 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1058 pbCodeBuf[off++] = 0;
1059 /* je rel8 */
1060 pbCodeBuf[off++] = 0x74;
1061 pbCodeBuf[off++] = 1;
1062 /* int3 */
1063 pbCodeBuf[off++] = 0xcc;
1064
1065 }
1066
1067 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1068 /* test r/m64, imm32 */
1069 pbCodeBuf[off++] = X86_OP_REX_W;
1070 pbCodeBuf[off++] = 0xf7;
1071 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1072 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1073 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1074 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1075 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1076 /* jz rel8 */
1077 pbCodeBuf[off++] = 0x74;
1078 pbCodeBuf[off++] = 1;
1079 /* int3 */
1080 pbCodeBuf[off++] = 0xcc;
1081 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1082 }
1083# else
1084
1085 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1086 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1087 {
1088 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1089# ifdef RT_ARCH_ARM64
1090 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1091 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, idxRegTmp);
1092 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2004);
1093 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1094# else
1095# error "Port me!"
1096# endif
1097 }
1098# endif
1099
1100#endif /* VBOX_STRICT */
1101
1102 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1103 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1104 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1105 {
1106#ifdef RT_ARCH_ARM64
1107 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1108 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1109 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1110#else
1111 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1112 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1113#endif
1114 }
1115 else
1116 {
1117#ifdef RT_ARCH_ARM64
1118 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1119 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1120 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1121 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1122#else
1123 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1124 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1125 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1126#endif
1127 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1128 }
1129 iemNativeRegFreeTmp(pReNative, idxRegPc);
1130
1131 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal. */
1132 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1133 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_CheckBranchMiss);
1134
1135 /* 4. Add iem.s.GCPhysInstrBuf and compare with GCPhysRangePageWithOffset. */
1136#ifdef RT_ARCH_AMD64
1137 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1138 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1139 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1140 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1141 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1142
1143#elif defined(RT_ARCH_ARM64)
1144 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
1145
1146 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1147 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1148 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1149 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1150
1151# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1152 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1153 off = iemNativeEmitJzToFixed(pReNative, off, off + 2 /* correct for ARM64 */);
1154 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1155# endif
1156 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1157#else
1158# error "Port me"
1159#endif
1160
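    /* The physical address the post-branch PC is expected to map to, derived
       from the TB's range data at recompile time. */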
1161 RTGCPHYS const GCPhysRangePageWithOffset = ( iemTbGetRangePhysPageAddr(pTb, idxRange)
1162 | pTb->aRanges[idxRange].offPhysPage)
1163 + offRange;
1164 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(pReNative, off, idxRegTmp, GCPhysRangePageWithOffset,
1165 kIemNativeLabelType_CheckBranchMiss);
1166
1167 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1168 return off;
1169}
1170
1171
1172/**
1173 * Macro that implements TLB loading and pbInstrBuf updating for an
1174 * instruction crossing into a new page.
1175 *
1176 * This may long jump if we're raising a \#PF, \#GP or similar trouble.
1177 */
1178#define BODY_LOAD_TLB_FOR_NEW_PAGE(a_pTb, a_offInstr, a_idxRange, a_cbInstr) \
1179 RT_NOREF(a_cbInstr); \
1180 off = iemNativeEmitBltLoadTlbForNewPage(pReNative, off, pTb, a_idxRange, a_offInstr)
1181
1182DECL_FORCE_INLINE(uint32_t)
1183iemNativeEmitBltLoadTlbForNewPage(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint8_t offInstr)
1184{
1185#ifdef VBOX_STRICT
1186 off = iemNativeEmitMarker(pReNative, off, 0x80000005);
1187#endif
1188
1189 /*
1190 * Define labels and allocate the register for holding the GCPhys of the new page.
1191 */
1192 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1193 uint32_t const idxRegGCPhys = iemNativeRegAllocTmp(pReNative, &off);
1194 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, IEM_F_MODE_X86_IS_FLAT(pReNative->fExec), &off);
1195 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1196 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1197 : UINT32_MAX;
1198
1199 //off = iemNativeEmitBrk(pReNative, off, 0x1111);
1200
1201 /*
1202 * Jump to the TLB lookup code.
1203 */
1204 if (!TlbState.fSkip)
1205 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1206
1207 /*
1208 * TlbMiss:
1209 *
1210 * Call iemNativeHlpMemCodeNewPageTlbMissWithOff to do the work.
1211 */
1212 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
1213
1214 /* Save variables in volatile registers. */
1215 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegGCPhys);
1216 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1217
1218 /* IEMNATIVE_CALL_ARG1_GREG = offInstr */
1219 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offInstr);
1220
1221 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1222 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1223
1224 /* Done setting up parameters, make the call. */
1225 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpMemCodeNewPageTlbMissWithOff);
1226
1227 /* Move the result to the right register. */
1228 if (idxRegGCPhys != IEMNATIVE_CALL_RET_GREG)
1229 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegGCPhys, IEMNATIVE_CALL_RET_GREG);
1230
1231 /* Restore variables and guest shadow registers to volatile registers. */
1232 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1233 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows(true /*fCode*/));
1234
1235#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1236 if (!TlbState.fSkip)
1237 {
1238 /* end of TlbMiss - Jump to the done label. */
1239 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1240 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1241
1242 /*
1243 * TlbLookup:
1244 */
1245 off = iemNativeEmitTlbLookup<false>(pReNative, off, &TlbState,
1246 IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX : X86_SREG_CS,
1247 1 /*cbMem*/, 0 /*fAlignMask*/, IEM_ACCESS_TYPE_EXEC,
1248 idxLabelTlbLookup, idxLabelTlbMiss, idxRegGCPhys, offInstr);
1249
1250# ifdef VBOX_WITH_STATISTICS
1251 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
1252 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeCodeTlbHitsForNewPageWithOffset));
1253# endif
1254
1255 /*
1256 * TlbDone:
1257 */
1258 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1259 TlbState.freeRegsAndReleaseVars(pReNative, UINT8_MAX /*idxVarGCPtrMem*/, true /*fIsCode*/);
1260 }
1261#else
1262 RT_NOREF(idxLabelTlbMiss);
1263#endif
1264
1265 /*
1266 * Now check the physical address of the page matches the expected one.
1267 */
1268 RTGCPHYS const GCPhysNewPage = iemTbGetRangePhysPageAddr(pTb, idxRange);
1269 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(pReNative, off, idxRegGCPhys, GCPhysNewPage,
1270 kIemNativeLabelType_ObsoleteTb);
1271
1272 iemNativeRegFreeTmp(pReNative, idxRegGCPhys);
1273 return off;
1274}
1275
1276
1277/**
1278 * Macro that implements TLB loading and pbInstrBuf updating when
1279 * branching or when crossing a page on an instruction boundary.
1280 *
1281 * This differs from BODY_LOAD_TLB_FOR_NEW_PAGE in that it will first check if
1282 * it is an inter-page branch and also check the page offset.
1283 *
1284 * This may long jump if we're raising a \#PF, \#GP or similar trouble.
1285 */
1286#define BODY_LOAD_TLB_AFTER_BRANCH(a_pTb, a_idxRange, a_cbInstr) \
1287 RT_NOREF(a_cbInstr); \
1288 off = iemNativeEmitBltLoadTlbAfterBranch(pReNative, off, pTb, a_idxRange)
1289
1290DECL_FORCE_INLINE(uint32_t)
1291iemNativeEmitBltLoadTlbAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange)
1292{
1293#ifdef VBOX_STRICT
1294 off = iemNativeEmitMarker(pReNative, off, 0x80000006);
1295#endif
1296
1297 /*
1298 * Define labels and allocate the register for holding the GCPhys of the new page.
1299 */
1300 uint32_t const idxLabelCheckBranchMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckBranchMiss);
1301 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1302 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1303 //
1304
1305 RTGCPHYS const GCPhysRangePageWithOffset = iemTbGetRangePhysPageAddr(pTb, idxRange)
1306 | pTb->aRanges[idxRange].offPhysPage;
1307
1308 /*
1309 *
1310 * First check if RIP is within the current code.
1311 *
1312 * This is very similar to iemNativeEmitBltInCheckPcAfterBranch, the only
1313 * difference is what we do when stuff doesn't match up.
1314 *
1315 * What we do is:
1316 * 1. Calculate the FLAT PC (RIP + CS.BASE).
1317 * 2. Subtract iem.s.uInstrBufPc from it to get 'off'.
1318 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal or
1319 * we need to retranslate RIP via the TLB.
1320 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
1321 * GCPhysRangePageWithOffset constant mentioned above.
1322 *
1323 * The adding of CS.BASE to RIP can be skipped in the first step if we're
1324 * in 64-bit code or flat 32-bit.
1325 *
1326 */
1327
1328 /* Allocate registers for step 1. Get the shadowed stuff before allocating
1329 the temp register, so we don't accidentally clobber something we'll be
1330 needing again immediately. This is why we get idxRegCsBase here. */
1331 /** @todo save+restore active registers and guest shadows in tlb-miss! */
1332 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
1333 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1334 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1335 uint8_t const idxRegCsBase = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX
1336 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
1337 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1338
1339 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off); /* volatile reg is okay for these two */
1340 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
1341
1342#ifdef VBOX_STRICT
1343 /* Do assertions before idxRegTmp contains anything. */
1344 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1345# ifdef RT_ARCH_AMD64
1346 {
1347 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1348 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1349 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1350 {
1351 /* cmp r/m64, imm8 */
1352 pbCodeBuf[off++] = X86_OP_REX_W;
1353 pbCodeBuf[off++] = 0x83;
1354 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1355 pbCodeBuf[off++] = 0;
1356 /* je rel8 */
1357 pbCodeBuf[off++] = 0x74;
1358 pbCodeBuf[off++] = 1;
1359 /* int3 */
1360 pbCodeBuf[off++] = 0xcc;
1361
1362 }
1363
1364 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1365 /* test r/m64, imm32 */
1366 pbCodeBuf[off++] = X86_OP_REX_W;
1367 pbCodeBuf[off++] = 0xf7;
1368 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1369 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1370 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1371 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1372 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1373 /* jz rel8 */
1374 pbCodeBuf[off++] = 0x74;
1375 pbCodeBuf[off++] = 1;
1376 /* int3 */
1377 pbCodeBuf[off++] = 0xcc;
1378 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1379 }
1380# else
1381
1382 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1383 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1384 {
1385 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1386# ifdef RT_ARCH_ARM64
1387 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1388 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, idxRegTmp);
1389 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2006);
1390 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1391# else
1392# error "Port me!"
1393# endif
1394 }
1395# endif
1396
1397#endif /* VBOX_STRICT */
1398
1399 /* Because we're lazy, we'll jump back here to recalc 'off' and share the
1400 GCPhysRangePageWithOffset check. This is a little risky, so we use the
1401 2nd register to check if we've looped more than once already. */
1402 off = iemNativeEmitGprZero(pReNative, off, idxRegTmp2);
1403
1404 uint32_t const offLabelRedoChecks = off;
1405
1406 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1407 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1408 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1409 {
1410#ifdef RT_ARCH_ARM64
1411 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1412 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1413 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1414#else
1415 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1416 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1417#endif
1418 }
1419 else
1420 {
1421#ifdef RT_ARCH_ARM64
1422 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1423 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1424 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1425 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1426#else
1427 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1428 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1429 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1430#endif
1431 }
1432
1433 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal.
1434 Unlike iemNativeEmitBltInCheckPcAfterBranch we'll jump to the TLB loading if this fails. */
1435 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1436 uint32_t const offFixedJumpToTlbLoad = off;
1437 off = iemNativeEmitJaToFixed(pReNative, off, off /* (ASSUME ja rel8 suffices) */);
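    /* Note: the 'ja' above targets a placeholder offset; it gets patched to the
       actual TLB-load path further down via iemNativeFixupFixedJump(). */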
1438
1439 /* 4a. Add iem.s.GCPhysInstrBuf to off ... */
1440#ifdef RT_ARCH_AMD64
1441 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1442 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1443 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1444 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1445 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1446
1447#elif defined(RT_ARCH_ARM64)
1448
1449 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1450 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1451 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1452 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1453
1454# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1455 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1456 off = iemNativeEmitJzToFixed(pReNative, off, off + 2 /* correct for ARM64 */);
1457 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1458# endif
1459#else
1460# error "Port me"
1461#endif
1462
1463 /* 4b. ... and compare with GCPhysRangePageWithOffset.
1464
1465 Unlike iemNativeEmitBltInCheckPcAfterBranch we'll have to be more
1466 careful and avoid implicit temporary register usage here.
1467
1468 Unlike the threaded version of this code, we do not obsolete TBs here to
1469 reduce the code size and because indirect calls may legally end at the
1470 same offset in two different pages depending on the program state. */
1471 /** @todo synch the threaded BODY_LOAD_TLB_AFTER_BRANCH version with this. */
1472 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp2, GCPhysRangePageWithOffset);
1473 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegTmp, idxRegTmp2);
1474 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelCheckBranchMiss);
1475 uint32_t const offFixedJumpToEnd = off;
1476 off = iemNativeEmitJmpToFixed(pReNative, off, off + 512 /* force rel32 */);
1477
1478 /*
1479 * First we try to go via the TLB.
1480 */
1481 iemNativeFixupFixedJump(pReNative, offFixedJumpToTlbLoad, off);
1482
1483 /* Check that we haven't been here before. */
1484 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxRegTmp2, false /*f64Bit*/, idxLabelCheckBranchMiss);
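    /* (A non-zero idxRegTmp2 means the TLB-miss helper already ran once; if the
       rechecks failed again we give up and go to the check-branch-miss label
       rather than looping forever.) */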
1485
1486 /*
1487 * TLB miss: Call iemNativeHlpMemCodeNewPageTlbMiss to do the work.
1488 */
1489 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1490
1491 /* Save variables in volatile registers. */
1492 uint32_t const fHstRegsNotToSave = /*TlbState.getRegsNotToSave() | */ RT_BIT_32(idxRegTmp) | RT_BIT_32(idxRegTmp2);
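    /* fHstRegsNotToSave: host registers to exclude from the volatile-register
       save/restore around the helper call; both temporaries are reinitialized
       after the call, so nothing in them needs preserving. */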
1493 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1494
1495 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1496 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1497
1498 /* Done setting up parameters, make the call. */
1499 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpMemCodeNewPageTlbMiss);
1500
1501 /* Restore variables and guest shadow registers to volatile registers. */
1502 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1503 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*TlbState.getActiveRegsWithShadows()*/);
1504
1505 /* Jmp back to the start and redo the checks. */
1506 off = iemNativeEmitLoadGpr8Imm(pReNative, off, idxRegTmp2, 1); /* indicate that we've looped once already */
1507 off = iemNativeEmitJmpToFixed(pReNative, off, offLabelRedoChecks);
1508
1509 /* The end. */
1510 iemNativeFixupFixedJump(pReNative, offFixedJumpToEnd, off);
1511
1512 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1513 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1514 iemNativeRegFreeTmp(pReNative, idxRegPc);
1515 if (idxRegCsBase != UINT8_MAX)
1516 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1517 return off;
1518}
1519
1520
1521#ifdef BODY_CHECK_CS_LIM
1522/**
1523 * Built-in function that checks that EIP/IP + uParam0 is within CS.LIM,
1524 * raising a \#GP(0) if this isn't the case.
1525 */
1526IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
1527{
1528 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1529 BODY_SET_CUR_INSTR();
1530 BODY_FLUSH_PENDING_WRITES();
1531 BODY_CHECK_CS_LIM(cbInstr);
1532 return off;
1533}
1534#endif
1535
1536
1537#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_CS_LIM)
1538/**
1539 * Built-in function for re-checking opcodes and CS.LIM after an instruction
1540 * that may have modified them.
1541 */
1542IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
1543{
1544 PCIEMTB const pTb = pReNative->pTbOrg;
1545 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1546 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1547 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1548 BODY_SET_CUR_INSTR();
1549 BODY_FLUSH_PENDING_WRITES();
1550 BODY_CHECK_CS_LIM(cbInstr);
1551 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1552 return off;
1553}
1554#endif
1555
1556
1557#if defined(BODY_CHECK_OPCODES)
1558/**
1559 * Built-in function for re-checking opcodes after an instruction that may have
1560 * modified them.
1561 */
1562IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
1563{
1564 PCIEMTB const pTb = pReNative->pTbOrg;
1565 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1566 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1567 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1568 BODY_SET_CUR_INSTR();
1569 BODY_FLUSH_PENDING_WRITES();
1570 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1571 return off;
1572}
1573#endif
1574
1575
1576#if defined(BODY_CHECK_OPCODES) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1577/**
1578 * Built-in function for re-checking opcodes and considering the need for CS.LIM
1579 * checking after an instruction that may have modified them.
1580 */
1581IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
1582{
1583 PCIEMTB const pTb = pReNative->pTbOrg;
1584 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1585 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1586 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1587 BODY_SET_CUR_INSTR();
1588 BODY_FLUSH_PENDING_WRITES();
1589 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1590 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1591 return off;
1592}
1593#endif
1594
1595
1596/*
1597 * Post-branching checkers.
1598 */
1599
1600#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1601/**
1602 * Built-in function for checking CS.LIM, checking the PC and checking opcodes
1603 * after conditional branching within the same page.
1604 *
1605 * @see iemThreadedFunc_BltIn_CheckPcAndOpcodes
1606 */
1607IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
1608{
1609 PCIEMTB const pTb = pReNative->pTbOrg;
1610 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1611 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1612 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1613 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1614 BODY_SET_CUR_INSTR();
1615 BODY_FLUSH_PENDING_WRITES();
1616 BODY_CHECK_CS_LIM(cbInstr);
1617 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1618 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1619 //LogFunc(("okay\n"));
1620 return off;
1621}
1622#endif
1623
1624
1625#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH)
1626/**
1627 * Built-in function for checking the PC and checking opcodes after conditional
1628 * branching within the same page.
1629 *
1630 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1631 */
1632IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
1633{
1634 PCIEMTB const pTb = pReNative->pTbOrg;
1635 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1636 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1637 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1638 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1639 BODY_SET_CUR_INSTR();
1640 BODY_FLUSH_PENDING_WRITES();
1641 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1642 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1643 //LogFunc(("okay\n"));
1644 return off;
1645}
1646#endif
1647
1648
1649#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1650/**
1651 * Built-in function for checking the PC and checking opcodes and considering
1652 * the need for CS.LIM checking after conditional branching within the same
1653 * page.
1654 *
1655 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1656 */
1657IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
1658{
1659 PCIEMTB const pTb = pReNative->pTbOrg;
1660 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1661 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1662 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1663 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1664 BODY_SET_CUR_INSTR();
1665 BODY_FLUSH_PENDING_WRITES();
1666 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1667 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1668 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1669 //LogFunc(("okay\n"));
1670 return off;
1671}
1672#endif
1673
1674
1675#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1676/**
1677 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1678 * transitioning to a different code page.
1679 *
1680 * The code page transition can either happen naturally over onto the next page
1681 * (with the instruction starting at page offset zero) or by means of branching.
1682 *
1683 * @see iemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1684 */
1685IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
1686{
1687 PCIEMTB const pTb = pReNative->pTbOrg;
1688 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1689 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1690 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1691 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1692 BODY_SET_CUR_INSTR();
1693 BODY_FLUSH_PENDING_WRITES();
1694 BODY_CHECK_CS_LIM(cbInstr);
1695 Assert(offRange == 0);
1696 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1697 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1698 //LogFunc(("okay\n"));
1699 return off;
1700}
1701#endif
1702
1703
1704#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH)
1705/**
1706 * Built-in function for loading TLB and checking opcodes when transitioning to
1707 * a different code page.
1708 *
1709 * The code page transition can either happen naturally over onto the next page
1710 * (with the instruction starting at page offset zero) or by means of branching.
1711 *
1712 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1713 */
1714IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
1715{
1716 PCIEMTB const pTb = pReNative->pTbOrg;
1717 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1718 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1719 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1720 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1721 BODY_SET_CUR_INSTR();
1722 BODY_FLUSH_PENDING_WRITES();
1723 Assert(offRange == 0);
1724 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1725 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1726 //LogFunc(("okay\n"));
1727 return off;
1728}
1729#endif
1730
1731
1732#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1733/**
1734 * Built-in function for loading TLB and checking opcodes and considering the
1735 * need for CS.LIM checking when transitioning to a different code page.
1736 *
1737 * The code page transition can either happen naturally over onto the next page
1738 * (with the instruction starting at page offset zero) or by means of branching.
1739 *
1740 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1741 */
1742IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
1743{
1744 PCIEMTB const pTb = pReNative->pTbOrg;
1745 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1746 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1747 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1748 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1749 BODY_SET_CUR_INSTR();
1750 BODY_FLUSH_PENDING_WRITES();
1751 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1752 Assert(offRange == 0);
1753 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1754 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1755 //LogFunc(("okay\n"));
1756 return off;
1757}
1758#endif
1759
1760
1761
1762/*
1763 * Natural page crossing checkers.
1764 */
1765
1766#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1767/**
1768 * Built-in function for checking CS.LIM, loading TLB and checking opcodes on
1769 * both pages when transitioning to a different code page.
1770 *
1771 * This is used when the previous instruction requires revalidation of opcode
1772 * bytes and the current instruction straddles a page boundary with opcode
1773 * bytes in both the old and new page.
1774 *
1775 * @see iemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1776 */
1777IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
1778{
1779 PCIEMTB const pTb = pReNative->pTbOrg;
1780 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1781 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1782 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1783 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1784 uint32_t const idxRange2 = idxRange1 + 1;
1785 BODY_SET_CUR_INSTR();
1786 BODY_FLUSH_PENDING_WRITES();
1787 BODY_CHECK_CS_LIM(cbInstr);
1788 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1789 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1790 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1791 return off;
1792}
1793#endif
1794
1795
1796#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1797/**
1798 * Built-in function for loading TLB and checking opcodes on both pages when
1799 * transitioning to a different code page.
1800 *
1801 * This is used when the previous instruction requires revalidation of opcode
1802 * bytes and the current instruction straddles a page boundary with opcode
1803 * bytes in both the old and new page.
1804 *
1805 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1806 */
1807IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
1808{
1809 PCIEMTB const pTb = pReNative->pTbOrg;
1810 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1811 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1812 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1813 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1814 uint32_t const idxRange2 = idxRange1 + 1;
1815 BODY_SET_CUR_INSTR();
1816 BODY_FLUSH_PENDING_WRITES();
1817 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1818 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1819 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1820 return off;
1821}
1822#endif
1823
1824
1825#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1826/**
1827 * Built-in function for loading TLB and checking opcodes on both pages and
1828 * considering the need for CS.LIM checking when transitioning to a different
1829 * code page.
1830 *
1831 * This is used when the previous instruction requires revalidation of opcode
1832 * bytes and the current instruction straddles a page boundary with opcode
1833 * bytes in both the old and new page.
1834 *
1835 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1836 */
1837IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
1838{
1839 PCIEMTB const pTb = pReNative->pTbOrg;
1840 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1841 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1842 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1843 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1844 uint32_t const idxRange2 = idxRange1 + 1;
1845 BODY_SET_CUR_INSTR();
1846 BODY_FLUSH_PENDING_WRITES();
1847 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1848 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1849 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1850 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1851 return off;
1852}
1853#endif
1854
1855
1856#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1857/**
1858 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1859 * advancing naturally to a different code page.
1860 *
1861 * Only opcodes on the new page are checked.
1862 *
1863 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1864 */
1865IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
1866{
1867 PCIEMTB const pTb = pReNative->pTbOrg;
1868 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1869 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1870 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1871 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1872 uint32_t const idxRange2 = idxRange1 + 1;
1873 BODY_SET_CUR_INSTR();
1874 BODY_FLUSH_PENDING_WRITES();
1875 BODY_CHECK_CS_LIM(cbInstr);
1876 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1877 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1878 return off;
1879}
1880#endif
1881
1882
1883#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1884/**
1885 * Built-in function for loading TLB and checking opcodes when advancing
1886 * naturally to a different code page.
1887 *
1888 * Only opcodes on the new page are checked.
1889 *
1890 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1891 */
1892IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
1893{
1894 PCIEMTB const pTb = pReNative->pTbOrg;
1895 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1896 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1897 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1898 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1899 uint32_t const idxRange2 = idxRange1 + 1;
1900 BODY_SET_CUR_INSTR();
1901 BODY_FLUSH_PENDING_WRITES();
1902 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1903 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1904 return off;
1905}
1906#endif
1907
1908
1909#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1910/**
1911 * Built-in function for loading TLB and checking opcodes and considering the
1912 * need for CS.LIM checking when advancing naturally to a different code page.
1913 *
1914 * Only opcodes on the new page are checked.
1915 *
1916 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1917 */
1918IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
1919{
1920 PCIEMTB const pTb = pReNative->pTbOrg;
1921 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1922 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1923 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1924 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1925 uint32_t const idxRange2 = idxRange1 + 1;
1926 BODY_SET_CUR_INSTR();
1927 BODY_FLUSH_PENDING_WRITES();
1928 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1929 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1930 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1931 return off;
1932}
1933#endif
1934
1935
1936#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1937/**
1938 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1939 * advancing naturally to a different code page with first instr at byte 0.
1940 *
1941 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1942 */
1943IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
1944{
1945 PCIEMTB const pTb = pReNative->pTbOrg;
1946 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1947 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1948 BODY_SET_CUR_INSTR();
1949 BODY_FLUSH_PENDING_WRITES();
1950 BODY_CHECK_CS_LIM(cbInstr);
1951 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1952 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1953 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1954 return off;
1955}
1956#endif
1957
1958
1959#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1960/**
1961 * Built-in function for loading TLB and checking opcodes when advancing
1962 * naturally to a different code page with first instr at byte 0.
1963 *
1964 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1965 */
1966IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
1967{
1968 PCIEMTB const pTb = pReNative->pTbOrg;
1969 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1970 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1971 BODY_SET_CUR_INSTR();
1972 BODY_FLUSH_PENDING_WRITES();
1973 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1974 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1975 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1976 return off;
1977}
1978#endif
1979
1980
1981#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1982/**
1983 * Built-in function for loading TLB and checking opcodes and considering the
1984 * need for CS.LIM checking when advancing naturally to a different code page
1985 * with first instr at byte 0.
1986 *
1987 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1988 */
1989IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
1990{
1991 PCIEMTB const pTb = pReNative->pTbOrg;
1992 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1993 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1994 BODY_SET_CUR_INSTR();
1995 BODY_FLUSH_PENDING_WRITES();
1996 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1997 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1998 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1999 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
2000 return off;
2001}
2002#endif
2003