VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompBltIn.cpp@102701

Last change on this file since 102701 was 102701, checked in by vboxsync, 16 months ago

VMM/IEM: A couple of ARM fixes. Disassembly fix. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 86.3 KB
1/* $Id: IEMAllN8veRecompBltIn.cpp 102701 2023-12-26 00:20:10Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, Emitters for Built-In Threaded Functions.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33//#define IEM_WITH_OPAQUE_DECODER_STATE - need offCurInstrStart access for iemNativeHlpMemCodeNewPageTlbMiss and friends.
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#include <VBox/vmm/iem.h>
37#include <VBox/vmm/cpum.h>
38#include <VBox/vmm/dbgf.h>
39#include "IEMInternal.h"
40#include <VBox/vmm/vmcc.h>
41#include <VBox/log.h>
42#include <VBox/err.h>
43#include <VBox/param.h>
44#include <iprt/assert.h>
45#include <iprt/string.h>
46#if defined(RT_ARCH_AMD64)
47# include <iprt/x86.h>
48#elif defined(RT_ARCH_ARM64)
49# include <iprt/armv8.h>
50#endif
51
52
53#include "IEMInline.h"
54#include "IEMThreadedFunctions.h"
55#include "IEMN8veRecompiler.h"
56#include "IEMN8veRecompilerEmit.h"
57
58
59
60/*********************************************************************************************************************************
61* TB Helper Functions *
62*********************************************************************************************************************************/
63#ifdef RT_ARCH_AMD64
64DECLASM(void) iemNativeHlpAsmSafeWrapLogCpuState(void);
65#endif
66
67
68/**
69 * Used by TB code to deal with a TLB miss for a new page.
70 */
71IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCodeNewPageTlbMiss,(PVMCPUCC pVCpu))
72{
73 pVCpu->iem.s.pbInstrBuf = NULL;
74 pVCpu->iem.s.offCurInstrStart = GUEST_PAGE_SIZE;
75 pVCpu->iem.s.offInstrNextByte = GUEST_PAGE_SIZE;
76 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
77 if (pVCpu->iem.s.pbInstrBuf)
78 { /* likely */ }
79 else
80 {
81 IEM_DO_LONGJMP(pVCpu, VINF_IEM_REEXEC_BREAK);
82 }
83}
84
85
86/**
 87 * Used by TB code to deal with a TLB miss for a new page that an instruction crosses into, returning the new page's physical address (or NIL_RTGCPHYS on failure).
88 */
89IEM_DECL_NATIVE_HLP_DEF(RTGCPHYS, iemNativeHlpMemCodeNewPageTlbMissWithOff,(PVMCPUCC pVCpu, uint8_t offInstr))
90{
91 pVCpu->iem.s.pbInstrBuf = NULL;
92 pVCpu->iem.s.offCurInstrStart = GUEST_PAGE_SIZE - offInstr;
93 pVCpu->iem.s.offInstrNextByte = GUEST_PAGE_SIZE;
94 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
95 return pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf : NIL_RTGCPHYS;
96}
97
98
99/*********************************************************************************************************************************
100* Builtin functions *
101*********************************************************************************************************************************/
102
103/**
104 * Built-in function that does nothing.
105 *
106 * Whether this is called or not can be controlled by the entry in the
 107 * IEMThreadedGenerator.katBltIns table. This can be useful when trying to
 108 * determine why behaviour changes when enabling the LogCpuState builtins, i.e.
 109 * whether it's the reduced call count in the TBs or the threaded calls flushing
 110 * register state.
111 */
112IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_Nop)
113{
114 RT_NOREF(pReNative, pCallEntry);
115 return off;
116}
117
118
119/**
 120 * Emits code for LogCpuState.
121 *
122 * This shouldn't have any relevant impact on the recompiler state.
123 */
124IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_LogCpuState)
125{
126#ifdef RT_ARCH_AMD64
127 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
128 /* push rax */
129 pbCodeBuf[off++] = 0x50 + X86_GREG_xAX;
130 /* push imm32 */
131 pbCodeBuf[off++] = 0x68;
132 pbCodeBuf[off++] = RT_BYTE1(pCallEntry->auParams[0]);
133 pbCodeBuf[off++] = RT_BYTE2(pCallEntry->auParams[0]);
134 pbCodeBuf[off++] = RT_BYTE3(pCallEntry->auParams[0]);
135 pbCodeBuf[off++] = RT_BYTE4(pCallEntry->auParams[0]);
136 /* mov rax, iemNativeHlpAsmSafeWrapLogCpuState */
137 pbCodeBuf[off++] = X86_OP_REX_W;
138 pbCodeBuf[off++] = 0xb8 + X86_GREG_xAX;
139 *(uint64_t *)&pbCodeBuf[off] = (uintptr_t)iemNativeHlpAsmSafeWrapLogCpuState;
140 off += sizeof(uint64_t);
141 /* call rax */
142 pbCodeBuf[off++] = 0xff;
143 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
 145 /* pop rax - discards the imm32 parameter pushed above */
 146 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
 147 /* pop rax - restores the original rax value */
 148 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
148 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
149
150#else
151 /** @todo Implement this */
152 AssertFailed();
153 RT_NOREF(pReNative, pCallEntry);
154#endif
155 return off;
156}
157
158
159/**
 160 * Built-in function that calls a C-implementation function taking zero arguments.
161 */
162IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
163{
164 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
165 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
 166 uint64_t const fGstShwFlush = pCallEntry->auParams[2]; /* don't truncate the 64-bit flush mask to 8 bits */
167 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
168}
169
170
171/**
172 * Built-in function that checks for pending interrupts that can be delivered or
173 * forced action flags.
174 *
175 * This triggers after the completion of an instruction, so EIP is already at
176 * the next instruction. If an IRQ or important FF is pending, this will return
177 * a non-zero status that stops TB execution.
178 */
179IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
180{
181 RT_NOREF(pCallEntry);
182
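 /*
  * Roughly, in plain C, the code emitted below performs the following checks
  * (an illustrative sketch only; the masks and fields are the ones used by the
  * emitter calls further down, anything else is an assumption):
  *
  *     uint64_t fFlags = pVCpu->fLocalForcedActions
  *                     & (VMCPU_FF_ALL_MASK & ~(  VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
  *                                              | VMCPU_FF_TLB_FLUSH | VMCPU_FF_UNHALT));
  *     if (fFlags)
  *     {
  *         if (fFlags & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
  *             return VINF_IEM_REEXEC_BREAK;            // non-interrupt FF pending
  *         if (eflags & X86_EFL_IF)                     // interrupts enabled?
  *             if (   !(eflags & CPUMCTX_INHIBIT_SHADOW)
  *                 || uRipInhibitInt != rip)            // shadow not valid for this PC
  *                 return VINF_IEM_REEXEC_BREAK;
  *     }
  *     if (pVM->fGlobalForcedActions & VM_FF_ALL_MASK)
  *         return VINF_IEM_REEXEC_BREAK;
  */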
183 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
184 and I'm too lazy to create a 'Fixed' version of that one. */
185 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
186 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
187
188 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
189
190 /* Again, we need to load the extended EFLAGS before we actually need them
191 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
192 loaded them inside the check, as the shadow state would not be correct
193 when the code branches before the load. Ditto PC. */
194 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
195 kIemNativeGstRegUse_ReadOnly);
196
197 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
198
199 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
200
201 /*
202 * Start by checking the local forced actions of the EMT we're on for IRQs
 203 * and other FFs that need servicing.
204 */
205 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
206 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
207 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
208 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
209 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
210 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
211 | VMCPU_FF_TLB_FLUSH
212 | VMCPU_FF_UNHALT ),
213 true /*fSetFlags*/);
 214 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
215 uint32_t const offFixupJumpToVmCheck1 = off;
216 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
217
 218 /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
 219 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
220 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
221 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
222 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
223 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
224
 225 /* So, it's only interrupt related FFs and we need to see if IRQs are being
 226 suppressed by the CPU or not. */
227 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
228 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
229 idxLabelReturnBreak);
230
231 /* We've got shadow flags set, so we must check that the PC they are valid
232 for matches our current PC value. */
233 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
234 * a register. */
235 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
236 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
237
238 /*
239 * Now check the force flags of the VM.
240 */
241 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
242 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
243 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
244 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
245 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
246 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
247
248 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
249
250 /*
251 * We're good, no IRQs or FFs pending.
252 */
253 iemNativeRegFreeTmp(pReNative, idxTmpReg);
254 iemNativeRegFreeTmp(pReNative, idxEflReg);
255 iemNativeRegFreeTmp(pReNative, idxPcReg);
256
257 return off;
258}
259
260
261/**
 262 * Built-in function that checks if IEMCPU::fExec has the expected value.
263 */
264IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
265{
266 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
267 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
268
269 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
270 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
271 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
272 kIemNativeLabelType_ReturnBreak);
273 iemNativeRegFreeTmp(pReNative, idxTmpReg);
274 return off;
275}
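/*
 * Conceptually (an illustrative sketch, not the emitted encoding), the code
 * generated above performs:
 *
 *     if ((pVCpu->iem.s.fExec & IEMTB_F_KEY_MASK) != (fExpectedExec & IEMTB_F_KEY_MASK))
 *         return VINF_IEM_REEXEC_BREAK;
 */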
276
277
278/**
279 * Sets idxTbCurInstr in preparation of raising an exception or aborting the TB.
280 */
281/** @todo Optimize this, so we don't set the same value more than once. Just
282 * needs some tracking. */
283#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
284# define BODY_SET_CUR_INSTR() \
285 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr))
286#else
287# define BODY_SET_CUR_INSTR() ((void)0)
288#endif
289
290/**
291 * Flushes pending writes in preparation of raising an exception or aborting the TB.
292 */
293#define BODY_FLUSH_PENDING_WRITES() \
294 off = iemNativeRegFlushPendingWrites(pReNative, off);
295
296
297/**
298 * Macro that emits the 16/32-bit CS.LIM check.
299 */
300#define BODY_CHECK_CS_LIM(a_cbInstr) \
301 off = iemNativeEmitBltInCheckCsLim(pReNative, off, (a_cbInstr))
302
303DECL_FORCE_INLINE(uint32_t)
304iemNativeEmitBltInCheckCsLim(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
305{
306 Assert(cbInstr > 0);
307 Assert(cbInstr < 16);
308#ifdef VBOX_STRICT
309 off = iemNativeEmitMarker(pReNative, off, 0x80000001);
310#endif
311
312 /*
313 * We need CS.LIM and RIP here. When cbInstr is larger than 1, we also need
314 * a temporary register for calculating the last address of the instruction.
315 *
316 * The calculation and comparisons are 32-bit. We ASSUME that the incoming
317 * RIP isn't totally invalid, i.e. that any jump/call/ret/iret instruction
318 * that last updated EIP here checked it already, and that we're therefore
319 * safe in the 32-bit wrap-around scenario to only check that the last byte
320 * is within CS.LIM. In the case of instruction-by-instruction advancing
 321 * up to an EIP wrap-around, we know that CS.LIM is 4G-1 because the limit
322 * must be using 4KB granularity and the previous instruction was fine.
323 */
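 /* In plain C the emitted check amounts to roughly this (illustrative sketch,
    using 32-bit arithmetic as described above):

        uint32_t const uLastByte = (uint32_t)eip + cbInstr - 1;
        if (uLastByte > csLim)
            goto RaiseGp0;
 */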
324 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
325 kIemNativeGstRegUse_ReadOnly);
326 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
327 kIemNativeGstRegUse_ReadOnly);
328#ifdef RT_ARCH_AMD64
329 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
330#elif defined(RT_ARCH_ARM64)
331 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
332#else
333# error "Port me"
334#endif
335
336 if (cbInstr != 1)
337 {
338 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
339
340 /*
341 * 1. idxRegTmp = idxRegPc + cbInstr;
342 * 2. if idxRegTmp > idxRegCsLim then raise #GP(0).
343 */
344#ifdef RT_ARCH_AMD64
345 /* 1. lea tmp32, [Pc + cbInstr - 1] */
346 if (idxRegTmp >= 8 || idxRegPc >= 8)
347 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegPc < 8 ? 0 : X86_OP_REX_B);
348 pbCodeBuf[off++] = 0x8d;
349 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, idxRegTmp & 7, idxRegPc & 7);
350 if ((idxRegPc & 7) == X86_GREG_xSP)
351 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegPc & 7, 4 /*no index*/, 0);
352 pbCodeBuf[off++] = cbInstr - 1;
353
354 /* 2. cmp tmp32(r), CsLim(r/m). */
355 if (idxRegTmp >= 8 || idxRegCsLim >= 8)
356 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
357 pbCodeBuf[off++] = 0x3b;
358 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegTmp & 7, idxRegCsLim & 7);
359
360#elif defined(RT_ARCH_ARM64)
361 /* 1. add tmp32, Pc, #cbInstr-1 */
362 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegTmp, idxRegPc, cbInstr - 1, false /*f64Bit*/);
363 /* 2. cmp tmp32, CsLim */
364 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegTmp, idxRegCsLim,
365 false /*f64Bit*/, true /*fSetFlags*/);
366
367#endif
368 iemNativeRegFreeTmp(pReNative, idxRegTmp);
369 }
370 else
371 {
372 /*
373 * Here we can skip step 1 and compare PC and CS.LIM directly.
374 */
375#ifdef RT_ARCH_AMD64
376 /* 2. cmp eip(r), CsLim(r/m). */
377 if (idxRegPc >= 8 || idxRegCsLim >= 8)
378 pbCodeBuf[off++] = (idxRegPc < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
379 pbCodeBuf[off++] = 0x3b;
380 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegPc & 7, idxRegCsLim & 7);
381
382#elif defined(RT_ARCH_ARM64)
383 /* 2. cmp Pc, CsLim */
384 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegPc, idxRegCsLim,
385 false /*f64Bit*/, true /*fSetFlags*/);
386
387#endif
388 }
389
390 /* 3. Jump if greater. */
391 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
392
393 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
394 iemNativeRegFreeTmp(pReNative, idxRegPc);
395 return off;
396}
397
398
399/**
400 * Macro that considers whether we need CS.LIM checking after a branch or
401 * crossing over to a new page.
402 */
403#define BODY_CONSIDER_CS_LIM_CHECKING(a_pTb, a_cbInstr) \
404 RT_NOREF(a_cbInstr); \
405 off = iemNativeEmitBltInConsiderLimChecking(pReNative, off)
406
407DECL_FORCE_INLINE(uint32_t)
408iemNativeEmitBltInConsiderLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off)
409{
410#ifdef VBOX_STRICT
411 off = iemNativeEmitMarker(pReNative, off, 0x80000002);
412#endif
413
414 /*
 415 * This check must match the one in iemGetTbFlagsForCurrentPc
416 * exactly:
417 *
418 * int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
419 * if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
420 * return fRet;
421 * return fRet | IEMTB_F_CS_LIM_CHECKS;
422 *
423 *
424 * We need EIP, CS.LIM and CS.BASE here.
425 */
426
427 /* Calculate the offFromLim first: */
428 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
429 kIemNativeGstRegUse_ReadOnly);
430 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
431 kIemNativeGstRegUse_ReadOnly);
432 uint8_t const idxRegLeft = iemNativeRegAllocTmp(pReNative, &off);
433
434#ifdef RT_ARCH_ARM64
435 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
436 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegLeft, idxRegCsLim, idxRegPc);
437 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
438#else
439 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegLeft, idxRegCsLim);
440 off = iemNativeEmitSubTwoGprs(pReNative, off, idxRegLeft, idxRegPc);
441#endif
442
443 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
444 iemNativeRegFreeTmp(pReNative, idxRegPc);
445
446 /* Calculate the threshold level (right side). */
447 uint8_t const idxRegCsBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
448 kIemNativeGstRegUse_ReadOnly);
449 uint8_t const idxRegRight = iemNativeRegAllocTmp(pReNative, &off);
450
451#ifdef RT_ARCH_ARM64
452 pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
453 Assert(Armv8A64ConvertImmRImmS2Mask32(11, 0) == GUEST_PAGE_OFFSET_MASK);
454 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegRight, idxRegCsBase, 11, 0, false /*f64Bit*/);
455 pu32CodeBuf[off++] = Armv8A64MkInstrNeg(idxRegRight);
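 /* X86_PAGE_SIZE + 16 (4112) does not fit the 12-bit unsigned immediate of an
    ARM64 ADD, which is presumably why the constant is added in two halves below. */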
456 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
457 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
458 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
459
460#else
461 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegRight, GUEST_PAGE_OFFSET_MASK);
462 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegRight, idxRegCsBase);
463 off = iemNativeEmitNegGpr(pReNative, off, idxRegRight);
464 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRight, X86_PAGE_SIZE + 16);
465#endif
466
467 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
468
469 /* Compare the two and jump out if we're too close to the limit. */
470 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegLeft, idxRegRight);
471 off = iemNativeEmitJlToNewLabel(pReNative, off, kIemNativeLabelType_NeedCsLimChecking);
472
473 iemNativeRegFreeTmp(pReNative, idxRegRight);
474 iemNativeRegFreeTmp(pReNative, idxRegLeft);
475 return off;
476}
477
478
479
480/**
481 * Macro that implements opcode (re-)checking.
482 */
483#define BODY_CHECK_OPCODES(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
484 RT_NOREF(a_cbInstr); \
485 off = iemNativeEmitBltInCheckOpcodes(pReNative, off, (a_pTb), (a_idxRange), (a_offRange))
486
487#if 0 /* debugging aid */
488bool g_fBpOnObsoletion = false;
489# define BP_ON_OBSOLETION g_fBpOnObsoletion
490#else
491# define BP_ON_OBSOLETION 0
492#endif
493
494DECL_FORCE_INLINE(uint32_t)
495iemNativeEmitBltInCheckOpcodes(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint16_t offRange)
496{
497 Assert(idxRange < pTb->cRanges && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges));
498 Assert(offRange < pTb->aRanges[idxRange].cbOpcodes);
499#ifdef VBOX_STRICT
500 off = iemNativeEmitMarker(pReNative, off, 0x80000003);
501#endif
502
503 uint32_t const idxLabelObsoleteTb = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ObsoleteTb);
504
505 /*
506 * Where to start and how much to compare.
507 *
508 * Looking at the ranges produced when r160746 was running a DOS VM with TB
509 * logging, the ranges can be anything from 1 byte to at least 0x197 bytes,
510 * with the 6, 5, 4, 7, 8, 40, 3, 2, 9 and 10 being the top 10 in the sample.
511 *
512 * The top 10 for the early boot phase of a 64-bit debian 9.4 VM: 5, 9, 8,
513 * 12, 10, 11, 6, 13, 15 and 16. Max 0x359 bytes. Same revision as above.
514 */
515 uint16_t offPage = pTb->aRanges[idxRange].offPhysPage + offRange;
516 uint16_t cbLeft = pTb->aRanges[idxRange].cbOpcodes - offRange;
517 Assert(cbLeft > 0);
518 uint8_t const *pbOpcodes = &pTb->pabOpcodes[pTb->aRanges[idxRange].offOpcodes + offRange];
519 uint32_t offConsolidatedJump = UINT32_MAX;
520
521#ifdef RT_ARCH_AMD64
 522 /* AMD64/x86 offers a bunch of options. Smaller stuff can be completely
 523 inlined; for larger blocks we use REPE CMPS. */
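/* Conceptually (an illustrative sketch, not the emitted encoding), each of the
   CHECK_OPCODES_CMP_IMMxx macros below expands to something like:

       if (*(uintNN_t *)&pVCpu->iem.s.pbInstrBuf[offPage] != <expected opcode bytes>)
           goto ObsoleteTb;

   while the REPE CMPS path is effectively a memcmp() of the remaining opcode
   bytes against the copy stored in the TB. */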
524# define CHECK_OPCODES_CMP_IMMXX(a_idxReg, a_bOpcode) /* cost: 3 bytes */ do { \
525 pbCodeBuf[off++] = a_bOpcode; \
526 Assert(offPage < 127); \
527 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 7, a_idxReg); \
528 pbCodeBuf[off++] = RT_BYTE1(offPage); \
529 } while (0)
530
531# define CHECK_OPCODES_CMP_JMP() /* cost: 7 bytes first time, then 2 bytes */ do { \
532 if (offConsolidatedJump != UINT32_MAX) \
533 { \
534 int32_t const offDisp = (int32_t)offConsolidatedJump - (int32_t)(off + 2); \
535 Assert(offDisp >= -128); \
536 pbCodeBuf[off++] = 0x75; /* jnz near */ \
537 pbCodeBuf[off++] = (uint8_t)offDisp; \
538 } \
539 else \
540 { \
541 pbCodeBuf[off++] = 0x74; /* jz near +5 */ \
542 pbCodeBuf[off++] = 0x05 + BP_ON_OBSOLETION; \
543 offConsolidatedJump = off; \
544 if (BP_ON_OBSOLETION) pbCodeBuf[off++] = 0xcc; \
545 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */ \
546 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_Rel32, -4); \
547 pbCodeBuf[off++] = 0x00; \
548 pbCodeBuf[off++] = 0x00; \
549 pbCodeBuf[off++] = 0x00; \
550 pbCodeBuf[off++] = 0x00; \
551 } \
552 } while (0)
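/* About the consolidated jump in CHECK_OPCODES_CMP_JMP: the first mismatch
   check emits a short 'jz +5' that skips over a 'jmp rel32' to the obsolete-TB
   label; every later check then only needs a 2-byte 'jnz' back to that shared
   jmp, which keeps the all-match fast path compact. */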
553
554# define CHECK_OPCODES_CMP_IMM32(a_idxReg) /* cost: 3+4+2 = 9 */ do { \
555 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
556 pbCodeBuf[off++] = *pbOpcodes++; \
557 pbCodeBuf[off++] = *pbOpcodes++; \
558 pbCodeBuf[off++] = *pbOpcodes++; \
559 pbCodeBuf[off++] = *pbOpcodes++; \
560 cbLeft -= 4; \
561 offPage += 4; \
562 CHECK_OPCODES_CMP_JMP(); \
563 } while (0)
564
565# define CHECK_OPCODES_CMP_IMM16(a_idxReg) /* cost: 1+3+2+2 = 8 */ do { \
566 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; \
567 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
568 pbCodeBuf[off++] = *pbOpcodes++; \
569 pbCodeBuf[off++] = *pbOpcodes++; \
570 cbLeft -= 2; \
571 offPage += 2; \
572 CHECK_OPCODES_CMP_JMP(); \
573 } while (0)
574
575# define CHECK_OPCODES_CMP_IMM8(a_idxReg) /* cost: 3+1+2 = 6 */ do { \
576 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x80); \
577 pbCodeBuf[off++] = *pbOpcodes++; \
578 cbLeft -= 1; \
579 offPage += 1; \
580 CHECK_OPCODES_CMP_JMP(); \
581 } while (0)
582
583# define CHECK_OPCODES_CMPSX(a_bOpcode, a_cbToSubtract, a_bPrefix) /* cost: 2+2 = 4 */ do { \
584 if (a_bPrefix) \
585 pbCodeBuf[off++] = (a_bPrefix); \
586 pbCodeBuf[off++] = (a_bOpcode); \
587 CHECK_OPCODES_CMP_JMP(); \
588 cbLeft -= (a_cbToSubtract); \
589 } while (0)
590
591# define CHECK_OPCODES_ECX_IMM(a_uValue) /* cost: 5 */ do { \
592 pbCodeBuf[off++] = 0xb8 + X86_GREG_xCX; \
593 pbCodeBuf[off++] = RT_BYTE1(a_uValue); \
594 pbCodeBuf[off++] = RT_BYTE2(a_uValue); \
595 pbCodeBuf[off++] = RT_BYTE3(a_uValue); \
596 pbCodeBuf[off++] = RT_BYTE4(a_uValue); \
597 } while (0)
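/* Note: the 'cost' figures in the macro comments above count worst-case emitted
   bytes; they appear to be what the iemNativeInstrBufEnsure() size calculations
   further down are based on. */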
598
599 if (cbLeft <= 24)
600 {
601 uint8_t const idxRegTmp = iemNativeRegAllocTmpEx(pReNative, &off,
602 ( RT_BIT_32(X86_GREG_xAX)
603 | RT_BIT_32(X86_GREG_xCX)
604 | RT_BIT_32(X86_GREG_xDX)
605 | RT_BIT_32(X86_GREG_xBX)
606 | RT_BIT_32(X86_GREG_xSI)
607 | RT_BIT_32(X86_GREG_xDI))
608 & ~IEMNATIVE_REG_FIXED_MASK); /* pick reg not requiring rex prefix */
609 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf));
610 if (offPage >= 128 - cbLeft)
611 {
612 off = iemNativeEmitAddGprImm(pReNative, off, idxRegTmp, offPage & ~(uint16_t)3);
613 offPage &= 3;
614 }
615
616 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 14 + 54 + 8 + 6 + BP_ON_OBSOLETION /* = 87 */);
617
618 if (cbLeft > 8)
619 switch (offPage & 3)
620 {
621 case 0:
622 break;
623 case 1: /* cost: 6 + 8 = 14 */
624 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
625 RT_FALL_THRU();
626 case 2: /* cost: 8 */
627 CHECK_OPCODES_CMP_IMM16(idxRegTmp);
628 break;
629 case 3: /* cost: 6 */
630 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
631 break;
632 }
633
634 while (cbLeft >= 4)
635 CHECK_OPCODES_CMP_IMM32(idxRegTmp); /* max iteration: 24/4 = 6; --> cost: 6 * 9 = 54 */
636
637 if (cbLeft >= 2)
638 CHECK_OPCODES_CMP_IMM16(idxRegTmp); /* cost: 8 */
639 if (cbLeft)
640 CHECK_OPCODES_CMP_IMM8(idxRegTmp); /* cost: 6 */
641
642 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
643 iemNativeRegFreeTmp(pReNative, idxRegTmp);
644 }
645 else
646 {
647 /* RDI = &pbInstrBuf[offPage] */
648 uint8_t const idxRegDi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xDI));
649 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegDi, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
650 if (offPage != 0)
651 off = iemNativeEmitAddGprImm(pReNative, off, idxRegDi, offPage);
652
653 /* RSI = pbOpcodes */
654 uint8_t const idxRegSi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xSI));
655 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegSi, (uintptr_t)pbOpcodes);
656
657 /* RCX = counts. */
658 uint8_t const idxRegCx = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xCX));
659
660 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 10 + 5 + 5 + 3 + 4 + 3 + BP_ON_OBSOLETION /*= 35*/);
661
 662 /** @todo profile and optimize this further. Maybe an idea to align by
 663 * offPage if the two cannot be reconciled. */
 664 /* Align by the page offset, so that at least one of the accesses is naturally aligned. */
665 switch (offPage & 7) /* max cost: 10 */
666 {
667 case 0:
668 break;
669 case 1: /* cost: 3+4+3 = 10 */
670 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
671 RT_FALL_THRU();
672 case 2: /* cost: 4+3 = 7 */
673 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
674 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
675 break;
676 case 3: /* cost: 3+3 = 6 */
677 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
678 RT_FALL_THRU();
679 case 4: /* cost: 3 */
680 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
681 break;
682 case 5: /* cost: 3+4 = 7 */
683 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
684 RT_FALL_THRU();
685 case 6: /* cost: 4 */
686 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
687 break;
688 case 7: /* cost: 3 */
689 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
690 break;
691 }
692
693 /* Compare qwords: */
694 uint32_t const cQWords = cbLeft >> 3;
695 CHECK_OPCODES_ECX_IMM(cQWords); /* cost: 5 */
696
697 pbCodeBuf[off++] = X86_OP_PRF_REPZ; /* cost: 5 */
698 CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_REX_W);
699 cbLeft &= 7;
700
701 if (cbLeft & 4)
702 CHECK_OPCODES_CMPSX(0xa7, 4, 0); /* cost: 3 */
703 if (cbLeft & 2)
704 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP); /* cost: 4 */
705 if (cbLeft & 1)
706 CHECK_OPCODES_CMPSX(0xa6, 1, 0); /* cost: 3 */
707
708 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
709 iemNativeRegFreeTmp(pReNative, idxRegCx);
710 iemNativeRegFreeTmp(pReNative, idxRegSi);
711 iemNativeRegFreeTmp(pReNative, idxRegDi);
712 }
713
714#elif defined(RT_ARCH_ARM64)
715 /* We need pbInstrBuf in a register, whatever we do. */
716 uint8_t const idxRegSrc1Ptr = iemNativeRegAllocTmp(pReNative, &off);
717 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegSrc1Ptr, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
718
719 /* We also need at least one more register for holding bytes & words we
720 load via pbInstrBuf. */
721 uint8_t const idxRegSrc1Val = iemNativeRegAllocTmp(pReNative, &off);
722
723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
724
725 /* One byte compare can be done with the opcode byte as an immediate. We'll
726 do this to uint16_t align src1. */
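 /* Throughout this path, compare results are chained via CCMP: once fPendingJmp
    is set, each further compare is made conditional on the previous one having
    matched (kArmv8InstrCond_Eq, with a "not equal" NZCV fallback), so a single
    B.NE at the end catches a mismatch anywhere in the chain. */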
727 bool fPendingJmp = RT_BOOL(offPage & 1);
728 if (fPendingJmp)
729 {
730 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
731 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, *pbOpcodes++, false /*f64Bit*/);
732 offPage += 1;
733 cbLeft -= 1;
734 }
735
736 if (cbLeft > 0)
737 {
738 /* We need a register for holding the opcode bytes we're comparing with,
739 as CCMP only has a 5-bit immediate form and thus cannot hold bytes. */
740 uint8_t const idxRegSrc2Val = iemNativeRegAllocTmp(pReNative, &off);
741
742 /* Word (uint32_t) aligning the src1 pointer is best done using a 16-bit constant load. */
743 if ((offPage & 3) && cbLeft >= 2)
744 {
745 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 2);
746 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
747 if (fPendingJmp)
748 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
749 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
750 else
751 {
752 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
753 fPendingJmp = true;
754 }
755 pbOpcodes += 2;
756 offPage += 2;
757 cbLeft -= 2;
758 }
759
 760 /* DWord (uint64_t) aligning the src1 pointer. We use a 32-bit constant here for simplicity. */
761 if ((offPage & 7) && cbLeft >= 4)
762 {
763 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 4);
764 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
765 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
766 if (fPendingJmp)
767 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
768 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
769 else
770 {
771 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
772 fPendingJmp = true;
773 }
774 pbOpcodes += 4;
775 offPage += 4;
776 cbLeft -= 4;
777 }
778
779 /*
780 * If we've got 16 bytes or more left, switch to memcmp-style.
781 */
782 if (cbLeft >= 16)
783 {
784 /* We need a pointer to the copy of the original opcode bytes. */
785 uint8_t const idxRegSrc2Ptr = iemNativeRegAllocTmp(pReNative, &off);
786 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Ptr, (uintptr_t)pbOpcodes);
787
 788 /* If there are 64 bytes or more to compare we create a loop processing
 789 32 bytes per iteration, for which we'll need a loop register. */
790 if (cbLeft >= 64)
791 {
792 if (fPendingJmp)
793 {
794 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
795 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
796 fPendingJmp = false;
797 }
798
799 uint8_t const idxRegLoop = iemNativeRegAllocTmp(pReNative, &off);
800 uint16_t const cLoops = cbLeft / 32;
801 cbLeft = cbLeft % 32;
802 pbOpcodes += cLoops * 32;
803 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegLoop, cLoops);
804
805 if (offPage != 0) /** @todo optimize out this instruction. */
806 {
807 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, offPage);
808 offPage = 0;
809 }
810
811 uint32_t const offLoopStart = off;
812 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 0);
813 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 0);
814 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
815
816 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 1);
817 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 1);
818 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
819 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
820
821 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 2);
822 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 2);
823 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
824 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
825
826 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 3);
827 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 3);
828 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
829 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
830
831 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
832 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
833
834 /* Advance and loop. */
835 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, 0x20);
836 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc2Ptr, idxRegSrc2Ptr, 0x20);
837 pu32CodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegLoop, idxRegLoop, 1, false /*f64Bit*/, true /*fSetFlags*/);
838 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, (int32_t)offLoopStart - (int32_t)off);
839
840 iemNativeRegFreeTmp(pReNative, idxRegLoop);
841 }
842
843 /* Deal with any remaining dwords (uint64_t). There can be up to
844 three if we looped and four if we didn't. */
845 uint32_t offSrc2 = 0;
846 while (cbLeft >= 8)
847 {
848 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val,
849 idxRegSrc1Ptr, offPage / 8);
850 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val,
851 idxRegSrc2Ptr, offSrc2 / 8);
852 if (fPendingJmp)
853 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
854 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
855 else
856 {
857 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
858 fPendingJmp = true;
859 }
860 pbOpcodes += 8;
861 offPage += 8;
862 offSrc2 += 8;
863 cbLeft -= 8;
864 }
865
866 iemNativeRegFreeTmp(pReNative, idxRegSrc2Ptr);
867 /* max cost thus far: memcmp-loop=43 vs memcmp-no-loop=30 */
868 }
869 /*
870 * Otherwise, we compare with constants and merge with the general mop-up.
871 */
872 else
873 {
874 while (cbLeft >= 8)
875 {
876 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr,
877 offPage / 8);
878 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Val,
879 RT_MAKE_U64_FROM_MSB_U8(pbOpcodes[7], pbOpcodes[6], pbOpcodes[5], pbOpcodes[4],
880 pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
881 if (fPendingJmp)
882 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
883 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, true /*f64Bit*/);
884 else
885 {
886 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, true /*f64Bit*/);
887 fPendingJmp = true;
888 }
889 pbOpcodes += 8;
890 offPage += 8;
891 cbLeft -= 8;
892 }
893 /* max cost thus far: 21 */
894 }
895
896 /* Deal with any remaining bytes (7 or less). */
897 Assert(cbLeft < 8);
898 if (cbLeft >= 4)
899 {
900 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr,
901 offPage / 4);
902 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
903 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
904 if (fPendingJmp)
905 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
906 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
907 else
908 {
909 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
910 fPendingJmp = true;
911 }
912 pbOpcodes += 4;
913 offPage += 4;
914 cbLeft -= 4;
915
916 }
917
918 if (cbLeft >= 2)
919 {
920 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr,
921 offPage / 2);
922 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
923 if (fPendingJmp)
924 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
925 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
926 else
927 {
928 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
929 fPendingJmp = true;
930 }
931 pbOpcodes += 2;
932 offPage += 2;
933 cbLeft -= 2;
934 }
935
936 if (cbLeft > 0)
937 {
938 Assert(cbLeft == 1);
939 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
940 if (fPendingJmp)
941 {
942 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, pbOpcodes[0]);
943 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
944 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
945 }
946 else
947 {
948 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, pbOpcodes[0], false /*f64Bit*/);
949 fPendingJmp = true;
950 }
951 pbOpcodes += 1;
952 offPage += 1;
953 cbLeft -= 1;
954 }
955
956 iemNativeRegFreeTmp(pReNative, idxRegSrc2Val);
957 }
958 Assert(cbLeft == 0);
959
960 /*
961 * Finally, the branch on difference.
962 */
963 if (fPendingJmp)
964 {
965 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
966 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
967 }
968 RT_NOREF(pu32CodeBuf, cbLeft, offPage, pbOpcodes, offConsolidatedJump, idxLabelObsoleteTb);
969
970 /* max costs: memcmp-loop=54; memcmp-no-loop=41; only-src1-ptr=32 */
971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
972 iemNativeRegFreeTmp(pReNative, idxRegSrc1Val);
973 iemNativeRegFreeTmp(pReNative, idxRegSrc1Ptr);
974
975#else
976# error "Port me"
977#endif
978 return off;
979}
980
981
982/** Duplicated in IEMAllThrdFuncsBltIn.cpp. */
983DECL_FORCE_INLINE(RTGCPHYS) iemTbGetRangePhysPageAddr(PCIEMTB pTb, uint8_t idxRange)
984{
985 Assert(idxRange < RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges)));
986 uint8_t const idxPage = pTb->aRanges[idxRange].idxPhysPage;
987 Assert(idxPage <= RT_ELEMENTS(pTb->aGCPhysPages));
988 if (idxPage == 0)
989 return pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
990 Assert(!(pTb->aGCPhysPages[idxPage - 1] & GUEST_PAGE_OFFSET_MASK));
991 return pTb->aGCPhysPages[idxPage - 1];
992}
993
994
995/**
996 * Macro that implements PC check after a conditional branch.
997 */
998#define BODY_CHECK_PC_AFTER_BRANCH(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
999 RT_NOREF(a_cbInstr); \
1000 off = iemNativeEmitBltInCheckPcAfterBranch(pReNative, off, a_pTb, a_idxRange, a_offRange)
1001
1002DECL_FORCE_INLINE(uint32_t)
1003iemNativeEmitBltInCheckPcAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb,
1004 uint8_t idxRange, uint16_t offRange)
1005{
1006#ifdef VBOX_STRICT
1007 off = iemNativeEmitMarker(pReNative, off, 0x80000004);
1008#endif
1009
1010 /*
1011 * The GCPhysRangePageWithOffset value in the threaded function is a fixed
1012 * constant for us here.
1013 *
1014 * We can pretend that iem.s.cbInstrBufTotal is X86_PAGE_SIZE here, because
1015 * it serves no purpose as a CS.LIM, if that's needed we've just performed
1016 * it, and as long as we don't implement code TLB reload code here there is
1017 * no point in checking that the TLB data we're using is still valid.
1018 *
 1019 * What we do is:
 1020 * 1. Calculate the FLAT PC (RIP + CS.BASE).
 1021 * 2. Subtract iem.s.uInstrBufPc from it, getting 'off'.
1022 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal or
1023 * we're in the wrong spot and need to find a new TB.
1024 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
1025 * GCPhysRangePageWithOffset constant mentioned above.
1026 *
1027 * The adding of CS.BASE to RIP can be skipped in the first step if we're
1028 * in 64-bit code or flat 32-bit.
1029 */
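 /* In plain C the above boils down to roughly this (illustrative sketch;
    GCPhysRangePageWithOffset is the constant computed near the end of this
    function):

        uint64_t const uFlatPc = rip + (fFlatMode ? 0 : csBase);              // step 1
        uint64_t const offBuf  = uFlatPc - pVCpu->iem.s.uInstrBufPc;          // step 2
        if (offBuf >= X86_PAGE_SIZE)                                          // step 3
            goto CheckBranchMiss;
        if (pVCpu->iem.s.GCPhysInstrBuf + offBuf != GCPhysRangePageWithOffset) // step 4
            goto CheckBranchMiss;
 */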
1030
1031 /* Allocate registers for step 1. Get the shadowed stuff before allocating
1032 the temp register, so we don't accidentally clobber something we'll be
1033 needing again immediately. This is why we get idxRegCsBase here. */
1034 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1035 kIemNativeGstRegUse_ReadOnly);
1036 uint8_t const idxRegCsBase = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX
1037 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
1038 kIemNativeGstRegUse_ReadOnly);
1039
1040 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1041
1042#ifdef VBOX_STRICT
1043 /* Do assertions before idxRegTmp contains anything. */
1044 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1045# ifdef RT_ARCH_AMD64
1046 {
1047 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1048 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1049 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1050 {
1051 /* cmp r/m64, imm8 */
1052 pbCodeBuf[off++] = X86_OP_REX_W;
1053 pbCodeBuf[off++] = 0x83;
1054 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1055 pbCodeBuf[off++] = 0;
1056 /* je rel8 */
1057 pbCodeBuf[off++] = 0x74;
1058 pbCodeBuf[off++] = 1;
1059 /* int3 */
1060 pbCodeBuf[off++] = 0xcc;
1061
1062 }
1063
1064 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1065 /* test r/m64, imm32 */
1066 pbCodeBuf[off++] = X86_OP_REX_W;
1067 pbCodeBuf[off++] = 0xf7;
1068 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1069 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1070 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1071 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1072 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1073 /* jz rel8 */
1074 pbCodeBuf[off++] = 0x74;
1075 pbCodeBuf[off++] = 1;
1076 /* int3 */
1077 pbCodeBuf[off++] = 0xcc;
1078 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1079 }
1080# else
1081
1082 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1083 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1084 {
1085 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1086# ifdef RT_ARCH_ARM64
1087 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1088 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, idxRegTmp);
1089 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2004);
1090 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1091# else
1092# error "Port me!"
1093# endif
1094 }
1095# endif
1096
1097#endif /* VBOX_STRICT */
1098
1099 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1100 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1101 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1102 {
1103#ifdef RT_ARCH_ARM64
1104 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1105 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1106 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1107#else
1108 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1109 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1110#endif
1111 }
1112 else
1113 {
1114#ifdef RT_ARCH_ARM64
1115 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1116 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1117 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1118 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1119#else
1120 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1121 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1122 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1123#endif
1124 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1125 }
1126 iemNativeRegFreeTmp(pReNative, idxRegPc);
1127
1128 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal. */
1129 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1130 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_CheckBranchMiss);
1131
1132 /* 4. Add iem.s.GCPhysInstrBuf and compare with GCPhysRangePageWithOffset. */
1133#ifdef RT_ARCH_AMD64
1134 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1135 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1136 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1137 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1138 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1139
1140#elif defined(RT_ARCH_ARM64)
1141 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
1142
1143 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1144 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1145 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1146 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1147
1148# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1149 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1150 off = iemNativeEmitJzToFixed(pReNative, off, off + 2 /* correct for ARM64 */);
1151 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1152# endif
1153 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1154#else
1155# error "Port me"
1156#endif
1157
1158 RTGCPHYS const GCPhysRangePageWithOffset = ( iemTbGetRangePhysPageAddr(pTb, idxRange)
1159 | pTb->aRanges[idxRange].offPhysPage)
1160 + offRange;
1161 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(pReNative, off, idxRegTmp, GCPhysRangePageWithOffset,
1162 kIemNativeLabelType_CheckBranchMiss);
1163
1164 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1165 return off;
1166}
1167
1168
1169/**
 1170 * Macro that implements TLB loading and pbInstrBuf updating for an
 1171 * instruction crossing into a new page.
1172 *
1173 * This may long jump if we're raising a \#PF, \#GP or similar trouble.
1174 */
1175#define BODY_LOAD_TLB_FOR_NEW_PAGE(a_pTb, a_offInstr, a_idxRange, a_cbInstr) \
1176 RT_NOREF(a_cbInstr); \
1177 off = iemNativeEmitBltLoadTlbForNewPage(pReNative, off, pTb, a_idxRange, a_offInstr)
1178
1179DECL_FORCE_INLINE(uint32_t)
1180iemNativeEmitBltLoadTlbForNewPage(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint8_t offInstr)
1181{
1182#ifdef VBOX_STRICT
1183 off = iemNativeEmitMarker(pReNative, off, 0x80000005);
1184#endif
1185
1186 /*
1187 * Move/spill/flush stuff out of call-volatile registers.
1188 * This is the easy way out. We could contain this to the tlb-miss branch
1189 * by saving and restoring active stuff here.
1190 */
1191 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
1192 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
1193
1194 /*
1195 * Define labels and allocate the register for holding the GCPhys of the new page.
1196 */
1197 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1198 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1199 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1200 uint32_t const idxRegGCPhys = iemNativeRegAllocTmp(pReNative, &off);
1201
1202 /*
1203 * First we try to go via the TLB.
1204 */
1205 /** @todo */
1206
1207 /*
1208 * TLB miss: Call iemNativeHlpMemCodeNewPageTlbMissWithOff to do the work.
1209 */
1210 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1211
1212 /* IEMNATIVE_CALL_ARG1_GREG = offInstr */
1213 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offInstr);
1214
1215 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1216 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1217
1218 /* Done setting up parameters, make the call. */
1219 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpMemCodeNewPageTlbMissWithOff);
1220
1221 /* Move the result to the right register. */
1222 if (idxRegGCPhys != IEMNATIVE_CALL_RET_GREG)
1223 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegGCPhys, IEMNATIVE_CALL_RET_GREG);
1224
1225 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1226
1227 /*
1228 * Now check the physical address of the page matches the expected one.
1229 */
1230 RTGCPHYS const GCPhysNewPage = iemTbGetRangePhysPageAddr(pTb, idxRange);
1231 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(pReNative, off, idxRegGCPhys, GCPhysNewPage,
1232 kIemNativeLabelType_ObsoleteTb);
1233
1234 iemNativeRegFreeTmp(pReNative, idxRegGCPhys);
1235 return off;
1236}
1237
1238
1239/**
 1240 * Macro that implements TLB loading and pbInstrBuf updating when branching
 1241 * or when crossing a page on an instruction boundary.
1242 *
1243 * This differs from BODY_LOAD_TLB_FOR_NEW_PAGE in that it will first check if
1244 * it is an inter-page branch and also check the page offset.
1245 *
1246 * This may long jump if we're raising a \#PF, \#GP or similar trouble.
1247 */
1248#define BODY_LOAD_TLB_AFTER_BRANCH(a_pTb, a_idxRange, a_cbInstr) \
1249 RT_NOREF(a_cbInstr); \
1250 off = iemNativeEmitBltLoadTlbAfterBranch(pReNative, off, pTb, a_idxRange)
1251
1252#if 0
1253do { \
1254 /* Is RIP within the current code page? */ \
1255 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu)); \
1256 uint64_t const uPc = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base; \
1257 uint64_t const off = uPc - pVCpu->iem.s.uInstrBufPc; \
1258 if (off < pVCpu->iem.s.cbInstrBufTotal) \
1259 { \
1260 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK)); \
1261 Assert(pVCpu->iem.s.pbInstrBuf); \
1262 RTGCPHYS const GCPhysRangePageWithOffset = iemTbGetRangePhysPageAddr(a_pTb, a_idxRange) \
1263 | pTb->aRanges[(a_idxRange)].offPhysPage; \
1264 if (GCPhysRangePageWithOffset == pVCpu->iem.s.GCPhysInstrBuf + off) \
1265 { /* we're good */ } \
1266 else \
1267 { \
1268 Log7(("TB jmp miss: %p at %04x:%08RX64 LB %u; branching/1; GCPhysWithOffset=%RGp expected %RGp, pbInstrBuf=%p - #%u\n", \
1269 (a_pTb), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, (a_cbInstr), \
1270 pVCpu->iem.s.GCPhysInstrBuf + off, GCPhysRangePageWithOffset, pVCpu->iem.s.pbInstrBuf, __LINE__)); \
1271 RT_NOREF(a_cbInstr); \
1272 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses); \
1273 return VINF_IEM_REEXEC_BREAK; \
1274 } \
1275 } \
1276 else \
1277 { \
1278 /* Must translate new RIP. */ \
1279 pVCpu->iem.s.pbInstrBuf = NULL; \
1280 pVCpu->iem.s.offCurInstrStart = 0; \
1281 pVCpu->iem.s.offInstrNextByte = 0; \
1282 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL); \
1283 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) || !pVCpu->iem.s.pbInstrBuf); \
1284 \
1285 RTGCPHYS const GCPhysRangePageWithOffset = iemTbGetRangePhysPageAddr(a_pTb, a_idxRange) \
1286 | pTb->aRanges[(a_idxRange)].offPhysPage; \
1287 uint64_t const offNew = uPc - pVCpu->iem.s.uInstrBufPc; \
1288 if ( GCPhysRangePageWithOffset == pVCpu->iem.s.GCPhysInstrBuf + offNew \
1289 && pVCpu->iem.s.pbInstrBuf) \
1290 { /* likely */ } \
1291 else \
1292 { \
1293 Log7(("TB jmp miss: %p at %04x:%08RX64 LB %u; branching/2; GCPhysWithOffset=%RGp expected %RGp, pbInstrBuf=%p - #%u\n", \
1294 (a_pTb), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, (a_cbInstr), \
1295 pVCpu->iem.s.GCPhysInstrBuf + offNew, GCPhysRangePageWithOffset, pVCpu->iem.s.pbInstrBuf, __LINE__)); \
1296 RT_NOREF(a_cbInstr); \
1297 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses); \
1298 return VINF_IEM_REEXEC_BREAK; \
1299 } \
1300 } \
1301 } while(0)
1302#endif
1303
1304DECL_FORCE_INLINE(uint32_t)
1305iemNativeEmitBltLoadTlbAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange)
1306{
1307// off = iemNativeEmitBrk(pReNative, off, 0x1010);
1308#ifdef VBOX_STRICT
1309 off = iemNativeEmitMarker(pReNative, off, 0x80000006);
1310#endif
1311
1312 /*
1313 * Define labels and allocate the register for holding the GCPhys of the new page.
1314 */
1315 uint32_t const idxLabelCheckBranchMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckBranchMiss);
1316 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1317 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1318 //
1319
1320 RTGCPHYS const GCPhysRangePageWithOffset = iemTbGetRangePhysPageAddr(pTb, idxRange)
1321 | pTb->aRanges[idxRange].offPhysPage;
1322
1323 /*
1324 *
1325 * First check if RIP is within the current code.
1326 *
1327 * This is very similar to iemNativeEmitBltInCheckPcAfterBranch, the only
1328 * difference is what we do when stuff doesn't match up.
1329 *
 1330 * What we do is:
 1331 * 1. Calculate the FLAT PC (RIP + CS.BASE).
 1332 * 2. Subtract iem.s.uInstrBufPc from it, getting 'off'.
1333 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal or
1334 * we need to retranslate RIP via the TLB.
1335 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
1336 * GCPhysRangePageWithOffset constant mentioned above.
1337 *
1338 * The adding of CS.BASE to RIP can be skipped in the first step if we're
1339 * in 64-bit code or flat 32-bit.
1340 *
1341 */
1342
1343 /* Allocate registers for step 1. Get the shadowed stuff before allocating
1344 the temp register, so we don't accidentally clobber something we'll be
1345 needing again immediately. This is why we get idxRegCsBase here. */
1346 /** @todo save+restore active registers and guest shadows in tlb-miss! */
1347 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
1348 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1349 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1350 uint8_t const idxRegCsBase = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX
1351 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
1352 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1353
1354 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off); /* volatile reg is okay for these two */
1355 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
1356
1357#ifdef VBOX_STRICT
1358 /* Do assertions before idxRegTmp contains anything. */
1359 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1360# ifdef RT_ARCH_AMD64
1361 {
1362 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1363 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1364 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1365 {
1366 /* cmp r/m64, imm8 */
1367 pbCodeBuf[off++] = X86_OP_REX_W;
1368 pbCodeBuf[off++] = 0x83;
1369 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1370 pbCodeBuf[off++] = 0;
1371 /* je rel8 */
1372 pbCodeBuf[off++] = 0x74;
1373 pbCodeBuf[off++] = 1;
1374 /* int3 */
1375 pbCodeBuf[off++] = 0xcc;
1376
1377 }
1378
1379 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1380 /* test r/m64, imm32 */
1381 pbCodeBuf[off++] = X86_OP_REX_W;
1382 pbCodeBuf[off++] = 0xf7;
1383 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1384 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1385 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1386 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1387 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1388 /* jz rel8 */
1389 pbCodeBuf[off++] = 0x74;
1390 pbCodeBuf[off++] = 1;
1391 /* int3 */
1392 pbCodeBuf[off++] = 0xcc;
1393 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1394 }
1395# else
1396
1397 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1398 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1399 {
1400 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1401# ifdef RT_ARCH_ARM64
1402 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1403 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, idxRegTmp);
1404 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2006);
1405 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1406# else
1407# error "Port me!"
1408# endif
1409 }
1410# endif
1411
1412#endif /* VBOX_STRICT */
1413
1414 /* Because we're lazy, we'll jump back here to recalc 'off' and share the
1415 GCPhysRangePageWithOffset check. This is a little risky, so we use the
1416 2nd register to check if we've looped more than once already. */
1417 off = iemNativeEmitGprZero(pReNative, off, idxRegTmp2);
1418
1419 uint32_t const offLabelRedoChecks = off;
1420
1421 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1422 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1423 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1424 {
1425#ifdef RT_ARCH_ARM64
1426 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1427 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1428 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1429#else
1430 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1431 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1432#endif
1433 }
1434 else
1435 {
1436#ifdef RT_ARCH_ARM64
1437 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1438 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1439 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1440 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1441#else
1442 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1443 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1444 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1445#endif
1446 }
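 /* idxRegTmp now holds 'off', i.e. the flat PC minus iem.s.uInstrBufPc. */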
1447
1448 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal.
1449 Unlike iemNativeEmitBltInCheckPcAfterBranch we'll jump to the TLB loading if this fails. */
1450 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1451 uint32_t const offFixedJumpToTlbLoad = off;
1452 off = iemNativeEmitJaToFixed(pReNative, off, off /* (ASSUME ja rel8 suffices) */);
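 /* (The branch target is patched to point at the TLB-load code further down,
    via the iemNativeFixupFixedJump call, once that code's position is known.) */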
1453
1454 /* 4a. Add iem.s.GCPhysInstrBuf to off ... */
1455#ifdef RT_ARCH_AMD64
1456 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1457 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1458 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1459 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1460 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1461
1462#elif defined(RT_ARCH_ARM64)
1463
1464 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1465 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1466 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1467 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1468
1469# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1470 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1471 off = iemNativeEmitJzToFixed(pReNative, off, off + 2 /* correct for ARM64 */);
1472 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1473# endif
1474#else
1475# error "Port me"
1476#endif
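 /* idxRegTmp now holds the guest physical address of the new PC
    (iem.s.GCPhysInstrBuf + off), ready for the comparison below. */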
1477
1478 /* 4b. ... and compare with GCPhysRangePageWithOffset.
1479
1480 Unlike iemNativeEmitBltInCheckPcAfterBranch we'll have to be more
1481 careful and avoid implicit temporary register usage here.
1482
1483 Unlike the threaded version of this code, we do not obsolete TBs here to
1484 reduce the code size and because indirect calls may legally end at the
1485 same offset in two different pages depending on the program state. */
1486 /** @todo synch the threaded BODY_LOAD_TLB_AFTER_BRANCH version with this. */
1487 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp2, GCPhysRangePageWithOffset);
1488 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegTmp, idxRegTmp2);
1489 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelCheckBranchMiss);
1490 uint32_t const offFixedJumpToEnd = off;
1491 off = iemNativeEmitJmpToFixed(pReNative, off, off + 512 /* force rel32 */);
1492
1493 /*
1494 * First we try to go via the TLB.
1495 */
1496 iemNativeFixupFixedJump(pReNative, offFixedJumpToTlbLoad, off);
1497//off = iemNativeEmitBrk(pReNative, off, 0x1111);
1498
1499 /* Check that we haven't been here before. */
1500 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxRegTmp2, false /*f64Bit*/, idxLabelCheckBranchMiss);
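 /* (A non-zero idxRegTmp2 means the TLB reload below has already run once;
    bailing out to CheckBranchMiss here keeps us from looping forever if the
    reloaded buffer still doesn't cover GCPhysRangePageWithOffset.) */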
1501
1502 /*
1503 * TLB miss: Call iemNativeHlpMemCodeNewPageTlbMiss to do the work.
1504 */
1505 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1506
1507 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1508 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1509
1510 /* Done setting up parameters, make the call. */
1511 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpMemCodeNewPageTlbMiss);
1512
1513 /* Jmp back to the start and redo the checks. */
1514 off = iemNativeEmitLoadGpr8Imm(pReNative, off, idxRegTmp2, 1); /* indicate that we've looped once already */
1515 off = iemNativeEmitJmpToFixed(pReNative, off, offLabelRedoChecks);
1516
1517 /* The end. */
1518 iemNativeFixupFixedJump(pReNative, offFixedJumpToEnd, off);
1519
1520 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1521 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1522 iemNativeRegFreeTmp(pReNative, idxRegPc);
1523 if (idxRegCsBase != UINT8_MAX)
1524 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1525 return off;
1526}
1527
1528
1529#ifdef BODY_CHECK_CS_LIM
1530/**
1531 * Built-in function that checks that EIP/IP + uParam0 is within CS.LIM,
1532 * raising a \#GP(0) if this isn't the case.
1533 */
1534IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
1535{
1536 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1537 BODY_SET_CUR_INSTR();
1538 BODY_FLUSH_PENDING_WRITES();
1539 BODY_CHECK_CS_LIM(cbInstr);
1540 return off;
1541}
1542#endif
1543
1544
1545#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_CS_LIM)
1546/**
1547 * Built-in function for re-checking opcodes and CS.LIM after an instruction
1548 * that may have modified them.
1549 */
1550IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
1551{
1552 PCIEMTB const pTb = pReNative->pTbOrg;
1553 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1554 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1555 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1556 BODY_SET_CUR_INSTR();
1557 BODY_FLUSH_PENDING_WRITES();
1558 BODY_CHECK_CS_LIM(cbInstr);
1559 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1560 return off;
1561}
1562#endif
1563
1564
1565#if defined(BODY_CHECK_OPCODES)
1566/**
1567 * Built-in function for re-checking opcodes after an instruction that may have
1568 * modified them.
1569 */
1570IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
1571{
1572 PCIEMTB const pTb = pReNative->pTbOrg;
1573 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1574 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1575 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1576 BODY_SET_CUR_INSTR();
1577 BODY_FLUSH_PENDING_WRITES();
1578 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1579 return off;
1580}
1581#endif
1582
1583
1584#if defined(BODY_CHECK_OPCODES) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1585/**
1586 * Built-in function for re-checking opcodes and considering the need for CS.LIM
1587 * checking after an instruction that may have modified them.
1588 */
1589IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
1590{
1591 PCIEMTB const pTb = pReNative->pTbOrg;
1592 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1593 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1594 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1595 BODY_SET_CUR_INSTR();
1596 BODY_FLUSH_PENDING_WRITES();
1597 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1598 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1599 return off;
1600}
1601#endif
1602
1603
1604/*
1605 * Post-branching checkers.
1606 */
1607
1608#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1609/**
1610 * Built-in function for checking CS.LIM, checking the PC and checking opcodes
1611 * after conditional branching within the same page.
1612 *
1613 * @see iemThreadedFunc_BltIn_CheckPcAndOpcodes
1614 */
1615IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
1616{
1617 PCIEMTB const pTb = pReNative->pTbOrg;
1618 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1619 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1620 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1621 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1622 BODY_SET_CUR_INSTR();
1623 BODY_FLUSH_PENDING_WRITES();
1624 BODY_CHECK_CS_LIM(cbInstr);
1625 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1626 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1627 //LogFunc(("okay\n"));
1628 return off;
1629}
1630#endif
1631
1632
1633#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH)
1634/**
1635 * Built-in function for checking the PC and checking opcodes after conditional
1636 * branching within the same page.
1637 *
1638 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1639 */
1640IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
1641{
1642 PCIEMTB const pTb = pReNative->pTbOrg;
1643 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1644 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1645 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1646 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1647 BODY_SET_CUR_INSTR();
1648 BODY_FLUSH_PENDING_WRITES();
1649 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1650 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1651 //LogFunc(("okay\n"));
1652 return off;
1653}
1654#endif
1655
1656
1657#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1658/**
1659 * Built-in function for checking the PC and checking opcodes and considering
1660 * the need for CS.LIM checking after conditional branching within the same
1661 * page.
1662 *
1663 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1664 */
1665IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
1666{
1667 PCIEMTB const pTb = pReNative->pTbOrg;
1668 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1669 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1670 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1671 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1672 BODY_SET_CUR_INSTR();
1673 BODY_FLUSH_PENDING_WRITES();
1674 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1675 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1676 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1677 //LogFunc(("okay\n"));
1678 return off;
1679}
1680#endif
1681
1682
1683#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1684/**
1685 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1686 * transitioning to a different code page.
1687 *
1688 * The code page transition can either be a natural advance onto the next page
1689 * (with the instruction starting at page offset zero) or the result of branching.
1690 *
1691 * @see iemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1692 */
1693IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
1694{
1695 PCIEMTB const pTb = pReNative->pTbOrg;
1696 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1697 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1698 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1699 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1700 BODY_SET_CUR_INSTR();
1701 BODY_FLUSH_PENDING_WRITES();
1702 BODY_CHECK_CS_LIM(cbInstr);
1703 Assert(offRange == 0);
1704 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1705 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1706 //LogFunc(("okay\n"));
1707 return off;
1708}
1709#endif
1710
1711
1712#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH)
1713/**
1714 * Built-in function for loading TLB and checking opcodes when transitioning to
1715 * a different code page.
1716 *
1717 * The code page transition can either be a natural advance onto the next page
1718 * (with the instruction starting at page offset zero) or the result of branching.
1719 *
1720 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1721 */
1722IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
1723{
1724 PCIEMTB const pTb = pReNative->pTbOrg;
1725 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1726 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1727 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1728 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1729 BODY_SET_CUR_INSTR();
1730 BODY_FLUSH_PENDING_WRITES();
1731 Assert(offRange == 0);
1732 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1733 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1734 //LogFunc(("okay\n"));
1735 return off;
1736}
1737#endif
1738
1739
1740#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1741/**
1742 * Built-in function for loading TLB and checking opcodes and considering the
1743 * need for CS.LIM checking when transitioning to a different code page.
1744 *
1745 * The code page transition can either be a natural advance onto the next page
1746 * (with the instruction starting at page offset zero) or the result of branching.
1747 *
1748 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1749 */
1750IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
1751{
1752 PCIEMTB const pTb = pReNative->pTbOrg;
1753 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1754 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1755 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1756 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1757 BODY_SET_CUR_INSTR();
1758 BODY_FLUSH_PENDING_WRITES();
1759 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1760 Assert(offRange == 0);
1761 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1762 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1763 //LogFunc(("okay\n"));
1764 return off;
1765}
1766#endif
1767
1768
1769
1770/*
1771 * Natural page crossing checkers.
1772 */
1773
1774#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1775/**
1776 * Built-in function for checking CS.LIM, loading TLB and checking opcodes on
1777 * both pages when transitioning to a different code page.
1778 *
1779 * This is used when the previous instruction requires revalidation of opcode
1780 * bytes and the current instruction crosses a page boundary with opcode bytes
1781 * in both the old and new page.
1782 *
1783 * @see iemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1784 */
1785IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
1786{
1787 PCIEMTB const pTb = pReNative->pTbOrg;
1788 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1789 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1790 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1791 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1792 uint32_t const idxRange2 = idxRange1 + 1;
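 /* (All the cross-page variants use this parameter layout: auParams[0] packs
    cbInstr into its low 32 bits and cbStartPage into its high 32 bits, while
    auParams[1] and auParams[2] carry the first range index and the offset
    into it; the second range is taken to follow immediately after.) */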
1793 BODY_SET_CUR_INSTR();
1794 BODY_FLUSH_PENDING_WRITES();
1795 BODY_CHECK_CS_LIM(cbInstr);
1796 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1797 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1798 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1799 return off;
1800}
1801#endif
1802
1803
1804#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1805/**
1806 * Built-in function for loading TLB and checking opcodes on both pages when
1807 * transitioning to a different code page.
1808 *
1809 * This is used when the previous instruction requires revalidation of opcode
1810 * bytes and the current instruction crosses a page boundary with opcode bytes
1811 * in both the old and new page.
1812 *
1813 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1814 */
1815IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
1816{
1817 PCIEMTB const pTb = pReNative->pTbOrg;
1818 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1819 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1820 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1821 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1822 uint32_t const idxRange2 = idxRange1 + 1;
1823 BODY_SET_CUR_INSTR();
1824 BODY_FLUSH_PENDING_WRITES();
1825 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1826 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1827 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1828 return off;
1829}
1830#endif
1831
1832
1833#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1834/**
1835 * Built-in function for loading TLB and checking opcodes on both pages and
1836 * considering the need for CS.LIM checking when transitioning to a different
1837 * code page.
1838 *
1839 * This is used when the previous instruction requires revalidation of opcode
1840 * bytes and the current instruction crosses a page boundary with opcode bytes
1841 * in both the old and new page.
1842 *
1843 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1844 */
1845IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
1846{
1847 PCIEMTB const pTb = pReNative->pTbOrg;
1848 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1849 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1850 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1851 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1852 uint32_t const idxRange2 = idxRange1 + 1;
1853 BODY_SET_CUR_INSTR();
1854 BODY_FLUSH_PENDING_WRITES();
1855 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1856 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1857 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1858 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1859 return off;
1860}
1861#endif
1862
1863
1864#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1865/**
1866 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1867 * advancing naturally to a different code page.
1868 *
1869 * Only opcodes on the new page are checked.
1870 *
1871 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1872 */
1873IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
1874{
1875 PCIEMTB const pTb = pReNative->pTbOrg;
1876 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1877 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1878 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1879 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1880 uint32_t const idxRange2 = idxRange1 + 1;
1881 BODY_SET_CUR_INSTR();
1882 BODY_FLUSH_PENDING_WRITES();
1883 BODY_CHECK_CS_LIM(cbInstr);
1884 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1885 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1886 return off;
1887}
1888#endif
1889
1890
1891#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1892/**
1893 * Built-in function for loading TLB and checking opcodes when advancing
1894 * naturally to a different code page.
1895 *
1896 * Only opcodes on the new page are checked.
1897 *
1898 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1899 */
1900IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
1901{
1902 PCIEMTB const pTb = pReNative->pTbOrg;
1903 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1904 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1905 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1906 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1907 uint32_t const idxRange2 = idxRange1 + 1;
1908 BODY_SET_CUR_INSTR();
1909 BODY_FLUSH_PENDING_WRITES();
1910 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1911 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1912 return off;
1913}
1914#endif
1915
1916
1917#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1918/**
1919 * Built-in function for loading TLB and checking opcodes and considering the
1920 * need for CS.LIM checking when advancing naturally to a different code page.
1921 *
1922 * Only opcodes on the new page are checked.
1923 *
1924 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1925 */
1926IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
1927{
1928 PCIEMTB const pTb = pReNative->pTbOrg;
1929 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1930 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1931 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1932 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1933 uint32_t const idxRange2 = idxRange1 + 1;
1934 BODY_SET_CUR_INSTR();
1935 BODY_FLUSH_PENDING_WRITES();
1936 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1937 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1938 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1939 return off;
1940}
1941#endif
1942
1943
1944#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1945/**
1946 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1947 * advancing naturally to a different code page with first instr at byte 0.
1948 *
1949 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1950 */
1951IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
1952{
1953 PCIEMTB const pTb = pReNative->pTbOrg;
1954 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1955 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1956 BODY_SET_CUR_INSTR();
1957 BODY_FLUSH_PENDING_WRITES();
1958 BODY_CHECK_CS_LIM(cbInstr);
1959 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1960 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1961 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1962 return off;
1963}
1964#endif
1965
1966
1967#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1968/**
1969 * Built-in function for loading TLB and checking opcodes when advancing
1970 * naturally to a different code page with first instr at byte 0.
1971 *
1972 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1973 */
1974IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
1975{
1976 PCIEMTB const pTb = pReNative->pTbOrg;
1977 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1978 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1979 BODY_SET_CUR_INSTR();
1980 BODY_FLUSH_PENDING_WRITES();
1981 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1982 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1983 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1984 return off;
1985}
1986#endif
1987
1988
1989#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1990/**
1991 * Built-in function for loading TLB and checking opcodes and considering the
1992 * need for CS.LIM checking when advancing naturally to a different code page
1993 * with first instr at byte 0.
1994 *
1995 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1996 */
1997IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
1998{
1999 PCIEMTB const pTb = pReNative->pTbOrg;
2000 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
2001 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
2002 BODY_SET_CUR_INSTR();
2003 BODY_FLUSH_PENDING_WRITES();
2004 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
2005 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
2006 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
2007 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
2008 return off;
2009}
2010#endif
2011