VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompBltIn.cpp@102727

Last change on this file since 102727 was 102702, checked in by vboxsync, 14 months ago

VMM/IEM: Stepped thru iemNativeEmitBltLoadTlbAfterBranch on arm. bugref:10371

1/* $Id: IEMAllN8veRecompBltIn.cpp 102702 2023-12-26 00:30:00Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, Emitters for Built-In Threaded Functions.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33//#define IEM_WITH_OPAQUE_DECODER_STATE - need offCurInstrStart access for iemNativeHlpMemCodeNewPageTlbMiss and friends.
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#include <VBox/vmm/iem.h>
37#include <VBox/vmm/cpum.h>
38#include <VBox/vmm/dbgf.h>
39#include "IEMInternal.h"
40#include <VBox/vmm/vmcc.h>
41#include <VBox/log.h>
42#include <VBox/err.h>
43#include <VBox/param.h>
44#include <iprt/assert.h>
45#include <iprt/string.h>
46#if defined(RT_ARCH_AMD64)
47# include <iprt/x86.h>
48#elif defined(RT_ARCH_ARM64)
49# include <iprt/armv8.h>
50#endif
51
52
53#include "IEMInline.h"
54#include "IEMThreadedFunctions.h"
55#include "IEMN8veRecompiler.h"
56#include "IEMN8veRecompilerEmit.h"
57
58
59
60/*********************************************************************************************************************************
61* TB Helper Functions *
62*********************************************************************************************************************************/
63#ifdef RT_ARCH_AMD64
64DECLASM(void) iemNativeHlpAsmSafeWrapLogCpuState(void);
65#endif
66
67
68/**
69 * Used by TB code to deal with a TLB miss for a new page.
70 */
71IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCodeNewPageTlbMiss,(PVMCPUCC pVCpu))
72{
73 pVCpu->iem.s.pbInstrBuf = NULL;
74 pVCpu->iem.s.offCurInstrStart = GUEST_PAGE_SIZE;
75 pVCpu->iem.s.offInstrNextByte = GUEST_PAGE_SIZE;
76 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
77 if (pVCpu->iem.s.pbInstrBuf)
78 { /* likely */ }
79 else
80 {
81 IEM_DO_LONGJMP(pVCpu, VINF_IEM_REEXEC_BREAK);
82 }
83}
84
85
86/**
87 * Used by TB code to deal with a TLB miss for a new page, returning the physical address of the new page or NIL_RTGCPHYS if the fetch failed.
88 */
89IEM_DECL_NATIVE_HLP_DEF(RTGCPHYS, iemNativeHlpMemCodeNewPageTlbMissWithOff,(PVMCPUCC pVCpu, uint8_t offInstr))
90{
91 pVCpu->iem.s.pbInstrBuf = NULL;
92 pVCpu->iem.s.offCurInstrStart = GUEST_PAGE_SIZE - offInstr;
93 pVCpu->iem.s.offInstrNextByte = GUEST_PAGE_SIZE;
94 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
95 return pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf : NIL_RTGCPHYS;
96}
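
/*
 * Illustration only (same #if 0 style as used further down in this file):
 * roughly how the TB code emitted by iemNativeEmitBltLoadTlbForNewPage uses
 * the helper above on a TLB miss. GCPhysNewPageExpected stands in for the
 * constant baked into the TB; it is not a real symbol.
 */
#if 0
    RTGCPHYS const GCPhysNew = iemNativeHlpMemCodeNewPageTlbMissWithOff(pVCpu, offInstr);
    if (GCPhysNew != GCPhysNewPageExpected) /* also catches NIL_RTGCPHYS on failure */
    {
        /* Wrong page (or fetch failed): branch to the ObsoleteTb label so the
           TB gets retired and recompiled. */
    }
#endif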
97
98
99/*********************************************************************************************************************************
100* Builtin functions *
101*********************************************************************************************************************************/
102
103/**
104 * Built-in function that does nothing.
105 *
106 * Whether this is called or not can be controlled by the entry in the
107 * IEMThreadedGenerator.katBltIns table. This can be useful for determining
108 * why behaviour changes when enabling the LogCpuState builtins, i.e. whether
109 * it's the reduced call count in the TBs or the threaded calls flushing
110 * register state.
111 */
112IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_Nop)
113{
114 RT_NOREF(pReNative, pCallEntry);
115 return off;
116}
117
118
119/**
120 * Emits code for LogCpuState.
121 *
122 * This shouldn't have any relevant impact on the recompiler state.
123 */
124IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_LogCpuState)
125{
126#ifdef RT_ARCH_AMD64
127 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
128 /* push rax */
129 pbCodeBuf[off++] = 0x50 + X86_GREG_xAX;
130 /* push imm32 */
131 pbCodeBuf[off++] = 0x68;
132 pbCodeBuf[off++] = RT_BYTE1(pCallEntry->auParams[0]);
133 pbCodeBuf[off++] = RT_BYTE2(pCallEntry->auParams[0]);
134 pbCodeBuf[off++] = RT_BYTE3(pCallEntry->auParams[0]);
135 pbCodeBuf[off++] = RT_BYTE4(pCallEntry->auParams[0]);
136 /* mov rax, iemNativeHlpAsmSafeWrapLogCpuState */
137 pbCodeBuf[off++] = X86_OP_REX_W;
138 pbCodeBuf[off++] = 0xb8 + X86_GREG_xAX;
139 *(uint64_t *)&pbCodeBuf[off] = (uintptr_t)iemNativeHlpAsmSafeWrapLogCpuState;
140 off += sizeof(uint64_t);
141 /* call rax */
142 pbCodeBuf[off++] = 0xff;
143 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
144 /* pop rax */
145 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
146 /* pop rax */
147 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
148 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
149
150#else
151 /** @todo Implement this */
152 AssertFailed();
153 RT_NOREF(pReNative, pCallEntry);
154#endif
155 return off;
156}
157
158
159/**
160 * Built-in function that calls a C-implementation function taking zero arguments.
161 */
162IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
163{
164 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
165 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
166 uint64_t const fGstShwFlush = pCallEntry->auParams[2];
167 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
168}
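
/*
 * Sketch of what the emitted code amounts to at runtime, assuming the usual
 * zero-argument C-implementation signature (pVCpu + instruction length); the
 * guest register shadows selected by fGstShwFlush are written back before
 * the call is made.
 */
#if 0
    /* flush guest register shadows indicated by fGstShwFlush ... */
    return pfnCImpl(pVCpu, cbInstr);
#endif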
169
170
171/**
172 * Built-in function that checks for pending interrupts that can be delivered or
173 * forced action flags.
174 *
175 * This triggers after the completion of an instruction, so EIP is already at
176 * the next instruction. If an IRQ or important FF is pending, this will return
177 * a non-zero status that stops TB execution.
178 */
179IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
180{
181 RT_NOREF(pCallEntry);
182
183 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
184 and I'm too lazy to create a 'Fixed' version of that one. */
185 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
186 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
187
188 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
189
190 /* Again, we need to load the extended EFLAGS before we actually need them
191 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
192 loaded them inside the check, as the shadow state would not be correct
193 when the code branches before the load. Ditto PC. */
194 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
195 kIemNativeGstRegUse_ReadOnly);
196
197 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
198
199 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
200
201 /*
202 * Start by checking the local forced actions of the EMT we're on for IRQs
203 * and other FFs that need servicing.
204 */
205 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
206 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
207 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
208 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
209 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
210 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
211 | VMCPU_FF_TLB_FLUSH
212 | VMCPU_FF_UNHALT ),
213 true /*fSetFlags*/);
214 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
215 uint32_t const offFixupJumpToVmCheck1 = off;
216 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
217
218 /* Some relevant FFs are set, but if it's only the APIC and/or PIC being set,
219 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
220 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
221 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
222 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
223 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
224
225 /* So, it's only interrupt related FFs and we need to see if IRQs are being
226 suppressed by the CPU or not. */
227 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
228 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
229 idxLabelReturnBreak);
230
231 /* We've got shadow flags set, so we must check that the PC they are valid
232 for matches our current PC value. */
233 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
234 * a register. */
235 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
236 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
237
238 /*
239 * Now check the force flags of the VM.
240 */
241 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
242 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
243 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
244 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
245 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
246 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
247
248 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
249
250 /*
251 * We're good, no IRQs or FFs pending.
252 */
253 iemNativeRegFreeTmp(pReNative, idxTmpReg);
254 iemNativeRegFreeTmp(pReNative, idxEflReg);
255 iemNativeRegFreeTmp(pReNative, idxPcReg);
256
257 return off;
258}
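
/*
 * For reference, a non-compiled C sketch of the checks emitted above.
 * uEflWithShadowBits and uFlatPc are placeholder names for the values the
 * generated code keeps in the allocated EFLAGS and PC registers.
 */
#if 0
    uint64_t fCpuFFs = pVCpu->fLocalForcedActions
                     & VMCPU_FF_ALL_MASK
                     & ~(  VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
                         | VMCPU_FF_TLB_FLUSH    | VMCPU_FF_UNHALT);
    if (fCpuFFs)
    {
        /* Anything other than APIC/PIC interrupts always breaks out of the TB. */
        if (fCpuFFs & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
            return VINF_IEM_REEXEC_BREAK;
        /* APIC/PIC only: deliverable unless IF is clear or an interrupt shadow
           is still in effect for the current PC. */
        if (uEflWithShadowBits & X86_EFL_IF)
        {
            if (   !(uEflWithShadowBits & CPUMCTX_INHIBIT_SHADOW)
                || pVCpu->cpum.GstCtx.uRipInhibitInt != uFlatPc)
                return VINF_IEM_REEXEC_BREAK;
        }
    }
    /* Finally, the VM-wide force flags. */
    if (pVCpu->CTX_SUFF(pVM)->fGlobalForcedActions & VM_FF_ALL_MASK)
        return VINF_IEM_REEXEC_BREAK;
#endif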
259
260
261/**
262 * Built-in function that checks if IEMCPU::fExec has the expected value.
263 */
264IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
265{
266 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
267 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
268
269 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
270 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
271 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
272 kIemNativeLabelType_ReturnBreak);
273 iemNativeRegFreeTmp(pReNative, idxTmpReg);
274 return off;
275}
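
/*
 * Rough C equivalent of the emitted check (sketch only):
 */
#if 0
    if ((pVCpu->iem.s.fExec & IEMTB_F_KEY_MASK) != (fExpectedExec & IEMTB_F_KEY_MASK))
        return VINF_IEM_REEXEC_BREAK;
#endif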
276
277
278/**
279 * Sets idxTbCurInstr in preparation for raising an exception or aborting the TB.
280 */
281/** @todo Optimize this, so we don't set the same value more than once. Just
282 * needs some tracking. */
283#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
284# define BODY_SET_CUR_INSTR() \
285 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr))
286#else
287# define BODY_SET_CUR_INSTR() ((void)0)
288#endif
289
290/**
291 * Flushes pending writes in preparation for raising an exception or aborting the TB.
292 */
293#define BODY_FLUSH_PENDING_WRITES() \
294 off = iemNativeRegFlushPendingWrites(pReNative, off);
295
296
297/**
298 * Macro that emits the 16/32-bit CS.LIM check.
299 */
300#define BODY_CHECK_CS_LIM(a_cbInstr) \
301 off = iemNativeEmitBltInCheckCsLim(pReNative, off, (a_cbInstr))
302
303DECL_FORCE_INLINE(uint32_t)
304iemNativeEmitBltInCheckCsLim(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
305{
306 Assert(cbInstr > 0);
307 Assert(cbInstr < 16);
308#ifdef VBOX_STRICT
309 off = iemNativeEmitMarker(pReNative, off, 0x80000001);
310#endif
311
312 /*
313 * We need CS.LIM and RIP here. When cbInstr is larger than 1, we also need
314 * a temporary register for calculating the last address of the instruction.
315 *
316 * The calculation and comparisons are 32-bit. We ASSUME that the incoming
317 * RIP isn't totally invalid, i.e. that any jump/call/ret/iret instruction
318 * that last updated EIP here checked it already, and that we're therefore
319 * safe in the 32-bit wrap-around scenario to only check that the last byte
320 * is within CS.LIM. In the case of instruction-by-instruction advancing
321 * up to an EIP wrap-around, we know that CS.LIM is 4G-1 because the limit
322 * must be using 4KB granularity and the previous instruction was fine.
323 */
324 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
325 kIemNativeGstRegUse_ReadOnly);
326 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
327 kIemNativeGstRegUse_ReadOnly);
328#ifdef RT_ARCH_AMD64
329 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
330#elif defined(RT_ARCH_ARM64)
331 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
332#else
333# error "Port me"
334#endif
335
336 if (cbInstr != 1)
337 {
338 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
339
340 /*
341 * 1. idxRegTmp = idxRegPc + cbInstr;
342 * 2. if idxRegTmp > idxRegCsLim then raise #GP(0).
343 */
344#ifdef RT_ARCH_AMD64
345 /* 1. lea tmp32, [Pc + cbInstr - 1] */
346 if (idxRegTmp >= 8 || idxRegPc >= 8)
347 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegPc < 8 ? 0 : X86_OP_REX_B);
348 pbCodeBuf[off++] = 0x8d;
349 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, idxRegTmp & 7, idxRegPc & 7);
350 if ((idxRegPc & 7) == X86_GREG_xSP)
351 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegPc & 7, 4 /*no index*/, 0);
352 pbCodeBuf[off++] = cbInstr - 1;
353
354 /* 2. cmp tmp32(r), CsLim(r/m). */
355 if (idxRegTmp >= 8 || idxRegCsLim >= 8)
356 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
357 pbCodeBuf[off++] = 0x3b;
358 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegTmp & 7, idxRegCsLim & 7);
359
360#elif defined(RT_ARCH_ARM64)
361 /* 1. add tmp32, Pc, #cbInstr-1 */
362 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegTmp, idxRegPc, cbInstr - 1, false /*f64Bit*/);
363 /* 2. cmp tmp32, CsLim */
364 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegTmp, idxRegCsLim,
365 false /*f64Bit*/, true /*fSetFlags*/);
366
367#endif
368 iemNativeRegFreeTmp(pReNative, idxRegTmp);
369 }
370 else
371 {
372 /*
373 * Here we can skip step 1 and compare PC and CS.LIM directly.
374 */
375#ifdef RT_ARCH_AMD64
376 /* 2. cmp eip(r), CsLim(r/m). */
377 if (idxRegPc >= 8 || idxRegCsLim >= 8)
378 pbCodeBuf[off++] = (idxRegPc < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
379 pbCodeBuf[off++] = 0x3b;
380 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegPc & 7, idxRegCsLim & 7);
381
382#elif defined(RT_ARCH_ARM64)
383 /* 2. cmp Pc, CsLim */
384 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegPc, idxRegCsLim,
385 false /*f64Bit*/, true /*fSetFlags*/);
386
387#endif
388 }
389
390 /* 3. Jump if greater. */
391 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
392
393 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
394 iemNativeRegFreeTmp(pReNative, idxRegPc);
395 return off;
396}
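
/*
 * The emitted check boils down to a 32-bit unsigned compare, roughly
 * (sketch only; the RaiseGp0 label leads to the #GP(0) raising code):
 */
#if 0
    if ((uint32_t)(pVCpu->cpum.GstCtx.eip + cbInstr - 1) > pVCpu->cpum.GstCtx.cs.u32Limit)
    {
        /* branch to the RaiseGp0 label -> raise #GP(0) */
    }
#endif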
397
398
399/**
400 * Macro that considers whether we need CS.LIM checking after a branch or
401 * crossing over to a new page.
402 */
403#define BODY_CONSIDER_CS_LIM_CHECKING(a_pTb, a_cbInstr) \
404 RT_NOREF(a_cbInstr); \
405 off = iemNativeEmitBltInConsiderLimChecking(pReNative, off)
406
407DECL_FORCE_INLINE(uint32_t)
408iemNativeEmitBltInConsiderLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off)
409{
410#ifdef VBOX_STRICT
411 off = iemNativeEmitMarker(pReNative, off, 0x80000002);
412#endif
413
414 /*
415 * This check must match the one in iemGetTbFlagsForCurrentPc
416 * exactly:
417 *
418 * int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
419 * if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
420 * return fRet;
421 * return fRet | IEMTB_F_CS_LIM_CHECKS;
422 *
423 *
424 * We need EIP, CS.LIM and CS.BASE here.
425 */
426
427 /* Calculate the offFromLim first: */
428 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
429 kIemNativeGstRegUse_ReadOnly);
430 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
431 kIemNativeGstRegUse_ReadOnly);
432 uint8_t const idxRegLeft = iemNativeRegAllocTmp(pReNative, &off);
433
434#ifdef RT_ARCH_ARM64
435 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
436 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegLeft, idxRegCsLim, idxRegPc);
437 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
438#else
439 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegLeft, idxRegCsLim);
440 off = iemNativeEmitSubTwoGprs(pReNative, off, idxRegLeft, idxRegPc);
441#endif
442
443 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
444 iemNativeRegFreeTmp(pReNative, idxRegPc);
445
446 /* Calculate the threshold level (right side). */
447 uint8_t const idxRegCsBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
448 kIemNativeGstRegUse_ReadOnly);
449 uint8_t const idxRegRight = iemNativeRegAllocTmp(pReNative, &off);
450
451#ifdef RT_ARCH_ARM64
452 pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
453 Assert(Armv8A64ConvertImmRImmS2Mask32(11, 0) == GUEST_PAGE_OFFSET_MASK);
454 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegRight, idxRegCsBase, 11, 0, false /*f64Bit*/);
455 pu32CodeBuf[off++] = Armv8A64MkInstrNeg(idxRegRight);
456 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
457 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
458 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
459
460#else
461 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegRight, GUEST_PAGE_OFFSET_MASK);
462 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegRight, idxRegCsBase);
463 off = iemNativeEmitNegGpr(pReNative, off, idxRegRight);
464 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRight, X86_PAGE_SIZE + 16);
465#endif
466
467 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
468
469 /* Compare the two and jump out if we're too close to the limit. */
470 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegLeft, idxRegRight);
471 off = iemNativeEmitJlToNewLabel(pReNative, off, kIemNativeLabelType_NeedCsLimChecking);
472
473 iemNativeRegFreeTmp(pReNative, idxRegRight);
474 iemNativeRegFreeTmp(pReNative, idxRegLeft);
475 return off;
476}
477
478
479
480/**
481 * Macro that implements opcode (re-)checking.
482 */
483#define BODY_CHECK_OPCODES(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
484 RT_NOREF(a_cbInstr); \
485 off = iemNativeEmitBltInCheckOpcodes(pReNative, off, (a_pTb), (a_idxRange), (a_offRange))
486
487#if 0 /* debugging aid */
488bool g_fBpOnObsoletion = false;
489# define BP_ON_OBSOLETION g_fBpOnObsoletion
490#else
491# define BP_ON_OBSOLETION 0
492#endif
493
494DECL_FORCE_INLINE(uint32_t)
495iemNativeEmitBltInCheckOpcodes(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint16_t offRange)
496{
497 Assert(idxRange < pTb->cRanges && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges));
498 Assert(offRange < pTb->aRanges[idxRange].cbOpcodes);
499#ifdef VBOX_STRICT
500 off = iemNativeEmitMarker(pReNative, off, 0x80000003);
501#endif
502
503 uint32_t const idxLabelObsoleteTb = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ObsoleteTb);
504
505 /*
506 * Where to start and how much to compare.
507 *
508 * Looking at the ranges produced when r160746 was running a DOS VM with TB
509 * logging, the ranges can be anything from 1 byte to at least 0x197 bytes,
510 * with the 6, 5, 4, 7, 8, 40, 3, 2, 9 and 10 being the top 10 in the sample.
511 *
512 * The top 10 for the early boot phase of a 64-bit debian 9.4 VM: 5, 9, 8,
513 * 12, 10, 11, 6, 13, 15 and 16. Max 0x359 bytes. Same revision as above.
514 */
515 uint16_t offPage = pTb->aRanges[idxRange].offPhysPage + offRange;
516 uint16_t cbLeft = pTb->aRanges[idxRange].cbOpcodes - offRange;
517 Assert(cbLeft > 0);
518 uint8_t const *pbOpcodes = &pTb->pabOpcodes[pTb->aRanges[idxRange].offOpcodes + offRange];
519 uint32_t offConsolidatedJump = UINT32_MAX;
520
521#ifdef RT_ARCH_AMD64
522 /* AMD64/x86 offers a bunch of options. Smaller stuff can be completely
523 inlined, for larger blocks we use REPE CMPS. */
524# define CHECK_OPCODES_CMP_IMMXX(a_idxReg, a_bOpcode) /* cost: 3 bytes */ do { \
525 pbCodeBuf[off++] = a_bOpcode; \
526 Assert(offPage < 127); \
527 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 7, a_idxReg); \
528 pbCodeBuf[off++] = RT_BYTE1(offPage); \
529 } while (0)
530
531# define CHECK_OPCODES_CMP_JMP() /* cost: 7 bytes first time, then 2 bytes */ do { \
532 if (offConsolidatedJump != UINT32_MAX) \
533 { \
534 int32_t const offDisp = (int32_t)offConsolidatedJump - (int32_t)(off + 2); \
535 Assert(offDisp >= -128); \
536 pbCodeBuf[off++] = 0x75; /* jnz near */ \
537 pbCodeBuf[off++] = (uint8_t)offDisp; \
538 } \
539 else \
540 { \
541 pbCodeBuf[off++] = 0x74; /* jz near +5 */ \
542 pbCodeBuf[off++] = 0x05 + BP_ON_OBSOLETION; \
543 offConsolidatedJump = off; \
544 if (BP_ON_OBSOLETION) pbCodeBuf[off++] = 0xcc; \
545 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */ \
546 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_Rel32, -4); \
547 pbCodeBuf[off++] = 0x00; \
548 pbCodeBuf[off++] = 0x00; \
549 pbCodeBuf[off++] = 0x00; \
550 pbCodeBuf[off++] = 0x00; \
551 } \
552 } while (0)
553
554# define CHECK_OPCODES_CMP_IMM32(a_idxReg) /* cost: 3+4+2 = 9 */ do { \
555 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
556 pbCodeBuf[off++] = *pbOpcodes++; \
557 pbCodeBuf[off++] = *pbOpcodes++; \
558 pbCodeBuf[off++] = *pbOpcodes++; \
559 pbCodeBuf[off++] = *pbOpcodes++; \
560 cbLeft -= 4; \
561 offPage += 4; \
562 CHECK_OPCODES_CMP_JMP(); \
563 } while (0)
564
565# define CHECK_OPCODES_CMP_IMM16(a_idxReg) /* cost: 1+3+2+2 = 8 */ do { \
566 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; \
567 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
568 pbCodeBuf[off++] = *pbOpcodes++; \
569 pbCodeBuf[off++] = *pbOpcodes++; \
570 cbLeft -= 2; \
571 offPage += 2; \
572 CHECK_OPCODES_CMP_JMP(); \
573 } while (0)
574
575# define CHECK_OPCODES_CMP_IMM8(a_idxReg) /* cost: 3+1+2 = 6 */ do { \
576 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x80); \
577 pbCodeBuf[off++] = *pbOpcodes++; \
578 cbLeft -= 1; \
579 offPage += 1; \
580 CHECK_OPCODES_CMP_JMP(); \
581 } while (0)
582
583# define CHECK_OPCODES_CMPSX(a_bOpcode, a_cbToSubtract, a_bPrefix) /* cost: 2+2 = 4 */ do { \
584 if (a_bPrefix) \
585 pbCodeBuf[off++] = (a_bPrefix); \
586 pbCodeBuf[off++] = (a_bOpcode); \
587 CHECK_OPCODES_CMP_JMP(); \
588 cbLeft -= (a_cbToSubtract); \
589 } while (0)
590
591# define CHECK_OPCODES_ECX_IMM(a_uValue) /* cost: 5 */ do { \
592 pbCodeBuf[off++] = 0xb8 + X86_GREG_xCX; \
593 pbCodeBuf[off++] = RT_BYTE1(a_uValue); \
594 pbCodeBuf[off++] = RT_BYTE2(a_uValue); \
595 pbCodeBuf[off++] = RT_BYTE3(a_uValue); \
596 pbCodeBuf[off++] = RT_BYTE4(a_uValue); \
597 } while (0)
598
599 if (cbLeft <= 24)
600 {
601 uint8_t const idxRegTmp = iemNativeRegAllocTmpEx(pReNative, &off,
602 ( RT_BIT_32(X86_GREG_xAX)
603 | RT_BIT_32(X86_GREG_xCX)
604 | RT_BIT_32(X86_GREG_xDX)
605 | RT_BIT_32(X86_GREG_xBX)
606 | RT_BIT_32(X86_GREG_xSI)
607 | RT_BIT_32(X86_GREG_xDI))
608 & ~IEMNATIVE_REG_FIXED_MASK); /* pick reg not requiring rex prefix */
609 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf));
610 if (offPage >= 128 - cbLeft)
611 {
612 off = iemNativeEmitAddGprImm(pReNative, off, idxRegTmp, offPage & ~(uint16_t)3);
613 offPage &= 3;
614 }
615
616 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 14 + 54 + 8 + 6 + BP_ON_OBSOLETION /* = 87 */);
617
618 if (cbLeft > 8)
619 switch (offPage & 3)
620 {
621 case 0:
622 break;
623 case 1: /* cost: 6 + 8 = 14 */
624 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
625 RT_FALL_THRU();
626 case 2: /* cost: 8 */
627 CHECK_OPCODES_CMP_IMM16(idxRegTmp);
628 break;
629 case 3: /* cost: 6 */
630 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
631 break;
632 }
633
634 while (cbLeft >= 4)
635 CHECK_OPCODES_CMP_IMM32(idxRegTmp); /* max iteration: 24/4 = 6; --> cost: 6 * 9 = 54 */
636
637 if (cbLeft >= 2)
638 CHECK_OPCODES_CMP_IMM16(idxRegTmp); /* cost: 8 */
639 if (cbLeft)
640 CHECK_OPCODES_CMP_IMM8(idxRegTmp); /* cost: 6 */
641
642 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
643 iemNativeRegFreeTmp(pReNative, idxRegTmp);
644 }
645 else
646 {
647 /* RDI = &pbInstrBuf[offPage] */
648 uint8_t const idxRegDi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xDI));
649 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegDi, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
650 if (offPage != 0)
651 off = iemNativeEmitAddGprImm(pReNative, off, idxRegDi, offPage);
652
653 /* RSI = pbOpcodes */
654 uint8_t const idxRegSi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xSI));
655 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegSi, (uintptr_t)pbOpcodes);
656
657 /* RCX = counts. */
658 uint8_t const idxRegCx = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xCX));
659
660 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 10 + 5 + 5 + 3 + 4 + 3 + BP_ON_OBSOLETION /*= 35*/);
661
662 /** @todo profile and optimize this further. Maybe an idea to align by
663 * offPage if the two cannot be reconciled. */
664 /* Align by the page offset, so that at least one of the accesses is naturally aligned. */
665 switch (offPage & 7) /* max cost: 10 */
666 {
667 case 0:
668 break;
669 case 1: /* cost: 3+4+3 = 10 */
670 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
671 RT_FALL_THRU();
672 case 2: /* cost: 4+3 = 7 */
673 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
674 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
675 break;
676 case 3: /* cost: 3+3 = 6 */
677 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
678 RT_FALL_THRU();
679 case 4: /* cost: 3 */
680 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
681 break;
682 case 5: /* cost: 3+4 = 7 */
683 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
684 RT_FALL_THRU();
685 case 6: /* cost: 4 */
686 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
687 break;
688 case 7: /* cost: 3 */
689 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
690 break;
691 }
692
693 /* Compare qwords: */
694 uint32_t const cQWords = cbLeft >> 3;
695 CHECK_OPCODES_ECX_IMM(cQWords); /* cost: 5 */
696
697 pbCodeBuf[off++] = X86_OP_PRF_REPZ; /* cost: 5 */
698 CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_REX_W);
699 cbLeft &= 7;
700
701 if (cbLeft & 4)
702 CHECK_OPCODES_CMPSX(0xa7, 4, 0); /* cost: 3 */
703 if (cbLeft & 2)
704 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP); /* cost: 4 */
705 if (cbLeft & 1)
706 CHECK_OPCODES_CMPSX(0xa6, 1, 0); /* cost: 3 */
707
708 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
709 iemNativeRegFreeTmp(pReNative, idxRegCx);
710 iemNativeRegFreeTmp(pReNative, idxRegSi);
711 iemNativeRegFreeTmp(pReNative, idxRegDi);
712 }
713
714#elif defined(RT_ARCH_ARM64)
715 /* We need pbInstrBuf in a register, whatever we do. */
716 uint8_t const idxRegSrc1Ptr = iemNativeRegAllocTmp(pReNative, &off);
717 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegSrc1Ptr, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
718
719 /* We also need at least one more register for holding bytes & words we
720 load via pbInstrBuf. */
721 uint8_t const idxRegSrc1Val = iemNativeRegAllocTmp(pReNative, &off);
722
723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
724
725 /* A one-byte compare can be done with the opcode byte as an immediate. We'll
726 do this to align src1 on a uint16_t boundary. */
727 bool fPendingJmp = RT_BOOL(offPage & 1);
728 if (fPendingJmp)
729 {
730 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
731 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, *pbOpcodes++, false /*f64Bit*/);
732 offPage += 1;
733 cbLeft -= 1;
734 }
735
736 if (cbLeft > 0)
737 {
738 /* We need a register for holding the opcode bytes we're comparing with,
739 as CCMP only has a 5-bit immediate form and thus cannot hold bytes. */
740 uint8_t const idxRegSrc2Val = iemNativeRegAllocTmp(pReNative, &off);
741
742 /* Word (uint32_t) aligning the src1 pointer is best done using a 16-bit constant load. */
743 if ((offPage & 3) && cbLeft >= 2)
744 {
745 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 2);
746 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
747 if (fPendingJmp)
748 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
749 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
750 else
751 {
752 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
753 fPendingJmp = true;
754 }
755 pbOpcodes += 2;
756 offPage += 2;
757 cbLeft -= 2;
758 }
759
760 /* DWord (uint64_t) aligning the src2 pointer. We use a 32-bit constant here for simplicity. */
761 if ((offPage & 7) && cbLeft >= 4)
762 {
763 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 4);
764 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
765 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
766 if (fPendingJmp)
767 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
768 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
769 else
770 {
771 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
772 fPendingJmp = true;
773 }
774 pbOpcodes += 4;
775 offPage += 4;
776 cbLeft -= 4;
777 }
778
779 /*
780 * If we've got 16 bytes or more left, switch to memcmp-style.
781 */
782 if (cbLeft >= 16)
783 {
784 /* We need a pointer to the copy of the original opcode bytes. */
785 uint8_t const idxRegSrc2Ptr = iemNativeRegAllocTmp(pReNative, &off);
786 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Ptr, (uintptr_t)pbOpcodes);
787
788 /* If there are more than 32 bytes to compare we create a loop, for
789 which we'll need a loop register. */
790 if (cbLeft >= 64)
791 {
792 if (fPendingJmp)
793 {
794 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
795 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
796 fPendingJmp = false;
797 }
798
799 uint8_t const idxRegLoop = iemNativeRegAllocTmp(pReNative, &off);
800 uint16_t const cLoops = cbLeft / 32;
801 cbLeft = cbLeft % 32;
802 pbOpcodes += cLoops * 32;
803 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegLoop, cLoops);
804
805 if (offPage != 0) /** @todo optimize out this instruction. */
806 {
807 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, offPage);
808 offPage = 0;
809 }
810
811 uint32_t const offLoopStart = off;
812 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 0);
813 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 0);
814 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
815
816 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 1);
817 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 1);
818 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
819 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
820
821 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 2);
822 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 2);
823 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
824 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
825
826 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 3);
827 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 3);
828 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
829 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
830
831 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
832 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
833
834 /* Advance and loop. */
835 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, 0x20);
836 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc2Ptr, idxRegSrc2Ptr, 0x20);
837 pu32CodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegLoop, idxRegLoop, 1, false /*f64Bit*/, true /*fSetFlags*/);
838 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, (int32_t)offLoopStart - (int32_t)off);
839
840 iemNativeRegFreeTmp(pReNative, idxRegLoop);
841 }
842
843 /* Deal with any remaining dwords (uint64_t). There can be up to
844 three if we looped and four if we didn't. */
845 uint32_t offSrc2 = 0;
846 while (cbLeft >= 8)
847 {
848 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val,
849 idxRegSrc1Ptr, offPage / 8);
850 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val,
851 idxRegSrc2Ptr, offSrc2 / 8);
852 if (fPendingJmp)
853 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
854 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
855 else
856 {
857 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
858 fPendingJmp = true;
859 }
860 pbOpcodes += 8;
861 offPage += 8;
862 offSrc2 += 8;
863 cbLeft -= 8;
864 }
865
866 iemNativeRegFreeTmp(pReNative, idxRegSrc2Ptr);
867 /* max cost thus far: memcmp-loop=43 vs memcmp-no-loop=30 */
868 }
869 /*
870 * Otherwise, we compare with constants and merge with the general mop-up.
871 */
872 else
873 {
874 while (cbLeft >= 8)
875 {
876 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr,
877 offPage / 8);
878 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Val,
879 RT_MAKE_U64_FROM_MSB_U8(pbOpcodes[7], pbOpcodes[6], pbOpcodes[5], pbOpcodes[4],
880 pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
881 if (fPendingJmp)
882 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
883 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, true /*f64Bit*/);
884 else
885 {
886 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, true /*f64Bit*/);
887 fPendingJmp = true;
888 }
889 pbOpcodes += 8;
890 offPage += 8;
891 cbLeft -= 8;
892 }
893 /* max cost thus far: 21 */
894 }
895
896 /* Deal with any remaining bytes (7 or less). */
897 Assert(cbLeft < 8);
898 if (cbLeft >= 4)
899 {
900 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr,
901 offPage / 4);
902 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
903 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
904 if (fPendingJmp)
905 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
906 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
907 else
908 {
909 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
910 fPendingJmp = true;
911 }
912 pbOpcodes += 4;
913 offPage += 4;
914 cbLeft -= 4;
915
916 }
917
918 if (cbLeft >= 2)
919 {
920 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr,
921 offPage / 2);
922 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
923 if (fPendingJmp)
924 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
925 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
926 else
927 {
928 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
929 fPendingJmp = true;
930 }
931 pbOpcodes += 2;
932 offPage += 2;
933 cbLeft -= 2;
934 }
935
936 if (cbLeft > 0)
937 {
938 Assert(cbLeft == 1);
939 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
940 if (fPendingJmp)
941 {
942 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, pbOpcodes[0]);
943 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
944 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
945 }
946 else
947 {
948 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, pbOpcodes[0], false /*f64Bit*/);
949 fPendingJmp = true;
950 }
951 pbOpcodes += 1;
952 offPage += 1;
953 cbLeft -= 1;
954 }
955
956 iemNativeRegFreeTmp(pReNative, idxRegSrc2Val);
957 }
958 Assert(cbLeft == 0);
959
960 /*
961 * Finally, the branch on difference.
962 */
963 if (fPendingJmp)
964 {
965 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
966 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
967 }
968 RT_NOREF(pu32CodeBuf, cbLeft, offPage, pbOpcodes, offConsolidatedJump, idxLabelObsoleteTb);
969
970 /* max costs: memcmp-loop=54; memcmp-no-loop=41; only-src1-ptr=32 */
971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
972 iemNativeRegFreeTmp(pReNative, idxRegSrc1Val);
973 iemNativeRegFreeTmp(pReNative, idxRegSrc1Ptr);
974
975#else
976# error "Port me"
977#endif
978 return off;
979}
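
/*
 * Logically the code emitted above amounts to a memcmp against the opcode
 * bytes the TB was compiled from (sketch only; the native code inlines the
 * comparison as described above instead of calling memcmp):
 */
#if 0
    if (memcmp(&pVCpu->iem.s.pbInstrBuf[pTb->aRanges[idxRange].offPhysPage + offRange],
               &pTb->pabOpcodes[pTb->aRanges[idxRange].offOpcodes + offRange],
               pTb->aRanges[idxRange].cbOpcodes - offRange) != 0)
    {
        /* Opcode bytes changed: branch to the ObsoleteTb label so the TB gets retired. */
    }
#endif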
980
981
982/** Duplicated in IEMAllThrdFuncsBltIn.cpp. */
983DECL_FORCE_INLINE(RTGCPHYS) iemTbGetRangePhysPageAddr(PCIEMTB pTb, uint8_t idxRange)
984{
985 Assert(idxRange < RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges)));
986 uint8_t const idxPage = pTb->aRanges[idxRange].idxPhysPage;
987 Assert(idxPage <= RT_ELEMENTS(pTb->aGCPhysPages));
988 if (idxPage == 0)
989 return pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
990 Assert(!(pTb->aGCPhysPages[idxPage - 1] & GUEST_PAGE_OFFSET_MASK));
991 return pTb->aGCPhysPages[idxPage - 1];
992}
993
994
995/**
996 * Macro that implements PC check after a conditional branch.
997 */
998#define BODY_CHECK_PC_AFTER_BRANCH(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
999 RT_NOREF(a_cbInstr); \
1000 off = iemNativeEmitBltInCheckPcAfterBranch(pReNative, off, a_pTb, a_idxRange, a_offRange)
1001
1002DECL_FORCE_INLINE(uint32_t)
1003iemNativeEmitBltInCheckPcAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb,
1004 uint8_t idxRange, uint16_t offRange)
1005{
1006#ifdef VBOX_STRICT
1007 off = iemNativeEmitMarker(pReNative, off, 0x80000004);
1008#endif
1009
1010 /*
1011 * The GCPhysRangePageWithOffset value in the threaded function is a fixed
1012 * constant for us here.
1013 *
1014 * We can pretend that iem.s.cbInstrBufTotal is X86_PAGE_SIZE here, because
1015 * it serves no purpose as a CS.LIM check (if that's needed we've just
1016 * performed it), and as long as we don't implement code TLB reloading here
1017 * there is no point in checking that the TLB data we're using is still valid.
1018 *
1019 * What we need to do is:
1020 * 1. Calculate the FLAT PC (RIP + CS.BASE).
1021 * 2. Subtract iem.s.uInstrBufPc from it, giving 'off'.
1022 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal or
1023 * we're in the wrong spot and need to find a new TB.
1024 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
1025 * GCPhysRangePageWithOffset constant mentioned above.
1026 *
1027 * The adding of CS.BASE to RIP can be skipped in the first step if we're
1028 * in 64-bit code or flat 32-bit.
1029 */
1030
1031 /* Allocate registers for step 1. Get the shadowed stuff before allocating
1032 the temp register, so we don't accidentally clobber something we'll be
1033 needing again immediately. This is why we get idxRegCsBase here. */
1034 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1035 kIemNativeGstRegUse_ReadOnly);
1036 uint8_t const idxRegCsBase = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX
1037 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
1038 kIemNativeGstRegUse_ReadOnly);
1039
1040 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1041
1042#ifdef VBOX_STRICT
1043 /* Do assertions before idxRegTmp contains anything. */
1044 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1045# ifdef RT_ARCH_AMD64
1046 {
1047 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1048 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1049 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1050 {
1051 /* cmp r/m64, imm8 */
1052 pbCodeBuf[off++] = X86_OP_REX_W;
1053 pbCodeBuf[off++] = 0x83;
1054 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1055 pbCodeBuf[off++] = 0;
1056 /* je rel8 */
1057 pbCodeBuf[off++] = 0x74;
1058 pbCodeBuf[off++] = 1;
1059 /* int3 */
1060 pbCodeBuf[off++] = 0xcc;
1061
1062 }
1063
1064 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1065 /* test r/m64, imm32 */
1066 pbCodeBuf[off++] = X86_OP_REX_W;
1067 pbCodeBuf[off++] = 0xf7;
1068 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1069 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1070 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1071 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1072 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1073 /* jz rel8 */
1074 pbCodeBuf[off++] = 0x74;
1075 pbCodeBuf[off++] = 1;
1076 /* int3 */
1077 pbCodeBuf[off++] = 0xcc;
1078 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1079 }
1080# else
1081
1082 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1083 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1084 {
1085 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1086# ifdef RT_ARCH_ARM64
1087 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1088 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, idxRegTmp);
1089 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2004);
1090 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1091# else
1092# error "Port me!"
1093# endif
1094 }
1095# endif
1096
1097#endif /* VBOX_STRICT */
1098
1099 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1100 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1101 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1102 {
1103#ifdef RT_ARCH_ARM64
1104 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1105 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1106 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1107#else
1108 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1109 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1110#endif
1111 }
1112 else
1113 {
1114#ifdef RT_ARCH_ARM64
1115 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1116 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1117 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1118 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1119#else
1120 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1121 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1122 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1123#endif
1124 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1125 }
1126 iemNativeRegFreeTmp(pReNative, idxRegPc);
1127
1128 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal. */
1129 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1130 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_CheckBranchMiss);
1131
1132 /* 4. Add iem.s.GCPhysInstrBuf and compare with GCPhysRangePageWithOffset. */
1133#ifdef RT_ARCH_AMD64
1134 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1135 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1136 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1137 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1138 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1139
1140#elif defined(RT_ARCH_ARM64)
1141 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
1142
1143 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1144 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1145 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1146 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1147
1148# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1149 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1150 off = iemNativeEmitJzToFixed(pReNative, off, off + 2 /* correct for ARM64 */);
1151 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1152# endif
1153 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1154#else
1155# error "Port me"
1156#endif
1157
1158 RTGCPHYS const GCPhysRangePageWithOffset = ( iemTbGetRangePhysPageAddr(pTb, idxRange)
1159 | pTb->aRanges[idxRange].offPhysPage)
1160 + offRange;
1161 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(pReNative, off, idxRegTmp, GCPhysRangePageWithOffset,
1162 kIemNativeLabelType_CheckBranchMiss);
1163
1164 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1165 return off;
1166}
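
/*
 * Sketch of the runtime check constructed above (illustration only; uFlatPc
 * and offBuf are placeholder names, and the CS.BASE addition is skipped when
 * the TB was compiled for flat 32-bit or 64-bit mode):
 */
#if 0
    uint64_t const uFlatPc = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
    uint64_t const offBuf  = uFlatPc - pVCpu->iem.s.uInstrBufPc;
    if (   offBuf >= X86_PAGE_SIZE
        || pVCpu->iem.s.GCPhysInstrBuf + offBuf != GCPhysRangePageWithOffset /* TB constant */)
    {
        /* Not where the TB expects to be: branch to the CheckBranchMiss label. */
    }
#endif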
1167
1168
1169/**
1170 * Macro that implements TLB loading and pbInstrBuf updating for an
1171 * instruction crossing into a new page.
1172 *
1173 * This may long jump if we're raising a \#PF, \#GP or similar trouble.
1174 */
1175#define BODY_LOAD_TLB_FOR_NEW_PAGE(a_pTb, a_offInstr, a_idxRange, a_cbInstr) \
1176 RT_NOREF(a_cbInstr); \
1177 off = iemNativeEmitBltLoadTlbForNewPage(pReNative, off, pTb, a_idxRange, a_offInstr)
1178
1179DECL_FORCE_INLINE(uint32_t)
1180iemNativeEmitBltLoadTlbForNewPage(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint8_t offInstr)
1181{
1182#ifdef VBOX_STRICT
1183 off = iemNativeEmitMarker(pReNative, off, 0x80000005);
1184#endif
1185
1186 /*
1187 * Move/spill/flush stuff out of call-volatile registers.
1188 * This is the easy way out. We could contain this to the tlb-miss branch
1189 * by saving and restoring active stuff here.
1190 */
1191 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
1192 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
1193
1194 /*
1195 * Define labels and allocate the register for holding the GCPhys of the new page.
1196 */
1197 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1198 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1199 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1200 uint32_t const idxRegGCPhys = iemNativeRegAllocTmp(pReNative, &off);
1201
1202 /*
1203 * First we try to go via the TLB.
1204 */
1205 /** @todo */
1206
1207 /*
1208 * TLB miss: Call iemNativeHlpMemCodeNewPageTlbMissWithOff to do the work.
1209 */
1210 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1211
1212 /* IEMNATIVE_CALL_ARG1_GREG = offInstr */
1213 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offInstr);
1214
1215 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1216 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1217
1218 /* Done setting up parameters, make the call. */
1219 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpMemCodeNewPageTlbMissWithOff);
1220
1221 /* Move the result to the right register. */
1222 if (idxRegGCPhys != IEMNATIVE_CALL_RET_GREG)
1223 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegGCPhys, IEMNATIVE_CALL_RET_GREG);
1224
1225 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1226
1227 /*
1228 * Now check that the physical address of the page matches the expected one.
1229 */
1230 RTGCPHYS const GCPhysNewPage = iemTbGetRangePhysPageAddr(pTb, idxRange);
1231 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(pReNative, off, idxRegGCPhys, GCPhysNewPage,
1232 kIemNativeLabelType_ObsoleteTb);
1233
1234 iemNativeRegFreeTmp(pReNative, idxRegGCPhys);
1235 return off;
1236}
1237
1238
1239/**
1240 * Macro that implements TLB loading and pbInstrBuf updating when
1241 * branching or when crossing a page on an instruction boundary.
1242 *
1243 * This differs from BODY_LOAD_TLB_FOR_NEW_PAGE in that it will first check if
1244 * it is an inter-page branch and also check the page offset.
1245 *
1246 * This may long jump if we're raising a \#PF, \#GP or similar trouble.
1247 */
1248#define BODY_LOAD_TLB_AFTER_BRANCH(a_pTb, a_idxRange, a_cbInstr) \
1249 RT_NOREF(a_cbInstr); \
1250 off = iemNativeEmitBltLoadTlbAfterBranch(pReNative, off, pTb, a_idxRange)
1251
1252#if 0
1253do { \
1254 /* Is RIP within the current code page? */ \
1255 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu)); \
1256 uint64_t const uPc = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base; \
1257 uint64_t const off = uPc - pVCpu->iem.s.uInstrBufPc; \
1258 if (off < pVCpu->iem.s.cbInstrBufTotal) \
1259 { \
1260 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK)); \
1261 Assert(pVCpu->iem.s.pbInstrBuf); \
1262 RTGCPHYS const GCPhysRangePageWithOffset = iemTbGetRangePhysPageAddr(a_pTb, a_idxRange) \
1263 | pTb->aRanges[(a_idxRange)].offPhysPage; \
1264 if (GCPhysRangePageWithOffset == pVCpu->iem.s.GCPhysInstrBuf + off) \
1265 { /* we're good */ } \
1266 else \
1267 { \
1268 Log7(("TB jmp miss: %p at %04x:%08RX64 LB %u; branching/1; GCPhysWithOffset=%RGp expected %RGp, pbInstrBuf=%p - #%u\n", \
1269 (a_pTb), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, (a_cbInstr), \
1270 pVCpu->iem.s.GCPhysInstrBuf + off, GCPhysRangePageWithOffset, pVCpu->iem.s.pbInstrBuf, __LINE__)); \
1271 RT_NOREF(a_cbInstr); \
1272 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses); \
1273 return VINF_IEM_REEXEC_BREAK; \
1274 } \
1275 } \
1276 else \
1277 { \
1278 /* Must translate new RIP. */ \
1279 pVCpu->iem.s.pbInstrBuf = NULL; \
1280 pVCpu->iem.s.offCurInstrStart = 0; \
1281 pVCpu->iem.s.offInstrNextByte = 0; \
1282 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL); \
1283 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) || !pVCpu->iem.s.pbInstrBuf); \
1284 \
1285 RTGCPHYS const GCPhysRangePageWithOffset = iemTbGetRangePhysPageAddr(a_pTb, a_idxRange) \
1286 | pTb->aRanges[(a_idxRange)].offPhysPage; \
1287 uint64_t const offNew = uPc - pVCpu->iem.s.uInstrBufPc; \
1288 if ( GCPhysRangePageWithOffset == pVCpu->iem.s.GCPhysInstrBuf + offNew \
1289 && pVCpu->iem.s.pbInstrBuf) \
1290 { /* likely */ } \
1291 else \
1292 { \
1293 Log7(("TB jmp miss: %p at %04x:%08RX64 LB %u; branching/2; GCPhysWithOffset=%RGp expected %RGp, pbInstrBuf=%p - #%u\n", \
1294 (a_pTb), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, (a_cbInstr), \
1295 pVCpu->iem.s.GCPhysInstrBuf + offNew, GCPhysRangePageWithOffset, pVCpu->iem.s.pbInstrBuf, __LINE__)); \
1296 RT_NOREF(a_cbInstr); \
1297 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses); \
1298 return VINF_IEM_REEXEC_BREAK; \
1299 } \
1300 } \
1301 } while(0)
1302#endif
1303
1304DECL_FORCE_INLINE(uint32_t)
1305iemNativeEmitBltLoadTlbAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange)
1306{
1307#ifdef VBOX_STRICT
1308 off = iemNativeEmitMarker(pReNative, off, 0x80000006);
1309#endif
1310
1311 /*
1312 * Define labels and allocate the register for holding the GCPhys of the new page.
1313 */
1314 uint32_t const idxLabelCheckBranchMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckBranchMiss);
1315 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1316 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1317 //
1318
1319 RTGCPHYS const GCPhysRangePageWithOffset = iemTbGetRangePhysPageAddr(pTb, idxRange)
1320 | pTb->aRanges[idxRange].offPhysPage;
1321
1322 /*
1323 *
1324 * First check if RIP is within the current code page.
1325 *
1326 * This is very similar to iemNativeEmitBltInCheckPcAfterBranch, the only
1327 * difference is what we do when stuff doesn't match up.
1328 *
1329 * What we need to do is:
1330 * 1. Calculate the FLAT PC (RIP + CS.BASE).
1331 * 2. Subtract iem.s.uInstrBufPc from it, giving 'off'.
1332 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal or
1333 * we need to retranslate RIP via the TLB.
1334 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
1335 * GCPhysRangePageWithOffset constant mentioned above.
1336 *
1337 * The adding of CS.BASE to RIP can be skipped in the first step if we're
1338 * in 64-bit code or flat 32-bit.
1339 *
1340 */
1341
1342 /* Allocate registers for step 1. Get the shadowed stuff before allocating
1343 the temp register, so we don't accidentally clobber something we'll be
1344 needing again immediately. This is why we get idxRegCsBase here. */
1345 /** @todo save+restore active registers and guest shadows in tlb-miss! */
1346 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
1347 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1348 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1349 uint8_t const idxRegCsBase = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX
1350 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
1351 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1352
1353 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off); /* volatile reg is okay for these two */
1354 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
1355
1356#ifdef VBOX_STRICT
1357 /* Do assertions before idxRegTmp contains anything. */
1358 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1359# ifdef RT_ARCH_AMD64
1360 {
1361 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1362 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1363 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1364 {
1365 /* cmp r/m64, imm8 */
1366 pbCodeBuf[off++] = X86_OP_REX_W;
1367 pbCodeBuf[off++] = 0x83;
1368 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1369 pbCodeBuf[off++] = 0;
1370 /* je rel8 */
1371 pbCodeBuf[off++] = 0x74;
1372 pbCodeBuf[off++] = 1;
1373 /* int3 */
1374 pbCodeBuf[off++] = 0xcc;
1375
1376 }
1377
1378 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1379 /* test r/m64, imm32 */
1380 pbCodeBuf[off++] = X86_OP_REX_W;
1381 pbCodeBuf[off++] = 0xf7;
1382 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1383 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1384 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1385 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1386 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1387 /* jz rel8 */
1388 pbCodeBuf[off++] = 0x74;
1389 pbCodeBuf[off++] = 1;
1390 /* int3 */
1391 pbCodeBuf[off++] = 0xcc;
1392 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1393 }
1394# else
1395
1396 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1397 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1398 {
1399 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1400# ifdef RT_ARCH_ARM64
1401 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1402 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, idxRegTmp);
1403 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2006);
1404 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1405# else
1406# error "Port me!"
1407# endif
1408 }
1409# endif
1410
1411#endif /* VBOX_STRICT */
1412
1413 /* Because we're lazy, we'll jump back here to recalc 'off' and share the
1414 GCPhysRangePageWithOffset check. This is a little risky, so we use the
1415 2nd register to check if we've looped more than once already. */
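    /* Illustrative flow of the retry loop that follows (informal labels, not
       emitted as-is):
           idxRegTmp2 = 0;
         RedoChecks:
           ... steps 1 thru 4 from the sketch above ...
           on an out-of-buffer 'off':
               if (idxRegTmp2 != 0) goto CheckBranchMiss;     // second failure, give up
               iemNativeHlpMemCodeNewPageTlbMiss(pVCpu);      // reload the instruction buffer
               idxRegTmp2 = 1;
               goto RedoChecks;
     */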
1416 off = iemNativeEmitGprZero(pReNative, off, idxRegTmp2);
1417
1418 uint32_t const offLabelRedoChecks = off;
1419
1420 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1421 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1422 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1423 {
1424#ifdef RT_ARCH_ARM64
1425 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1426 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1427 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1428#else
1429 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1430 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1431#endif
1432 }
1433 else
1434 {
1435#ifdef RT_ARCH_ARM64
1436 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1437 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1438 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1439 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1440#else
1441 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1442 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1443 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1444#endif
1445 }
1446
1447 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal.
1448 Unlike iemNativeEmitBltInCheckPcAfterBranch we'll jump to the TLB loading if this fails. */
1449 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1450 uint32_t const offFixedJumpToTlbLoad = off;
1451 off = iemNativeEmitJaToFixed(pReNative, off, off /* (ASSUME ja rel8 suffices) */);
1452
1453 /* 4a. Add iem.s.GCPhysInstrBuf to off ... */
1454#ifdef RT_ARCH_AMD64
1455 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1456 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1457 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1458 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1459 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1460
1461#elif defined(RT_ARCH_ARM64)
1462
1463 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1464 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1465 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1466 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1467
1468# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1469 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1470 off = iemNativeEmitJzToFixed(pReNative, off, off + 2 /* correct for ARM64 */);
1471 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1472# endif
1473#else
1474# error "Port me"
1475#endif
1476
1477 /* 4b. ... and compare with GCPhysRangePageWithOffset.
1478
1479 Unlike iemNativeEmitBltInCheckPcAfterBranch we'll have to be more
1480 careful and avoid implicit temporary register usage here.
1481
1482 Unlike the threaded version of this code, we do not obsolete TBs here to
1483 reduce the code size and because indirect calls may legally end at the
1484 same offset in two different pages depending on the program state. */
1485 /** @todo synch the threaded BODY_LOAD_TLB_AFTER_BRANCH version with this. */
1486 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp2, GCPhysRangePageWithOffset);
1487 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegTmp, idxRegTmp2);
1488 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelCheckBranchMiss);
1489 uint32_t const offFixedJumpToEnd = off;
1490 off = iemNativeEmitJmpToFixed(pReNative, off, off + 512 /* force rel32 */);
1491
1492 /*
1493 * First we try to go via the TLB.
1494 */
1495 iemNativeFixupFixedJump(pReNative, offFixedJumpToTlbLoad, off);
1496
1497 /* Check that we haven't been here before. */
1498 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxRegTmp2, false /*f64Bit*/, idxLabelCheckBranchMiss);
1499
1500 /*
1501 * TLB miss: Call iemNativeHlpMemCodeNewPageTlbMiss to do the work.
1502 */
1503 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1504
1505 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1506 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1507
1508 /* Done setting up parameters, make the call. */
1509 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpMemCodeNewPageTlbMiss);
1510
1511 /* Jmp back to the start and redo the checks. */
1512 off = iemNativeEmitLoadGpr8Imm(pReNative, off, idxRegTmp2, 1); /* indicate that we've looped once already */
1513 off = iemNativeEmitJmpToFixed(pReNative, off, offLabelRedoChecks);
1514
1515 /* The end. */
1516 iemNativeFixupFixedJump(pReNative, offFixedJumpToEnd, off);
1517
1518 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1519 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1520 iemNativeRegFreeTmp(pReNative, idxRegPc);
1521 if (idxRegCsBase != UINT8_MAX)
1522 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1523 return off;
1524}
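
/* Note: this emitter presumably backs the BODY_LOAD_TLB_AFTER_BRANCH macro used
   by the built-in functions further down; roughly along these lines (sketch
   only, see the actual macro definition earlier in this file):

       #define BODY_LOAD_TLB_AFTER_BRANCH(a_pTb, a_idxRange, a_cbInstr) \
           off = iemNativeEmitBltLoadTlbAfterBranch(pReNative, off, a_pTb, a_idxRange)
 */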
1525
1526
1527#ifdef BODY_CHECK_CS_LIM
1528/**
1529 * Built-in function that checks that EIP/IP + uParam0 is within CS.LIM,
1530 * raising a \#GP(0) if this isn't the case.
1531 */
1532IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
1533{
1534 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1535 BODY_SET_CUR_INSTR();
1536 BODY_FLUSH_PENDING_WRITES();
1537 BODY_CHECK_CS_LIM(cbInstr);
1538 return off;
1539}
1540#endif
1541
1542
1543#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_CS_LIM)
1544/**
1545 * Built-in function for re-checking opcodes and CS.LIM after an instruction
1546 * that may have modified them.
1547 */
1548IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
1549{
1550 PCIEMTB const pTb = pReNative->pTbOrg;
1551 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1552 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1553 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1554 BODY_SET_CUR_INSTR();
1555 BODY_FLUSH_PENDING_WRITES();
1556 BODY_CHECK_CS_LIM(cbInstr);
1557 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1558 return off;
1559}
1560#endif
1561
1562
1563#if defined(BODY_CHECK_OPCODES)
1564/**
1565 * Built-in function for re-checking opcodes after an instruction that may have
1566 * modified them.
1567 */
1568IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
1569{
1570 PCIEMTB const pTb = pReNative->pTbOrg;
1571 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1572 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1573 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1574 BODY_SET_CUR_INSTR();
1575 BODY_FLUSH_PENDING_WRITES();
1576 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1577 return off;
1578}
1579#endif
1580
1581
1582#if defined(BODY_CHECK_OPCODES) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1583/**
1584 * Built-in function for re-checking opcodes and considering the need for CS.LIM
1585 * checking after an instruction that may have modified them.
1586 */
1587IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
1588{
1589 PCIEMTB const pTb = pReNative->pTbOrg;
1590 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1591 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1592 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1593 BODY_SET_CUR_INSTR();
1594 BODY_FLUSH_PENDING_WRITES();
1595 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1596 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1597 return off;
1598}
1599#endif
1600
1601
1602/*
1603 * Post-branching checkers.
1604 */
1605
1606#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1607/**
1608 * Built-in function for checking CS.LIM, checking the PC and checking opcodes
1609 * after conditional branching within the same page.
1610 *
1611 * @see iemThreadedFunc_BltIn_CheckPcAndOpcodes
1612 */
1613IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
1614{
1615 PCIEMTB const pTb = pReNative->pTbOrg;
1616 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1617 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1618 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1619 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1620 BODY_SET_CUR_INSTR();
1621 BODY_FLUSH_PENDING_WRITES();
1622 BODY_CHECK_CS_LIM(cbInstr);
1623 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1624 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1625 //LogFunc(("okay\n"));
1626 return off;
1627}
1628#endif
1629
1630
1631#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH)
1632/**
1633 * Built-in function for checking the PC and checking opcodes after conditional
1634 * branching within the same page.
1635 *
1636 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1637 */
1638IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
1639{
1640 PCIEMTB const pTb = pReNative->pTbOrg;
1641 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1642 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1643 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1644 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1645 BODY_SET_CUR_INSTR();
1646 BODY_FLUSH_PENDING_WRITES();
1647 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1648 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1649 //LogFunc(("okay\n"));
1650 return off;
1651}
1652#endif
1653
1654
1655#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1656/**
1657 * Built-in function for checking the PC and checking opcodes and considering
1658 * the need for CS.LIM checking after conditional branching within the same
1659 * page.
1660 *
1661 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1662 */
1663IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
1664{
1665 PCIEMTB const pTb = pReNative->pTbOrg;
1666 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1667 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1668 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1669 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1670 BODY_SET_CUR_INSTR();
1671 BODY_FLUSH_PENDING_WRITES();
1672 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1673 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1674 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1675 //LogFunc(("okay\n"));
1676 return off;
1677}
1678#endif
1679
1680
1681#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1682/**
1683 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1684 * transitioning to a different code page.
1685 *
1686 * The code page transition can either be a natural advance onto the next page
1687 * (with the instruction starting at page offset zero) or by means of branching.
1688 *
1689 * @see iemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1690 */
1691IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
1692{
1693 PCIEMTB const pTb = pReNative->pTbOrg;
1694 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1695 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1696 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1697 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1698 BODY_SET_CUR_INSTR();
1699 BODY_FLUSH_PENDING_WRITES();
1700 BODY_CHECK_CS_LIM(cbInstr);
1701 Assert(offRange == 0);
1702 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1703 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1704 //LogFunc(("okay\n"));
1705 return off;
1706}
1707#endif
1708
1709
1710#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH)
1711/**
1712 * Built-in function for loading TLB and checking opcodes when transitioning to
1713 * a different code page.
1714 *
1715 * The code page transition can either be a natural advance onto the next page
1716 * (with the instruction starting at page offset zero) or by means of branching.
1717 *
1718 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1719 */
1720IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
1721{
1722 PCIEMTB const pTb = pReNative->pTbOrg;
1723 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1724 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1725 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1726 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1727 BODY_SET_CUR_INSTR();
1728 BODY_FLUSH_PENDING_WRITES();
1729 Assert(offRange == 0);
1730 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1731 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1732 //LogFunc(("okay\n"));
1733 return off;
1734}
1735#endif
1736
1737
1738#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1739/**
1740 * Built-in function for loading TLB and checking opcodes and considering the
1741 * need for CS.LIM checking when transitioning to a different code page.
1742 *
1743 * The code page transition can either be a natural advance onto the next page
1744 * (with the instruction starting at page offset zero) or by means of branching.
1745 *
1746 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1747 */
1748IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
1749{
1750 PCIEMTB const pTb = pReNative->pTbOrg;
1751 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1752 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1753 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1754 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1755 BODY_SET_CUR_INSTR();
1756 BODY_FLUSH_PENDING_WRITES();
1757 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1758 Assert(offRange == 0);
1759 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1760 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1761 //LogFunc(("okay\n"));
1762 return off;
1763}
1764#endif
1765
1766
1767
1768/*
1769 * Natural page crossing checkers.
1770 */
1771
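/* Parameter layout assumed by the checkers in this section, as can be seen from
   how they unpack pCallEntry->auParams (the threaded translator does the actual
   packing):

       auParams[0] = cbInstr | ((uint64_t)cbStartPage << 32)
       auParams[1] = idxRange1
       auParams[2] = offRange1
 */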
1772#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1773/**
1774 * Built-in function for checking CS.LIM, loading TLB and checking opcodes on
1775 * both pages when transitioning to a different code page.
1776 *
1777 * This is used when the previous instruction requires revalidation of opcode
1778 * bytes and the current instruction straddles a page boundary with opcode bytes
1779 * in both the old and new page.
1780 *
1781 * @see iemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1782 */
1783IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
1784{
1785 PCIEMTB const pTb = pReNative->pTbOrg;
1786 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1787 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1788 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1789 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1790 uint32_t const idxRange2 = idxRange1 + 1;
1791 BODY_SET_CUR_INSTR();
1792 BODY_FLUSH_PENDING_WRITES();
1793 BODY_CHECK_CS_LIM(cbInstr);
1794 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1795 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1796 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1797 return off;
1798}
1799#endif
1800
1801
1802#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1803/**
1804 * Built-in function for loading TLB and checking opcodes on both pages when
1805 * transitioning to a different code page.
1806 *
1807 * This is used when the previous instruction requires revalidation of opcode
1808 * bytes and the current instruction straddles a page boundary with opcode bytes
1809 * in both the old and new page.
1810 *
1811 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1812 */
1813IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
1814{
1815 PCIEMTB const pTb = pReNative->pTbOrg;
1816 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1817 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1818 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1819 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1820 uint32_t const idxRange2 = idxRange1 + 1;
1821 BODY_SET_CUR_INSTR();
1822 BODY_FLUSH_PENDING_WRITES();
1823 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1824 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1825 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1826 return off;
1827}
1828#endif
1829
1830
1831#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1832/**
1833 * Built-in function for loading TLB and checking opcodes on both pages and
1834 * considering the need for CS.LIM checking when transitioning to a different
1835 * code page.
1836 *
1837 * This is used when the previous instruction requires revalidation of opcode
1838 * bytes and the current instruction straddles a page boundary with opcode bytes
1839 * in both the old and new page.
1840 *
1841 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1842 */
1843IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
1844{
1845 PCIEMTB const pTb = pReNative->pTbOrg;
1846 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1847 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1848 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1849 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1850 uint32_t const idxRange2 = idxRange1 + 1;
1851 BODY_SET_CUR_INSTR();
1852 BODY_FLUSH_PENDING_WRITES();
1853 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1854 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1855 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1856 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1857 return off;
1858}
1859#endif
1860
1861
1862#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1863/**
1864 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1865 * advancing naturally to a different code page.
1866 *
1867 * Only opcodes on the new page are checked.
1868 *
1869 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1870 */
1871IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
1872{
1873 PCIEMTB const pTb = pReNative->pTbOrg;
1874 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1875 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1876 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1877 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1878 uint32_t const idxRange2 = idxRange1 + 1;
1879 BODY_SET_CUR_INSTR();
1880 BODY_FLUSH_PENDING_WRITES();
1881 BODY_CHECK_CS_LIM(cbInstr);
1882 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1883 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1884 return off;
1885}
1886#endif
1887
1888
1889#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1890/**
1891 * Built-in function for loading TLB and checking opcodes when advancing
1892 * naturally to a different code page.
1893 *
1894 * Only opcodes on the new page are checked.
1895 *
1896 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1897 */
1898IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
1899{
1900 PCIEMTB const pTb = pReNative->pTbOrg;
1901 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1902 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1903 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1904 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1905 uint32_t const idxRange2 = idxRange1 + 1;
1906 BODY_SET_CUR_INSTR();
1907 BODY_FLUSH_PENDING_WRITES();
1908 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1909 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1910 return off;
1911}
1912#endif
1913
1914
1915#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1916/**
1917 * Built-in function for loading TLB and checking opcodes and considering the
1918 * need for CS.LIM checking when advancing naturally to a different code page.
1919 *
1920 * Only opcodes on the new page are checked.
1921 *
1922 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1923 */
1924IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
1925{
1926 PCIEMTB const pTb = pReNative->pTbOrg;
1927 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1928 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1929 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1930 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1931 uint32_t const idxRange2 = idxRange1 + 1;
1932 BODY_SET_CUR_INSTR();
1933 BODY_FLUSH_PENDING_WRITES();
1934 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1935 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1936 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1937 return off;
1938}
1939#endif
1940
1941
1942#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1943/**
1944 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1945 * advancing naturally to a different code page with first instr at byte 0.
1946 *
1947 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1948 */
1949IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
1950{
1951 PCIEMTB const pTb = pReNative->pTbOrg;
1952 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1953 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1954 BODY_SET_CUR_INSTR();
1955 BODY_FLUSH_PENDING_WRITES();
1956 BODY_CHECK_CS_LIM(cbInstr);
1957 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1958 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1959 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1960 return off;
1961}
1962#endif
1963
1964
1965#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1966/**
1967 * Built-in function for loading TLB and checking opcodes when advancing
1968 * naturally to a different code page with first instr at byte 0.
1969 *
1970 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1971 */
1972IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
1973{
1974 PCIEMTB const pTb = pReNative->pTbOrg;
1975 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1976 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1977 BODY_SET_CUR_INSTR();
1978 BODY_FLUSH_PENDING_WRITES();
1979 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1980 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1981 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1982 return off;
1983}
1984#endif
1985
1986
1987#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1988/**
1989 * Built-in function for loading TLB and checking opcodes and considering the
1990 * need for CS.LIM checking when advancing naturally to a different code page
1991 * with first instr at byte 0.
1992 *
1993 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1994 */
1995IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
1996{
1997 PCIEMTB const pTb = pReNative->pTbOrg;
1998 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1999 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
2000 BODY_SET_CUR_INSTR();
2001 BODY_FLUSH_PENDING_WRITES();
2002 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
2003 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
2004 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
2005 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
2006 return off;
2007}
2008#endif
2009