VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompBltIn.cpp@102841

Last change on this file since 102841 was 102841, checked in by vboxsync, 15 months ago

VMM/IEM: More stats. bugref:10371

1/* $Id: IEMAllN8veRecompBltIn.cpp 102841 2024-01-11 13:48:35Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, Emitters for Built-In Threaded Functions.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33//#define IEM_WITH_OPAQUE_DECODER_STATE - need offCurInstrStart access for iemNativeHlpMemCodeNewPageTlbMiss and friends.
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#include <VBox/vmm/iem.h>
37#include <VBox/vmm/cpum.h>
38#include <VBox/vmm/dbgf.h>
39#include "IEMInternal.h"
40#include <VBox/vmm/vmcc.h>
41#include <VBox/log.h>
42#include <VBox/err.h>
43#include <VBox/param.h>
44#include <iprt/assert.h>
45#include <iprt/string.h>
46#if defined(RT_ARCH_AMD64)
47# include <iprt/x86.h>
48#elif defined(RT_ARCH_ARM64)
49# include <iprt/armv8.h>
50#endif
51
52
53#include "IEMInline.h"
54#include "IEMThreadedFunctions.h"
55#include "IEMN8veRecompiler.h"
56#include "IEMN8veRecompilerEmit.h"
57
58
59
60/*********************************************************************************************************************************
61* TB Helper Functions *
62*********************************************************************************************************************************/
63#ifdef RT_ARCH_AMD64
64DECLASM(void) iemNativeHlpAsmSafeWrapLogCpuState(void);
65#endif
66
67
68/**
69 * Used by TB code to deal with a TLB miss for a new page.
70 */
71IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCodeNewPageTlbMiss,(PVMCPUCC pVCpu))
72{
73 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeCodeTlbMissesNewPage);
74 pVCpu->iem.s.pbInstrBuf = NULL;
75 pVCpu->iem.s.offCurInstrStart = GUEST_PAGE_SIZE;
76 pVCpu->iem.s.offInstrNextByte = GUEST_PAGE_SIZE;
77 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
78 if (pVCpu->iem.s.pbInstrBuf)
79 { /* likely */ }
80 else
81 {
82 IEM_DO_LONGJMP(pVCpu, VINF_IEM_REEXEC_BREAK);
83 }
84}
85
86
87/**
88 * Used by TB code to deal with a TLB miss for a new page.
89 */
90IEM_DECL_NATIVE_HLP_DEF(RTGCPHYS, iemNativeHlpMemCodeNewPageTlbMissWithOff,(PVMCPUCC pVCpu, uint8_t offInstr))
91{
92 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeCodeTlbMissesNewPage);
93 pVCpu->iem.s.pbInstrBuf = NULL;
94 pVCpu->iem.s.offCurInstrStart = GUEST_PAGE_SIZE - offInstr;
95 pVCpu->iem.s.offInstrNextByte = GUEST_PAGE_SIZE;
96 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
97 return pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf : NIL_RTGCPHYS;
98}
99
100
101/*********************************************************************************************************************************
102* Builtin functions *
103*********************************************************************************************************************************/
104
105/**
106 * Built-in function that does nothing.
107 *
108 * Whether this is called or not can be controlled by the entry in the
109 * IEMThreadedGenerator.katBltIns table. This can be useful for determining
110 * why behaviour changes when enabling the LogCpuState builtins, i.e. whether
111 * it's the reduced call count in the TBs or the threaded calls flushing
112 * register state.
113 */
114IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_Nop)
115{
116 RT_NOREF(pReNative, pCallEntry);
117 return off;
118}
119
120
121/**
122 * Emits code for LogCpuState.
123 *
124 * This shouldn't have any relevant impact on the recompiler state.
125 */
126IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_LogCpuState)
127{
128#ifdef RT_ARCH_AMD64
129 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
130 /* push rax */
131 pbCodeBuf[off++] = 0x50 + X86_GREG_xAX;
132 /* push imm32 */
133 pbCodeBuf[off++] = 0x68;
134 pbCodeBuf[off++] = RT_BYTE1(pCallEntry->auParams[0]);
135 pbCodeBuf[off++] = RT_BYTE2(pCallEntry->auParams[0]);
136 pbCodeBuf[off++] = RT_BYTE3(pCallEntry->auParams[0]);
137 pbCodeBuf[off++] = RT_BYTE4(pCallEntry->auParams[0]);
138 /* mov rax, iemNativeHlpAsmSafeWrapLogCpuState */
139 pbCodeBuf[off++] = X86_OP_REX_W;
140 pbCodeBuf[off++] = 0xb8 + X86_GREG_xAX;
141 *(uint64_t *)&pbCodeBuf[off] = (uintptr_t)iemNativeHlpAsmSafeWrapLogCpuState;
142 off += sizeof(uint64_t);
143 /* call rax */
144 pbCodeBuf[off++] = 0xff;
145 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
146 /* pop rax */
147 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
148 /* pop rax */
149 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
150 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
151
152#else
153 /** @todo Implement this */
154 AssertFailed();
155 RT_NOREF(pReNative, pCallEntry);
156#endif
157 return off;
158}
159
160
161/**
162 * Built-in function that calls a C-implementation function taking zero arguments.
163 */
164IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
165{
166 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
167 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
168 uint64_t const fGstShwFlush = pCallEntry->auParams[2];
169 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
170}
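/* In effect the TB ends up doing roughly this (sketch): flush the guest shadow
   registers indicated by fGstShwFlush, then call pfnCImpl(pVCpu, cbInstr) and
   dispatch on the returned strict status. */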
171
172
173/**
174 * Built-in function that checks for pending interrupts that can be delivered or
175 * forced action flags.
176 *
177 * This triggers after the completion of an instruction, so EIP is already at
178 * the next instruction. If an IRQ or important FF is pending, this will return
179 * a non-zero status that stops TB execution.
180 */
181IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
182{
183 RT_NOREF(pCallEntry);
184
185 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
186 and I'm too lazy to create a 'Fixed' version of that one. */
187 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
188 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
189
190 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
191
192 /* Again, we need to load the extended EFLAGS before we actually need them
193 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
194 loaded them inside the check, as the shadow state would not be correct
195 when the code branches before the load. Ditto PC. */
196 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
197 kIemNativeGstRegUse_ReadOnly);
198
199 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
200
201 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
202
203 /*
204 * Start by checking the local forced actions of the EMT we're on for IRQs
205 * and other FFs that need servicing.
206 */
207 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
208 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
209 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
210 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
211 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
212 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
213 | VMCPU_FF_TLB_FLUSH
214 | VMCPU_FF_UNHALT ),
215 true /*fSetFlags*/);
216 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
217 uint32_t const offFixupJumpToVmCheck1 = off;
218 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
219
220 /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
221 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
222 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
223 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
224 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
225 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
226
227 /* So, it's only interrupt related FFs and we need to see if IRQs are being
228 suppressed by the CPU or not. */
229 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
230 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
231 idxLabelReturnBreak);
232
233 /* We've got shadow flags set, so we must check that the PC they are valid
234 for matches our current PC value. */
235 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
236 * a register. */
237 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
238 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
239
240 /*
241 * Now check the force flags of the VM.
242 */
243 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
244 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
245 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
246 off = iemNativeEmitLoadGprByGprU32(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
247 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
248 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
249
250 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
251
252 /*
253 * We're good, no IRQs or FFs pending.
254 */
255 iemNativeRegFreeTmp(pReNative, idxTmpReg);
256 iemNativeRegFreeTmp(pReNative, idxEflReg);
257 iemNativeRegFreeTmp(pReNative, idxPcReg);
258
259 return off;
260}
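/* Rough C equivalent of the check emitted above (illustrative sketch only; field
   spellings are approximate and the precise NMI/interrupt conditions live in
   EM/CPUM, see the @todo above). */
#if 0
    uint64_t fFlags = pVCpu->fLocalForcedActions
                    & (VMCPU_FF_ALL_MASK & ~(  VMCPU_FF_PGM_SYNC_CR3
                                             | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
                                             | VMCPU_FF_TLB_FLUSH
                                             | VMCPU_FF_UNHALT));
    if (fFlags)
    {
        if (fFlags & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
            return VINF_IEM_REEXEC_BREAK;   /* other relevant FFs pending */
        if (   (pVCpu->cpum.GstCtx.eflags.u & X86_EFL_IF)
            && (   !(pVCpu->cpum.GstCtx.eflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
                || pVCpu->cpum.GstCtx.uRipInhibitInt != pVCpu->cpum.GstCtx.rip))
            return VINF_IEM_REEXEC_BREAK;   /* an IRQ can be delivered */
    }
    if (pVCpu->CTX_SUFF(pVM)->fGlobalForcedActions & VM_FF_ALL_MASK)
        return VINF_IEM_REEXEC_BREAK;
#endif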
261
262
263/**
264 * Built-in function checks if IEMCPU::fExec has the expected value.
265 */
266IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
267{
268 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
269 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
270
271 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
272 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
273 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
274 kIemNativeLabelType_ReturnBreak);
275 iemNativeRegFreeTmp(pReNative, idxTmpReg);
276 return off;
277}
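/* Equivalent C-level check (sketch):
       if ((pVCpu->iem.s.fExec & IEMTB_F_KEY_MASK) != (fExpectedExec & IEMTB_F_KEY_MASK))
           return VINF_IEM_REEXEC_BREAK; */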
278
279
280/**
281 * Sets idxTbCurInstr in preparation of raising an exception or aborting the TB.
282 */
283/** @todo Optimize this, so we don't set the same value more than once. Just
284 * needs some tracking. */
285#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
286# define BODY_SET_CUR_INSTR() \
287 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr))
288#else
289# define BODY_SET_CUR_INSTR() ((void)0)
290#endif
291
292/**
293 * Flushes pending writes in preparation of raising an exception or aborting the TB.
294 */
295#define BODY_FLUSH_PENDING_WRITES() \
296 off = iemNativeRegFlushPendingWrites(pReNative, off);
297
298
299/**
300 * Macro that emits the 16/32-bit CS.LIM check.
301 */
302#define BODY_CHECK_CS_LIM(a_cbInstr) \
303 off = iemNativeEmitBltInCheckCsLim(pReNative, off, (a_cbInstr))
304
305DECL_FORCE_INLINE(uint32_t)
306iemNativeEmitBltInCheckCsLim(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
307{
308 Assert(cbInstr > 0);
309 Assert(cbInstr < 16);
310#ifdef VBOX_STRICT
311 off = iemNativeEmitMarker(pReNative, off, 0x80000001);
312#endif
313
314 /*
315 * We need CS.LIM and RIP here. When cbInstr is larger than 1, we also need
316 * a temporary register for calculating the last address of the instruction.
317 *
318 * The calculation and comparisons are 32-bit. We ASSUME that the incoming
319 * RIP isn't totally invalid, i.e. that any jump/call/ret/iret instruction
320 * that last updated EIP here checked it already, and that we're therefore
321 * safe in the 32-bit wrap-around scenario to only check that the last byte
322 * is within CS.LIM. In the case of instruction-by-instruction advancing
323 * up to an EIP wrap-around, we know that CS.LIM is 4G-1 because the limit
324 * must be using 4KB granularity and the previous instruction was fine.
325 */
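 /* In guest terms the check below reduces to roughly this (sketch, using the
    32-bit arithmetic assumptions above):
        if ((uint32_t)(eip + cbInstr - 1) > cs.u32Limit)
            raise #GP(0);
  */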
326 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
327 kIemNativeGstRegUse_ReadOnly);
328 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
329 kIemNativeGstRegUse_ReadOnly);
330#ifdef RT_ARCH_AMD64
331 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
332#elif defined(RT_ARCH_ARM64)
333 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
334#else
335# error "Port me"
336#endif
337
338 if (cbInstr != 1)
339 {
340 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
341
342 /*
343 * 1. idxRegTmp = idxRegPc + cbInstr;
344 * 2. if idxRegTmp > idxRegCsLim then raise #GP(0).
345 */
346#ifdef RT_ARCH_AMD64
347 /* 1. lea tmp32, [Pc + cbInstr - 1] */
348 if (idxRegTmp >= 8 || idxRegPc >= 8)
349 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegPc < 8 ? 0 : X86_OP_REX_B);
350 pbCodeBuf[off++] = 0x8d;
351 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, idxRegTmp & 7, idxRegPc & 7);
352 if ((idxRegPc & 7) == X86_GREG_xSP)
353 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegPc & 7, 4 /*no index*/, 0);
354 pbCodeBuf[off++] = cbInstr - 1;
355
356 /* 2. cmp tmp32(r), CsLim(r/m). */
357 if (idxRegTmp >= 8 || idxRegCsLim >= 8)
358 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
359 pbCodeBuf[off++] = 0x3b;
360 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegTmp & 7, idxRegCsLim & 7);
361
362#elif defined(RT_ARCH_ARM64)
363 /* 1. add tmp32, Pc, #cbInstr-1 */
364 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegTmp, idxRegPc, cbInstr - 1, false /*f64Bit*/);
365 /* 2. cmp tmp32, CsLim */
366 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegTmp, idxRegCsLim,
367 false /*f64Bit*/, true /*fSetFlags*/);
368
369#endif
370 iemNativeRegFreeTmp(pReNative, idxRegTmp);
371 }
372 else
373 {
374 /*
375 * Here we can skip step 1 and compare PC and CS.LIM directly.
376 */
377#ifdef RT_ARCH_AMD64
378 /* 2. cmp eip(r), CsLim(r/m). */
379 if (idxRegPc >= 8 || idxRegCsLim >= 8)
380 pbCodeBuf[off++] = (idxRegPc < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
381 pbCodeBuf[off++] = 0x3b;
382 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegPc & 7, idxRegCsLim & 7);
383
384#elif defined(RT_ARCH_ARM64)
385 /* 2. cmp Pc, CsLim */
386 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegPc, idxRegCsLim,
387 false /*f64Bit*/, true /*fSetFlags*/);
388
389#endif
390 }
391
392 /* 3. Jump if greater. */
393 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
394
395 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
396 iemNativeRegFreeTmp(pReNative, idxRegPc);
397 return off;
398}
399
400
401/**
402 * Macro that considers whether we need CS.LIM checking after a branch or
403 * crossing over to a new page.
404 */
405#define BODY_CONSIDER_CS_LIM_CHECKING(a_pTb, a_cbInstr) \
406 RT_NOREF(a_cbInstr); \
407 off = iemNativeEmitBltInConsiderLimChecking(pReNative, off)
408
409DECL_FORCE_INLINE(uint32_t)
410iemNativeEmitBltInConsiderLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off)
411{
412#ifdef VBOX_STRICT
413 off = iemNativeEmitMarker(pReNative, off, 0x80000002);
414#endif
415
416 /*
417 * This check must match the one in iemGetTbFlagsForCurrentPc
418 * exactly:
419 *
420 * int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
421 * if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
422 * return fRet;
423 * return fRet | IEMTB_F_CS_LIM_CHECKS;
424 *
425 *
426 * We need EIP, CS.LIM and CS.BASE here.
427 */
428
429 /* Calculate the offFromLim first: */
430 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
431 kIemNativeGstRegUse_ReadOnly);
432 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
433 kIemNativeGstRegUse_ReadOnly);
434 uint8_t const idxRegLeft = iemNativeRegAllocTmp(pReNative, &off);
435
436#ifdef RT_ARCH_ARM64
437 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
438 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegLeft, idxRegCsLim, idxRegPc);
439 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
440#else
441 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegLeft, idxRegCsLim);
442 off = iemNativeEmitSubTwoGprs(pReNative, off, idxRegLeft, idxRegPc);
443#endif
444
445 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
446 iemNativeRegFreeTmp(pReNative, idxRegPc);
447
448 /* Calculate the threshold level (right side). */
449 uint8_t const idxRegCsBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
450 kIemNativeGstRegUse_ReadOnly);
451 uint8_t const idxRegRight = iemNativeRegAllocTmp(pReNative, &off);
452
453#ifdef RT_ARCH_ARM64
454 pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
455 Assert(Armv8A64ConvertImmRImmS2Mask32(11, 0) == GUEST_PAGE_OFFSET_MASK);
456 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegRight, idxRegCsBase, 11, 0, false /*f64Bit*/);
457 pu32CodeBuf[off++] = Armv8A64MkInstrNeg(idxRegRight);
458 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
459 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
460 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
461
462#else
463 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegRight, GUEST_PAGE_OFFSET_MASK);
464 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegRight, idxRegCsBase);
465 off = iemNativeEmitNegGpr(pReNative, off, idxRegRight);
466 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRight, X86_PAGE_SIZE + 16);
467#endif
468
469 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
470
471 /* Compare the two and jump out if we're too close to the limit. */
472 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegLeft, idxRegRight);
473 off = iemNativeEmitJlToNewLabel(pReNative, off, kIemNativeLabelType_NeedCsLimChecking);
474
475 iemNativeRegFreeTmp(pReNative, idxRegRight);
476 iemNativeRegFreeTmp(pReNative, idxRegLeft);
477 return off;
478}
479
480
481
482/**
483 * Macro that implements opcode (re-)checking.
484 */
485#define BODY_CHECK_OPCODES(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
486 RT_NOREF(a_cbInstr); \
487 off = iemNativeEmitBltInCheckOpcodes(pReNative, off, (a_pTb), (a_idxRange), (a_offRange))
488
489#if 0 /* debugging aid */
490bool g_fBpOnObsoletion = false;
491# define BP_ON_OBSOLETION g_fBpOnObsoletion
492#else
493# define BP_ON_OBSOLETION 0
494#endif
495
496DECL_FORCE_INLINE(uint32_t)
497iemNativeEmitBltInCheckOpcodes(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint16_t offRange)
498{
499 Assert(idxRange < pTb->cRanges && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges));
500 Assert(offRange < pTb->aRanges[idxRange].cbOpcodes);
501#ifdef VBOX_STRICT
502 off = iemNativeEmitMarker(pReNative, off, 0x80000003);
503#endif
504
505 uint32_t const idxLabelObsoleteTb = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ObsoleteTb);
506
507 /*
508 * Where to start and how much to compare.
509 *
510 * Looking at the ranges produced when r160746 was running a DOS VM with TB
511 * logging, the ranges can be anything from 1 byte to at least 0x197 bytes,
512 * with the 6, 5, 4, 7, 8, 40, 3, 2, 9 and 10 being the top 10 in the sample.
513 *
514 * The top 10 for the early boot phase of a 64-bit debian 9.4 VM: 5, 9, 8,
515 * 12, 10, 11, 6, 13, 15 and 16. Max 0x359 bytes. Same revision as above.
516 */
517 uint16_t offPage = pTb->aRanges[idxRange].offPhysPage + offRange;
518 uint16_t cbLeft = pTb->aRanges[idxRange].cbOpcodes - offRange;
519 Assert(cbLeft > 0);
520 uint8_t const *pbOpcodes = &pTb->pabOpcodes[pTb->aRanges[idxRange].offOpcodes + offRange];
521 uint32_t offConsolidatedJump = UINT32_MAX;
522
523#ifdef RT_ARCH_AMD64
524 /* AMD64/x86 offers a bunch of options. Smaller stuff can be
525 completely inlined, for larger we use REPE CMPS. */
526# define CHECK_OPCODES_CMP_IMMXX(a_idxReg, a_bOpcode) /* cost: 3 bytes */ do { \
527 pbCodeBuf[off++] = a_bOpcode; \
528 Assert(offPage < 127); \
529 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 7, a_idxReg); \
530 pbCodeBuf[off++] = RT_BYTE1(offPage); \
531 } while (0)
532
533# define CHECK_OPCODES_CMP_JMP() /* cost: 7 bytes first time, then 2 bytes */ do { \
534 if (offConsolidatedJump != UINT32_MAX) \
535 { \
536 int32_t const offDisp = (int32_t)offConsolidatedJump - (int32_t)(off + 2); \
537 Assert(offDisp >= -128); \
538 pbCodeBuf[off++] = 0x75; /* jnz near */ \
539 pbCodeBuf[off++] = (uint8_t)offDisp; \
540 } \
541 else \
542 { \
543 pbCodeBuf[off++] = 0x74; /* jz near +5 */ \
544 pbCodeBuf[off++] = 0x05 + BP_ON_OBSOLETION; \
545 offConsolidatedJump = off; \
546 if (BP_ON_OBSOLETION) pbCodeBuf[off++] = 0xcc; \
547 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */ \
548 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_Rel32, -4); \
549 pbCodeBuf[off++] = 0x00; \
550 pbCodeBuf[off++] = 0x00; \
551 pbCodeBuf[off++] = 0x00; \
552 pbCodeBuf[off++] = 0x00; \
553 } \
554 } while (0)
555
556# define CHECK_OPCODES_CMP_IMM32(a_idxReg) /* cost: 3+4+2 = 9 */ do { \
557 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
558 pbCodeBuf[off++] = *pbOpcodes++; \
559 pbCodeBuf[off++] = *pbOpcodes++; \
560 pbCodeBuf[off++] = *pbOpcodes++; \
561 pbCodeBuf[off++] = *pbOpcodes++; \
562 cbLeft -= 4; \
563 offPage += 4; \
564 CHECK_OPCODES_CMP_JMP(); \
565 } while (0)
566
567# define CHECK_OPCODES_CMP_IMM16(a_idxReg) /* cost: 1+3+2+2 = 8 */ do { \
568 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; \
569 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
570 pbCodeBuf[off++] = *pbOpcodes++; \
571 pbCodeBuf[off++] = *pbOpcodes++; \
572 cbLeft -= 2; \
573 offPage += 2; \
574 CHECK_OPCODES_CMP_JMP(); \
575 } while (0)
576
577# define CHECK_OPCODES_CMP_IMM8(a_idxReg) /* cost: 3+1+2 = 6 */ do { \
578 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x80); \
579 pbCodeBuf[off++] = *pbOpcodes++; \
580 cbLeft -= 1; \
581 offPage += 1; \
582 CHECK_OPCODES_CMP_JMP(); \
583 } while (0)
584
585# define CHECK_OPCODES_CMPSX(a_bOpcode, a_cbToSubtract, a_bPrefix) /* cost: 2+2 = 4 */ do { \
586 if (a_bPrefix) \
587 pbCodeBuf[off++] = (a_bPrefix); \
588 pbCodeBuf[off++] = (a_bOpcode); \
589 CHECK_OPCODES_CMP_JMP(); \
590 cbLeft -= (a_cbToSubtract); \
591 } while (0)
592
593# define CHECK_OPCODES_ECX_IMM(a_uValue) /* cost: 5 */ do { \
594 pbCodeBuf[off++] = 0xb8 + X86_GREG_xCX; \
595 pbCodeBuf[off++] = RT_BYTE1(a_uValue); \
596 pbCodeBuf[off++] = RT_BYTE2(a_uValue); \
597 pbCodeBuf[off++] = RT_BYTE3(a_uValue); \
598 pbCodeBuf[off++] = RT_BYTE4(a_uValue); \
599 } while (0)
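/* As an illustration (sketch, not emitted verbatim anywhere): for a dword-aligned
   range with cbLeft == 7 the inline path below produces something along the lines of
       cmp dword [tmp + offPage],     imm32
       cmp word  [tmp + offPage + 4], imm16
       cmp byte  [tmp + offPage + 6], imm8
   with each compare followed by a jnz to the shared obsolete-TB jump. */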
600
601 if (cbLeft <= 24)
602 {
603 uint8_t const idxRegTmp = iemNativeRegAllocTmpEx(pReNative, &off,
604 ( RT_BIT_32(X86_GREG_xAX)
605 | RT_BIT_32(X86_GREG_xCX)
606 | RT_BIT_32(X86_GREG_xDX)
607 | RT_BIT_32(X86_GREG_xBX)
608 | RT_BIT_32(X86_GREG_xSI)
609 | RT_BIT_32(X86_GREG_xDI))
610 & ~IEMNATIVE_REG_FIXED_MASK); /* pick reg not requiring rex prefix */
611 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf));
612 if (offPage >= 128 - cbLeft)
613 {
614 off = iemNativeEmitAddGprImm(pReNative, off, idxRegTmp, offPage & ~(uint16_t)3);
615 offPage &= 3;
616 }
617
618 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 14 + 54 + 8 + 6 + BP_ON_OBSOLETION /* = 87 */);
619
620 if (cbLeft > 8)
621 switch (offPage & 3)
622 {
623 case 0:
624 break;
625 case 1: /* cost: 6 + 8 = 14 */
626 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
627 RT_FALL_THRU();
628 case 2: /* cost: 8 */
629 CHECK_OPCODES_CMP_IMM16(idxRegTmp);
630 break;
631 case 3: /* cost: 6 */
632 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
633 break;
634 }
635
636 while (cbLeft >= 4)
637 CHECK_OPCODES_CMP_IMM32(idxRegTmp); /* max iteration: 24/4 = 6; --> cost: 6 * 9 = 54 */
638
639 if (cbLeft >= 2)
640 CHECK_OPCODES_CMP_IMM16(idxRegTmp); /* cost: 8 */
641 if (cbLeft)
642 CHECK_OPCODES_CMP_IMM8(idxRegTmp); /* cost: 6 */
643
644 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
645 iemNativeRegFreeTmp(pReNative, idxRegTmp);
646 }
647 else
648 {
649 /* RDI = &pbInstrBuf[offPage] */
650 uint8_t const idxRegDi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xDI));
651 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegDi, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
652 if (offPage != 0)
653 off = iemNativeEmitAddGprImm(pReNative, off, idxRegDi, offPage);
654
655 /* RSI = pbOpcodes */
656 uint8_t const idxRegSi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xSI));
657 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegSi, (uintptr_t)pbOpcodes);
658
659 /* RCX = counts. */
660 uint8_t const idxRegCx = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xCX));
661
662 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 10 + 5 + 5 + 3 + 4 + 3 + BP_ON_OBSOLETION /*= 35*/);
663
664 /** @todo profile and optimize this further. Maybe an idea to align by
665 * offPage if the two cannot be reconciled. */
666 /* Align by the page offset, so that at least one of the accesses are naturally aligned. */
667 switch (offPage & 7) /* max cost: 10 */
668 {
669 case 0:
670 break;
671 case 1: /* cost: 3+4+3 = 10 */
672 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
673 RT_FALL_THRU();
674 case 2: /* cost: 4+3 = 7 */
675 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
676 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
677 break;
678 case 3: /* cost: 3+3 = 6 */
679 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
680 RT_FALL_THRU();
681 case 4: /* cost: 3 */
682 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
683 break;
684 case 5: /* cost: 3+4 = 7 */
685 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
686 RT_FALL_THRU();
687 case 6: /* cost: 4 */
688 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
689 break;
690 case 7: /* cost: 3 */
691 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
692 break;
693 }
694
695 /* Compare qwords: */
696 uint32_t const cQWords = cbLeft >> 3;
697 CHECK_OPCODES_ECX_IMM(cQWords); /* cost: 5 */
698
699 pbCodeBuf[off++] = X86_OP_PRF_REPZ; /* cost: 5 */
700 CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_REX_W);
701 cbLeft &= 7;
702
703 if (cbLeft & 4)
704 CHECK_OPCODES_CMPSX(0xa7, 4, 0); /* cost: 3 */
705 if (cbLeft & 2)
706 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP); /* cost: 4 */
707 if (cbLeft & 1)
708 CHECK_OPCODES_CMPSX(0xa6, 1, 0); /* cost: 3 */
709
710 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
711 iemNativeRegFreeTmp(pReNative, idxRegCx);
712 iemNativeRegFreeTmp(pReNative, idxRegSi);
713 iemNativeRegFreeTmp(pReNative, idxRegDi);
714 }
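 /* Shape of the REPE CMPS path above (sketch, not exact encodings):
        mov  rdi, &pbInstrBuf[offPage]
        mov  rsi, pbOpcodes
        ; optional cmpsb/cmpsw/cmpsd to qword-align rdi
        mov  ecx, cQWords
        repe cmpsq
        ; tail: cmpsd/cmpsw/cmpsb for the remaining 0-7 bytes
    each compare branching to the obsolete-TB label on mismatch. */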
715
716#elif defined(RT_ARCH_ARM64)
717 /* We need pbInstrBuf in a register, whatever we do. */
718 uint8_t const idxRegSrc1Ptr = iemNativeRegAllocTmp(pReNative, &off);
719 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegSrc1Ptr, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
720
721 /* We also need at least one more register for holding bytes & words we
722 load via pbInstrBuf. */
723 uint8_t const idxRegSrc1Val = iemNativeRegAllocTmp(pReNative, &off);
724
725 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
726
727 /* One byte compare can be done with the opcode byte as an immediate. We'll
728 do this to uint16_t align src1. */
729 bool fPendingJmp = RT_BOOL(offPage & 1);
730 if (fPendingJmp)
731 {
732 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
733 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, *pbOpcodes++, false /*f64Bit*/);
734 offPage += 1;
735 cbLeft -= 1;
736 }
737
738 if (cbLeft > 0)
739 {
740 /* We need a register for holding the opcode bytes we're comparing with,
741 as CCMP only has a 5-bit immediate form and thus cannot hold bytes. */
742 uint8_t const idxRegSrc2Val = iemNativeRegAllocTmp(pReNative, &off);
743
744 /* Word (uint32_t) aligning the src1 pointer is best done using a 16-bit constant load. */
745 if ((offPage & 3) && cbLeft >= 2)
746 {
747 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 2);
748 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
749 if (fPendingJmp)
750 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
751 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
752 else
753 {
754 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
755 fPendingJmp = true;
756 }
757 pbOpcodes += 2;
758 offPage += 2;
759 cbLeft -= 2;
760 }
761
762 /* DWord (uint64_t) aligning the src1 pointer. We use a 32-bit constant here for simplicity. */
763 if ((offPage & 7) && cbLeft >= 4)
764 {
765 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 4);
766 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
767 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
768 if (fPendingJmp)
769 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
770 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
771 else
772 {
773 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
774 fPendingJmp = true;
775 }
776 pbOpcodes += 4;
777 offPage += 4;
778 cbLeft -= 4;
779 }
780
781 /*
782 * If we've got 16 bytes or more left, switch to memcmp-style.
783 */
784 if (cbLeft >= 16)
785 {
786 /* We need a pointer to the copy of the original opcode bytes. */
787 uint8_t const idxRegSrc2Ptr = iemNativeRegAllocTmp(pReNative, &off);
788 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Ptr, (uintptr_t)pbOpcodes);
789
790 /* If there are more than 32 bytes to compare we create a loop, for
791 which we'll need a loop register. */
792 if (cbLeft >= 64)
793 {
794 if (fPendingJmp)
795 {
796 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
797 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
798 fPendingJmp = false;
799 }
800
801 uint8_t const idxRegLoop = iemNativeRegAllocTmp(pReNative, &off);
802 uint16_t const cLoops = cbLeft / 32;
803 cbLeft = cbLeft % 32;
804 pbOpcodes += cLoops * 32;
805 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegLoop, cLoops);
806
807 if (offPage != 0) /** @todo optimize out this instruction. */
808 {
809 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, offPage);
810 offPage = 0;
811 }
812
813 uint32_t const offLoopStart = off;
814 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 0);
815 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 0);
816 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
817
818 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 1);
819 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 1);
820 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
821 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
822
823 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 2);
824 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 2);
825 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
826 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
827
828 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 3);
829 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 3);
830 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
831 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
832
833 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
834 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
835
836 /* Advance and loop. */
837 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, 0x20);
838 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc2Ptr, idxRegSrc2Ptr, 0x20);
839 pu32CodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegLoop, idxRegLoop, 1, false /*f64Bit*/, true /*fSetFlags*/);
840 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, (int32_t)offLoopStart - (int32_t)off);
841
842 iemNativeRegFreeTmp(pReNative, idxRegLoop);
843 }
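            /* Sketch of the loop above: each iteration loads and compares four qword
               pairs (cmp followed by three conditional ccmp.eq), branches to ObsoleteTb
               on any mismatch, advances both pointers by 0x20 and loops while the
               counter is non-zero. */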
844
845 /* Deal with any remaining dwords (uint64_t). There can be up to
846 three if we looped and four if we didn't. */
847 uint32_t offSrc2 = 0;
848 while (cbLeft >= 8)
849 {
850 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val,
851 idxRegSrc1Ptr, offPage / 8);
852 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val,
853 idxRegSrc2Ptr, offSrc2 / 8);
854 if (fPendingJmp)
855 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
856 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
857 else
858 {
859 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
860 fPendingJmp = true;
861 }
862 pbOpcodes += 8;
863 offPage += 8;
864 offSrc2 += 8;
865 cbLeft -= 8;
866 }
867
868 iemNativeRegFreeTmp(pReNative, idxRegSrc2Ptr);
869 /* max cost thus far: memcmp-loop=43 vs memcmp-no-loop=30 */
870 }
871 /*
872 * Otherwise, we compare with constants and merge with the general mop-up.
873 */
874 else
875 {
876 while (cbLeft >= 8)
877 {
878 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr,
879 offPage / 8);
880 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Val,
881 RT_MAKE_U64_FROM_MSB_U8(pbOpcodes[7], pbOpcodes[6], pbOpcodes[5], pbOpcodes[4],
882 pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
883 if (fPendingJmp)
884 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
885 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, true /*f64Bit*/);
886 else
887 {
888 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, true /*f64Bit*/);
889 fPendingJmp = true;
890 }
891 pbOpcodes += 8;
892 offPage += 8;
893 cbLeft -= 8;
894 }
895 /* max cost thus far: 21 */
896 }
897
898 /* Deal with any remaining bytes (7 or less). */
899 Assert(cbLeft < 8);
900 if (cbLeft >= 4)
901 {
902 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr,
903 offPage / 4);
904 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
905 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
906 if (fPendingJmp)
907 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
908 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
909 else
910 {
911 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
912 fPendingJmp = true;
913 }
914 pbOpcodes += 4;
915 offPage += 4;
916 cbLeft -= 4;
917
918 }
919
920 if (cbLeft >= 2)
921 {
922 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr,
923 offPage / 2);
924 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
925 if (fPendingJmp)
926 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
927 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
928 else
929 {
930 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
931 fPendingJmp = true;
932 }
933 pbOpcodes += 2;
934 offPage += 2;
935 cbLeft -= 2;
936 }
937
938 if (cbLeft > 0)
939 {
940 Assert(cbLeft == 1);
941 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
942 if (fPendingJmp)
943 {
944 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, pbOpcodes[0]);
945 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
946 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
947 }
948 else
949 {
950 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, pbOpcodes[0], false /*f64Bit*/);
951 fPendingJmp = true;
952 }
953 pbOpcodes += 1;
954 offPage += 1;
955 cbLeft -= 1;
956 }
957
958 iemNativeRegFreeTmp(pReNative, idxRegSrc2Val);
959 }
960 Assert(cbLeft == 0);
961
962 /*
963 * Finally, the branch on difference.
964 */
965 if (fPendingJmp)
966 {
967 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
968 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
969 }
970 RT_NOREF(pu32CodeBuf, cbLeft, offPage, pbOpcodes, offConsolidatedJump, idxLabelObsoleteTb);
971
972 /* max costs: memcmp-loop=54; memcmp-no-loop=41; only-src1-ptr=32 */
973 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
974 iemNativeRegFreeTmp(pReNative, idxRegSrc1Val);
975 iemNativeRegFreeTmp(pReNative, idxRegSrc1Ptr);
976
977#else
978# error "Port me"
979#endif
980 return off;
981}
982
983
984/** Duplicated in IEMAllThrdFuncsBltIn.cpp. */
985DECL_FORCE_INLINE(RTGCPHYS) iemTbGetRangePhysPageAddr(PCIEMTB pTb, uint8_t idxRange)
986{
987 Assert(idxRange < RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges)));
988 uint8_t const idxPage = pTb->aRanges[idxRange].idxPhysPage;
989 Assert(idxPage <= RT_ELEMENTS(pTb->aGCPhysPages));
990 if (idxPage == 0)
991 return pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
992 Assert(!(pTb->aGCPhysPages[idxPage - 1] & GUEST_PAGE_OFFSET_MASK));
993 return pTb->aGCPhysPages[idxPage - 1];
994}
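/* Usage sketch: combined with the range's page offset this yields the expected
   physical address of a range byte, e.g.
       RTGCPHYS const GCPhys = iemTbGetRangePhysPageAddr(pTb, idxRange)
                             | pTb->aRanges[idxRange].offPhysPage;
   which is how the branch/page checks below compute their reference values. */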
995
996
997/**
998 * Macro that implements PC check after a conditional branch.
999 */
1000#define BODY_CHECK_PC_AFTER_BRANCH(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
1001 RT_NOREF(a_cbInstr); \
1002 off = iemNativeEmitBltInCheckPcAfterBranch(pReNative, off, a_pTb, a_idxRange, a_offRange)
1003
1004DECL_FORCE_INLINE(uint32_t)
1005iemNativeEmitBltInCheckPcAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb,
1006 uint8_t idxRange, uint16_t offRange)
1007{
1008#ifdef VBOX_STRICT
1009 off = iemNativeEmitMarker(pReNative, off, 0x80000004);
1010#endif
1011
1012 /*
1013 * The GCPhysRangePageWithOffset value in the threaded function is a fixed
1014 * constant for us here.
1015 *
1016 * We can pretend that iem.s.cbInstrBufTotal is X86_PAGE_SIZE here, because
1017 * it serves no purpose as a CS.LIM, if that's needed we've just performed
1018 * it, and as long as we don't implement code TLB reload code here there is
1019 * no point in checking that the TLB data we're using is still valid.
1020 *
1021 * What we do is:
1022 * 1. Calculate the FLAT PC (RIP + CS.BASE).
1023 * 2. Subtract iem.s.uInstrBufPc from it to get 'off'.
1024 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal or
1025 * we're in the wrong spot and need to find a new TB.
1026 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
1027 * GCPhysRangePageWithOffset constant mentioned above.
1028 *
1029 * The adding of CS.BASE to RIP can be skipped in the first step if we're
1030 * in 64-bit code or flat 32-bit.
1031 */
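 /* As plain C the above amounts to roughly this (sketch; local names are
    illustrative and GCPhysRangePageWithOffset is the constant computed near
    the end of this function):
        uint64_t const uFlatPc = pVCpu->cpum.GstCtx.rip
                               + (fFlatMode ? 0 : pVCpu->cpum.GstCtx.cs.u64Base);
        uint64_t const offBuf  = uFlatPc - pVCpu->iem.s.uInstrBufPc;
        if (   offBuf >= X86_PAGE_SIZE
            || pVCpu->iem.s.GCPhysInstrBuf + offBuf != GCPhysRangePageWithOffset)
            goto CheckBranchMiss;
  */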
1032
1033 /* Allocate registers for step 1. Get the shadowed stuff before allocating
1034 the temp register, so we don't accidentally clobber something we'll be
1035 needing again immediately. This is why we get idxRegCsBase here. */
1036 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1037 kIemNativeGstRegUse_ReadOnly);
1038 uint8_t const idxRegCsBase = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX
1039 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
1040 kIemNativeGstRegUse_ReadOnly);
1041
1042 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1043
1044#ifdef VBOX_STRICT
1045 /* Do assertions before idxRegTmp contains anything. */
1046 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1047# ifdef RT_ARCH_AMD64
1048 {
1049 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1050 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1051 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1052 {
1053 /* cmp r/m64, imm8 */
1054 pbCodeBuf[off++] = X86_OP_REX_W;
1055 pbCodeBuf[off++] = 0x83;
1056 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1057 pbCodeBuf[off++] = 0;
1058 /* je rel8 */
1059 pbCodeBuf[off++] = 0x74;
1060 pbCodeBuf[off++] = 1;
1061 /* int3 */
1062 pbCodeBuf[off++] = 0xcc;
1063
1064 }
1065
1066 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1067 /* test r/m64, imm32 */
1068 pbCodeBuf[off++] = X86_OP_REX_W;
1069 pbCodeBuf[off++] = 0xf7;
1070 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1071 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1072 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1073 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1074 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1075 /* jz rel8 */
1076 pbCodeBuf[off++] = 0x74;
1077 pbCodeBuf[off++] = 1;
1078 /* int3 */
1079 pbCodeBuf[off++] = 0xcc;
1080 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1081 }
1082# else
1083
1084 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1085 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1086 {
1087 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1088# ifdef RT_ARCH_ARM64
1089 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1090 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, idxRegTmp);
1091 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2004);
1092 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1093# else
1094# error "Port me!"
1095# endif
1096 }
1097# endif
1098
1099#endif /* VBOX_STRICT */
1100
1101 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1102 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1103 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1104 {
1105#ifdef RT_ARCH_ARM64
1106 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1107 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1108 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1109#else
1110 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1111 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1112#endif
1113 }
1114 else
1115 {
1116#ifdef RT_ARCH_ARM64
1117 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1118 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1119 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1120 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1121#else
1122 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1123 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1124 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1125#endif
1126 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1127 }
1128 iemNativeRegFreeTmp(pReNative, idxRegPc);
1129
1130 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal. */
1131 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1132 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_CheckBranchMiss);
1133
1134 /* 4. Add iem.s.GCPhysInstrBuf and compare with GCPhysRangePageWithOffset. */
1135#ifdef RT_ARCH_AMD64
1136 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1137 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1138 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1139 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1140 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1141
1142#elif defined(RT_ARCH_ARM64)
1143 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
1144
1145 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1146 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1147 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1148 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1149
1150# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1151 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1152 off = iemNativeEmitJzToFixed(pReNative, off, off + 2 /* correct for ARM64 */);
1153 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1154# endif
1155 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1156#else
1157# error "Port me"
1158#endif
1159
1160 RTGCPHYS const GCPhysRangePageWithOffset = ( iemTbGetRangePhysPageAddr(pTb, idxRange)
1161 | pTb->aRanges[idxRange].offPhysPage)
1162 + offRange;
1163 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(pReNative, off, idxRegTmp, GCPhysRangePageWithOffset,
1164 kIemNativeLabelType_CheckBranchMiss);
1165
1166 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1167 return off;
1168}
1169
1170
1171/**
1172 * Macro that implements TLB loading and pbInstrBuf updating for an
1173 * instruction crossing into a new page.
1174 *
1175 * This may long jump if we're raising a \#PF, \#GP or similar trouble.
1176 */
1177#define BODY_LOAD_TLB_FOR_NEW_PAGE(a_pTb, a_offInstr, a_idxRange, a_cbInstr) \
1178 RT_NOREF(a_cbInstr); \
1179 off = iemNativeEmitBltLoadTlbForNewPage(pReNative, off, pTb, a_idxRange, a_offInstr)
1180
1181DECL_FORCE_INLINE(uint32_t)
1182iemNativeEmitBltLoadTlbForNewPage(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint8_t offInstr)
1183{
1184#ifdef VBOX_STRICT
1185 off = iemNativeEmitMarker(pReNative, off, 0x80000005);
1186#endif
1187
1188 /*
1189 * Move/spill/flush stuff out of call-volatile registers.
1190 * This is the easy way out. We could contain this to the tlb-miss branch
1191 * by saving and restoring active stuff here.
1192 */
1193 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
1194 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
1195
1196 /*
1197 * Define labels and allocate the register for holding the GCPhys of the new page.
1198 */
1199 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1200 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1201 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1202 uint32_t const idxRegGCPhys = iemNativeRegAllocTmp(pReNative, &off);
1203
1204 /*
1205 * First we try to go via the TLB.
1206 */
1207 /** @todo */
1208
1209 /*
1210 * TLB miss: Call iemNativeHlpMemCodeNewPageTlbMissWithOff to do the work.
1211 */
1212 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1213
1214 /* IEMNATIVE_CALL_ARG1_GREG = offInstr */
1215 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offInstr);
1216
1217 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1218 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1219
1220 /* Done setting up parameters, make the call. */
1221 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpMemCodeNewPageTlbMissWithOff);
1222
1223 /* Move the result to the right register. */
1224 if (idxRegGCPhys != IEMNATIVE_CALL_RET_GREG)
1225 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegGCPhys, IEMNATIVE_CALL_RET_GREG);
1226
1227 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1228
1229 /*
1230 * Now check the physical address of the page matches the expected one.
1231 */
1232 RTGCPHYS const GCPhysNewPage = iemTbGetRangePhysPageAddr(pTb, idxRange);
1233 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(pReNative, off, idxRegGCPhys, GCPhysNewPage,
1234 kIemNativeLabelType_ObsoleteTb);
1235
1236 iemNativeRegFreeTmp(pReNative, idxRegGCPhys);
1237 return off;
1238}
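/* The TLB miss path above boils down to this (sketch):
       RTGCPHYS const GCPhysNew = iemNativeHlpMemCodeNewPageTlbMissWithOff(pVCpu, offInstr);
       if (GCPhysNew != iemTbGetRangePhysPageAddr(pTb, idxRange))
           goto ObsoleteTb;
   The TLB hit fast path is still marked as a todo at this revision. */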
1239
1240
1241/**
1242 * Macro that implements TLB loading and pbInstrBuf updating when
1243 * branching or when crossing a page on an instruction boundary.
1244 *
1245 * This differs from BODY_LOAD_TLB_FOR_NEW_PAGE in that it will first check if
1246 * it is an inter-page branch and also check the page offset.
1247 *
1248 * This may long jump if we're raising a \#PF, \#GP or similar trouble.
1249 */
1250#define BODY_LOAD_TLB_AFTER_BRANCH(a_pTb, a_idxRange, a_cbInstr) \
1251 RT_NOREF(a_cbInstr); \
1252 off = iemNativeEmitBltLoadTlbAfterBranch(pReNative, off, pTb, a_idxRange)
1253
1254#if 0
1255do { \
1256 /* Is RIP within the current code page? */ \
1257 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu)); \
1258 uint64_t const uPc = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base; \
1259 uint64_t const off = uPc - pVCpu->iem.s.uInstrBufPc; \
1260 if (off < pVCpu->iem.s.cbInstrBufTotal) \
1261 { \
1262 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK)); \
1263 Assert(pVCpu->iem.s.pbInstrBuf); \
1264 RTGCPHYS const GCPhysRangePageWithOffset = iemTbGetRangePhysPageAddr(a_pTb, a_idxRange) \
1265 | pTb->aRanges[(a_idxRange)].offPhysPage; \
1266 if (GCPhysRangePageWithOffset == pVCpu->iem.s.GCPhysInstrBuf + off) \
1267 { /* we're good */ } \
1268 else \
1269 { \
1270 Log7(("TB jmp miss: %p at %04x:%08RX64 LB %u; branching/1; GCPhysWithOffset=%RGp expected %RGp, pbInstrBuf=%p - #%u\n", \
1271 (a_pTb), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, (a_cbInstr), \
1272 pVCpu->iem.s.GCPhysInstrBuf + off, GCPhysRangePageWithOffset, pVCpu->iem.s.pbInstrBuf, __LINE__)); \
1273 RT_NOREF(a_cbInstr); \
1274 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses); \
1275 return VINF_IEM_REEXEC_BREAK; \
1276 } \
1277 } \
1278 else \
1279 { \
1280 /* Must translate new RIP. */ \
1281 pVCpu->iem.s.pbInstrBuf = NULL; \
1282 pVCpu->iem.s.offCurInstrStart = 0; \
1283 pVCpu->iem.s.offInstrNextByte = 0; \
1284 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL); \
1285 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) || !pVCpu->iem.s.pbInstrBuf); \
1286 \
1287 RTGCPHYS const GCPhysRangePageWithOffset = iemTbGetRangePhysPageAddr(a_pTb, a_idxRange) \
1288 | pTb->aRanges[(a_idxRange)].offPhysPage; \
1289 uint64_t const offNew = uPc - pVCpu->iem.s.uInstrBufPc; \
1290 if ( GCPhysRangePageWithOffset == pVCpu->iem.s.GCPhysInstrBuf + offNew \
1291 && pVCpu->iem.s.pbInstrBuf) \
1292 { /* likely */ } \
1293 else \
1294 { \
1295 Log7(("TB jmp miss: %p at %04x:%08RX64 LB %u; branching/2; GCPhysWithOffset=%RGp expected %RGp, pbInstrBuf=%p - #%u\n", \
1296 (a_pTb), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, (a_cbInstr), \
1297 pVCpu->iem.s.GCPhysInstrBuf + offNew, GCPhysRangePageWithOffset, pVCpu->iem.s.pbInstrBuf, __LINE__)); \
1298 RT_NOREF(a_cbInstr); \
1299 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses); \
1300 return VINF_IEM_REEXEC_BREAK; \
1301 } \
1302 } \
1303 } while(0)
1304#endif
1305
1306DECL_FORCE_INLINE(uint32_t)
1307iemNativeEmitBltLoadTlbAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange)
1308{
1309#ifdef VBOX_STRICT
1310 off = iemNativeEmitMarker(pReNative, off, 0x80000006);
1311#endif
1312
1313 /*
1314 * Define labels and allocate the register for holding the GCPhys of the new page.
1315 */
1316 uint32_t const idxLabelCheckBranchMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckBranchMiss);
1317 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1318 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1319 //
1320
1321 RTGCPHYS const GCPhysRangePageWithOffset = iemTbGetRangePhysPageAddr(pTb, idxRange)
1322 | pTb->aRanges[idxRange].offPhysPage;
1323
1324 /*
1325 *
1326 * First check if RIP is within the current code page.
1327 *
1328 * This is very similar to iemNativeEmitBltInCheckPcAfterBranch, the only
1329 * difference is what we do when stuff doesn't match up.
1330 *
1331 * What we do is:
1332 * 1. Calculate the FLAT PC (RIP + CS.BASE).
1333 * 2. Subtract iem.s.uInstrBufPc from it to get 'off'.
1334 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal or
1335 * we need to retranslate RIP via the TLB.
1336 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
1337 * GCPhysRangePageWithOffset constant mentioned above.
1338 *
1339 * The adding of CS.BASE to RIP can be skipped in the first step if we're
1340 * in 64-bit code or flat 32-bit.
1341 *
1342 */
1343
1344 /* Allocate registers for step 1. Get the shadowed stuff before allocating
1345 the temp register, so we don't accidentally clobber something we'll be
1346 needing again immediately. This is why we get idxRegCsBase here. */
1347 /** @todo save+restore active registers and guest shadows in tlb-miss! */
1348 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
1349 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1350 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1351 uint8_t const idxRegCsBase = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX
1352 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
1353 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1354
1355 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off); /* volatile reg is okay for these two */
1356 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
1357
1358#ifdef VBOX_STRICT
1359 /* Do assertions before idxRegTmp contains anything. */
1360 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1361# ifdef RT_ARCH_AMD64
1362 {
1363 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1364 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1365 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1366 {
1367 /* cmp r/m64, imm8 */
1368 pbCodeBuf[off++] = X86_OP_REX_W;
1369 pbCodeBuf[off++] = 0x83;
1370 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1371 pbCodeBuf[off++] = 0;
1372 /* je rel8 */
1373 pbCodeBuf[off++] = 0x74;
1374 pbCodeBuf[off++] = 1;
1375 /* int3 */
1376 pbCodeBuf[off++] = 0xcc;
1377
1378 }
1379
1380 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1381 /* test r/m64, imm32 */
1382 pbCodeBuf[off++] = X86_OP_REX_W;
1383 pbCodeBuf[off++] = 0xf7;
1384 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1385 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1386 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1387 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1388 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1389 /* jz rel8 */
1390 pbCodeBuf[off++] = 0x74;
1391 pbCodeBuf[off++] = 1;
1392 /* int3 */
1393 pbCodeBuf[off++] = 0xcc;
1394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1395 }
1396# else
1397
1398 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1399 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1400 {
1401 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1402# ifdef RT_ARCH_ARM64
1403 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1404 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, idxRegTmp);
1405 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2006);
1406 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1407# else
1408# error "Port me!"
1409# endif
1410 }
1411# endif
1412
1413#endif /* VBOX_STRICT */
1414
1415 /* Because we're lazy, we'll jump back here to recalc 'off' and share the
1416 GCPhysRangePageWithOffset check. This is a little risky, so we use the
1417 2nd register to check if we've looped more than once already. */
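 /* Sketch of the resulting control flow (illustration only; fLoopedOnce stands
    in for the sentinel value kept in idxRegTmp2):

        bool fLoopedOnce = false;
    RedoChecks:
        // recalculate 'off' and redo the checks sketched above
        if (offNew >= X86_PAGE_SIZE)
        {
            if (fLoopedOnce)                            // second failure: give up
                goto CheckBranchMiss;
            iemNativeHlpMemCodeNewPageTlbMiss(pVCpu);   // load the new page
            fLoopedOnce = true;
            goto RedoChecks;
        }
  */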
1418 off = iemNativeEmitGprZero(pReNative, off, idxRegTmp2);
1419
1420 uint32_t const offLabelRedoChecks = off;
1421
1422 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1423 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1424 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1425 {
1426#ifdef RT_ARCH_ARM64
1427 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1428 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1429 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1430#else
1431 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1432 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1433#endif
1434 }
1435 else
1436 {
1437#ifdef RT_ARCH_ARM64
1438 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1439 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1440 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1441 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1442#else
1443 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1444 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1445 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1446#endif
1447 }
1448
1449 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal.
1450 Unlike iemNativeEmitBltInCheckPcAfterBranch, we'll jump to the TLB load code if this fails. */
1451 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1452 uint32_t const offFixedJumpToTlbLoad = off;
1453 off = iemNativeEmitJaToFixed(pReNative, off, off /* (ASSUME ja rel8 suffices) */);
1454
1455 /* 4a. Add iem.s.GCPhysInstrBuf to off ... */
1456#ifdef RT_ARCH_AMD64
1457 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1458 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1459 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1460 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1461 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1462
1463#elif defined(RT_ARCH_ARM64)
1464
1465 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1466 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1467 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1468 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1469
1470# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1471 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1472 off = iemNativeEmitJzToFixed(pReNative, off, off + 2 /* correct for ARM64 */);
1473 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1474# endif
1475#else
1476# error "Port me"
1477#endif
1478
1479 /* 4b. ... and compare with GCPhysRangePageWithOffset.
1480
1481 Unlike iemNativeEmitBltInCheckPcAfterBranch we'll have to be more
1482 careful and avoid implicit temporary register usage here.
1483
1484 Unlike the threaded version of this code, we do not obsolete TBs here, both to
1485 reduce the code size and because indirect calls may legally end at the
1486 same offset in two different pages depending on the program state. */
1487 /** @todo synch the threaded BODY_LOAD_TLB_AFTER_BRANCH version with this. */
1488 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp2, GCPhysRangePageWithOffset);
1489 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegTmp, idxRegTmp2);
1490 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelCheckBranchMiss);
1491 uint32_t const offFixedJumpToEnd = off;
1492 off = iemNativeEmitJmpToFixed(pReNative, off, off + 512 /* force rel32 */);
1493
1494 /*
1495 * First we try to go via the TLB.
1496 */
1497 iemNativeFixupFixedJump(pReNative, offFixedJumpToTlbLoad, off);
1498
1499 /* Check that we haven't been here before. */
1500 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxRegTmp2, false /*f64Bit*/, idxLabelCheckBranchMiss);
1501
1502 /*
1503 * TLB miss: Call iemNativeHlpMemCodeNewPageTlbMiss to do the work.
1504 */
1505 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1506
1507 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1508 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1509
1510 /* Done setting up parameters, make the call. */
1511 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpMemCodeNewPageTlbMiss);
1512
1513 /* Jmp back to the start and redo the checks. */
1514 off = iemNativeEmitLoadGpr8Imm(pReNative, off, idxRegTmp2, 1); /* indicate that we've looped once already */
1515 off = iemNativeEmitJmpToFixed(pReNative, off, offLabelRedoChecks);
1516
1517 /* The end. */
1518 iemNativeFixupFixedJump(pReNative, offFixedJumpToEnd, off);
1519
1520 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1521 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1522 iemNativeRegFreeTmp(pReNative, idxRegPc);
1523 if (idxRegCsBase != UINT8_MAX)
1524 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1525 return off;
1526}
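
/* The builtin functions below use this emitter via the BODY_LOAD_TLB_AFTER_BRANCH
   macro (defined elsewhere). Presumably that macro simply wraps the emitter above,
   along the lines of this assumed sketch:

       // assumption - the real macro definition is not part of this excerpt:
       // #define BODY_LOAD_TLB_AFTER_BRANCH(a_pTb, a_idxRange, a_cbInstr) \
       //     off = iemNativeEmitBltLoadTlbAfterBranch(pReNative, off, a_pTb, a_idxRange)
 */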
1527
1528
1529#ifdef BODY_CHECK_CS_LIM
1530/**
1531 * Built-in function that checks that EIP/IP + uParam0 is within CS.LIM,
1532 * raising a \#GP(0) if this isn't the case.
1533 */
1534IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
1535{
1536 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1537 BODY_SET_CUR_INSTR();
1538 BODY_FLUSH_PENDING_WRITES();
1539 BODY_CHECK_CS_LIM(cbInstr);
1540 return off;
1541}
1542#endif
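
/*
 * Note on BODY_CHECK_CS_LIM(cbInstr): conceptually the emitted native code
 * performs roughly the following guest-state check (simplified sketch of the
 * semantics only; the actual macro emits native instructions that raise
 * \#GP(0) when the check fails):
 *
 *     if (pVCpu->cpum.GstCtx.rip + cbInstr - 1 > pVCpu->cpum.GstCtx.cs.u32Limit)
 *         // -> raise #GP(0)
 */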
1543
1544
1545#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_CS_LIM)
1546/**
1547 * Built-in function for re-checking opcodes and CS.LIM after an instruction
1548 * that may have modified them.
1549 */
1550IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
1551{
1552 PCIEMTB const pTb = pReNative->pTbOrg;
1553 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1554 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1555 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1556 BODY_SET_CUR_INSTR();
1557 BODY_FLUSH_PENDING_WRITES();
1558 BODY_CHECK_CS_LIM(cbInstr);
1559 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1560 return off;
1561}
1562#endif
1563
1564
1565#if defined(BODY_CHECK_OPCODES)
1566/**
1567 * Built-in function for re-checking opcodes after an instruction that may have
1568 * modified them.
1569 */
1570IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
1571{
1572 PCIEMTB const pTb = pReNative->pTbOrg;
1573 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1574 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1575 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1576 BODY_SET_CUR_INSTR();
1577 BODY_FLUSH_PENDING_WRITES();
1578 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1579 return off;
1580}
1581#endif
1582
1583
1584#if defined(BODY_CHECK_OPCODES) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1585/**
1586 * Built-in function for re-checking opcodes and considering the need for CS.LIM
1587 * checking after an instruction that may have modified them.
1588 */
1589IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
1590{
1591 PCIEMTB const pTb = pReNative->pTbOrg;
1592 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1593 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1594 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1595 BODY_SET_CUR_INSTR();
1596 BODY_FLUSH_PENDING_WRITES();
1597 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1598 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1599 return off;
1600}
1601#endif
1602
1603
1604/*
1605 * Post-branching checkers.
1606 */
1607
1608#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1609/**
1610 * Built-in function for checking CS.LIM, checking the PC and checking opcodes
1611 * after conditional branching within the same page.
1612 *
1613 * @see iemThreadedFunc_BltIn_CheckPcAndOpcodes
1614 */
1615IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
1616{
1617 PCIEMTB const pTb = pReNative->pTbOrg;
1618 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1619 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1620 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1621 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1622 BODY_SET_CUR_INSTR();
1623 BODY_FLUSH_PENDING_WRITES();
1624 BODY_CHECK_CS_LIM(cbInstr);
1625 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1626 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1627 //LogFunc(("okay\n"));
1628 return off;
1629}
1630#endif
1631
1632
1633#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH)
1634/**
1635 * Built-in function for checking the PC and checking opcodes after conditional
1636 * branching within the same page.
1637 *
1638 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1639 */
1640IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
1641{
1642 PCIEMTB const pTb = pReNative->pTbOrg;
1643 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1644 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1645 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1646 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1647 BODY_SET_CUR_INSTR();
1648 BODY_FLUSH_PENDING_WRITES();
1649 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1650 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1651 //LogFunc(("okay\n"));
1652 return off;
1653}
1654#endif
1655
1656
1657#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1658/**
1659 * Built-in function for checking the PC and checking opcodes and considering
1660 * the need for CS.LIM checking after conditional branching within the same
1661 * page.
1662 *
1663 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1664 */
1665IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
1666{
1667 PCIEMTB const pTb = pReNative->pTbOrg;
1668 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1669 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1670 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1671 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1672 BODY_SET_CUR_INSTR();
1673 BODY_FLUSH_PENDING_WRITES();
1674 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1675 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1676 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1677 //LogFunc(("okay\n"));
1678 return off;
1679}
1680#endif
1681
1682
1683#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1684/**
1685 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1686 * transitioning to a different code page.
1687 *
1688 * The code page transition can either happen naturally over onto the next page
1689 * (with the instruction starting at page offset zero) or by means of branching.
1690 *
1691 * @see iemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1692 */
1693IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
1694{
1695 PCIEMTB const pTb = pReNative->pTbOrg;
1696 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1697 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1698 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1699 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1700 BODY_SET_CUR_INSTR();
1701 BODY_FLUSH_PENDING_WRITES();
1702 BODY_CHECK_CS_LIM(cbInstr);
1703 Assert(offRange == 0);
1704 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1705 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1706 //LogFunc(("okay\n"));
1707 return off;
1708}
1709#endif
1710
1711
1712#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH)
1713/**
1714 * Built-in function for loading TLB and checking opcodes when transitioning to
1715 * a different code page.
1716 *
1717 * The code page transition can either happen naturally over onto the next page
1718 * (with the instruction starting at page offset zero) or by means of branching.
1719 *
1720 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1721 */
1722IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
1723{
1724 PCIEMTB const pTb = pReNative->pTbOrg;
1725 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1726 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1727 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1728 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1729 BODY_SET_CUR_INSTR();
1730 BODY_FLUSH_PENDING_WRITES();
1731 Assert(offRange == 0);
1732 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1733 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1734 //LogFunc(("okay\n"));
1735 return off;
1736}
1737#endif
1738
1739
1740#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1741/**
1742 * Built-in function for loading TLB and checking opcodes and considering the
1743 * need for CS.LIM checking when transitioning to a different code page.
1744 *
1745 * The code page transition can either happen naturally over onto the next page
1746 * (with the instruction starting at page offset zero) or by means of branching.
1747 *
1748 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1749 */
1750IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
1751{
1752 PCIEMTB const pTb = pReNative->pTbOrg;
1753 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1754 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1755 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1756 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1757 BODY_SET_CUR_INSTR();
1758 BODY_FLUSH_PENDING_WRITES();
1759 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1760 Assert(offRange == 0);
1761 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1762 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1763 //LogFunc(("okay\n"));
1764 return off;
1765}
1766#endif
1767
1768
1769
1770/*
1771 * Natural page crossing checkers.
1772 */
1773
1774#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1775/**
1776 * Built-in function for checking CS.LIM, loading TLB and checking opcodes on
1777 * both pages when transitioning to a different code page.
1778 *
1779 * This is used when the previous instruction requires revalidation of opcode
1780 * bytes and the current instruction straddles a page boundary with opcode bytes
1781 * in both the old and the new page.
1782 *
1783 * @see iemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1784 */
1785IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
1786{
1787 PCIEMTB const pTb = pReNative->pTbOrg;
1788 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1789 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1790 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1791 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1792 uint32_t const idxRange2 = idxRange1 + 1;
1793 BODY_SET_CUR_INSTR();
1794 BODY_FLUSH_PENDING_WRITES();
1795 BODY_CHECK_CS_LIM(cbInstr);
1796 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1797 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1798 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1799 return off;
1800}
1801#endif
1802
1803
1804#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1805/**
1806 * Built-in function for loading TLB and checking opcodes on both pages when
1807 * transitioning to a different code page.
1808 *
1809 * This is used when the previous instruction requires revalidation of opcode
1810 * bytes and the current instruction straddles a page boundary with opcode bytes
1811 * in both the old and the new page.
1812 *
1813 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1814 */
1815IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
1816{
1817 PCIEMTB const pTb = pReNative->pTbOrg;
1818 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1819 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1820 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1821 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1822 uint32_t const idxRange2 = idxRange1 + 1;
1823 BODY_SET_CUR_INSTR();
1824 BODY_FLUSH_PENDING_WRITES();
1825 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1826 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1827 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1828 return off;
1829}
1830#endif
1831
1832
1833#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1834/**
1835 * Built-in function for loading TLB and checking opcodes on both pages and
1836 * considering the need for CS.LIM checking when transitioning to a different
1837 * code page.
1838 *
1839 * This is used when the previous instruction requires revalidation of opcode
1840 * bytes and the current instruction straddles a page boundary with opcode bytes
1841 * in both the old and the new page.
1842 *
1843 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1844 */
1845IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
1846{
1847 PCIEMTB const pTb = pReNative->pTbOrg;
1848 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1849 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1850 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1851 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1852 uint32_t const idxRange2 = idxRange1 + 1;
1853 BODY_SET_CUR_INSTR();
1854 BODY_FLUSH_PENDING_WRITES();
1855 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1856 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1857 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1858 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1859 return off;
1860}
1861#endif
1862
1863
1864#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1865/**
1866 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1867 * advancing naturally to a different code page.
1868 *
1869 * Only opcodes on the new page are checked.
1870 *
1871 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1872 */
1873IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
1874{
1875 PCIEMTB const pTb = pReNative->pTbOrg;
1876 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1877 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1878 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1879 //uint32_t const offRange1 = (uint32_t)uParam2;
1880 uint32_t const idxRange2 = idxRange1 + 1;
1881 BODY_SET_CUR_INSTR();
1882 BODY_FLUSH_PENDING_WRITES();
1883 BODY_CHECK_CS_LIM(cbInstr);
1884 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1885 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1886 return off;
1887}
1888#endif
1889
1890
1891#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1892/**
1893 * Built-in function for loading TLB and checking opcodes when advancing
1894 * naturally to a different code page.
1895 *
1896 * Only opcodes on the new page are checked.
1897 *
1898 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1899 */
1900IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
1901{
1902 PCIEMTB const pTb = pReNative->pTbOrg;
1903 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1904 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1905 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1906 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1907 uint32_t const idxRange2 = idxRange1 + 1;
1908 BODY_SET_CUR_INSTR();
1909 BODY_FLUSH_PENDING_WRITES();
1910 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1911 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1912 return off;
1913}
1914#endif
1915
1916
1917#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1918/**
1919 * Built-in function for loading TLB and checking opcodes and considering the
1920 * need for CS.LIM checking when advancing naturally to a different code page.
1921 *
1922 * Only opcodes on the new page are checked.
1923 *
1924 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1925 */
1926IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
1927{
1928 PCIEMTB const pTb = pReNative->pTbOrg;
1929 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1930 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1931 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1932 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1933 uint32_t const idxRange2 = idxRange1 + 1;
1934 BODY_SET_CUR_INSTR();
1935 BODY_FLUSH_PENDING_WRITES();
1936 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1937 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1938 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1939 return off;
1940}
1941#endif
1942
1943
1944#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1945/**
1946 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1947 * advancing naturally to a different code page with first instr at byte 0.
1948 *
1949 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1950 */
1951IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
1952{
1953 PCIEMTB const pTb = pReNative->pTbOrg;
1954 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1955 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1956 BODY_SET_CUR_INSTR();
1957 BODY_FLUSH_PENDING_WRITES();
1958 BODY_CHECK_CS_LIM(cbInstr);
1959 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1960 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1961 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1962 return off;
1963}
1964#endif
1965
1966
1967#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1968/**
1969 * Built-in function for loading TLB and checking opcodes when advancing
1970 * naturally to a different code page with first instr at byte 0.
1971 *
1972 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1973 */
1974IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
1975{
1976 PCIEMTB const pTb = pReNative->pTbOrg;
1977 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1978 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1979 BODY_SET_CUR_INSTR();
1980 BODY_FLUSH_PENDING_WRITES();
1981 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1982 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1983 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1984 return off;
1985}
1986#endif
1987
1988
1989#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1990/**
1991 * Built-in function for loading TLB and checking opcodes and considering the
1992 * need for CS.LIM checking when advancing naturally to a different code page
1993 * with first instr at byte 0.
1994 *
1995 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1996 */
1997IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
1998{
1999 PCIEMTB const pTb = pReNative->pTbOrg;
2000 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
2001 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
2002 BODY_SET_CUR_INSTR();
2003 BODY_FLUSH_PENDING_WRITES();
2004 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
2005 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
2006 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
2007 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
2008 return off;
2009}
2010#endif
2011