VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompBltIn.cpp@ 102663

Last change on this file since 102663 was 102663, checked in by vboxsync, 16 months ago

VMM/IEM: Working on BODY_CHECK_PC_AFTER_BRANCH and side effects of it. Fixed bug in 8-bit register stores (AMD64). Fixed bug in iemNativeEmitBltInCheckOpcodes (AMD64). Added a way to inject state logging between each instruction, currently only really implemented for AMD64. Relaxed the heave flushing code, no need to set the buffer pointer to NULL. Started looking at avoiding code TLB flushing when allocating memory to replace zero pages. bugref:10371

1/* $Id: IEMAllN8veRecompBltIn.cpp 102663 2023-12-21 01:55:07Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, Emitters for Built-In Threaded Functions.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#include <VBox/vmm/iem.h>
37#include <VBox/vmm/cpum.h>
38#include <VBox/vmm/dbgf.h>
39#include "IEMInternal.h"
40#include <VBox/vmm/vmcc.h>
41#include <VBox/log.h>
42#include <VBox/err.h>
43#include <VBox/param.h>
44#include <iprt/assert.h>
45#include <iprt/string.h>
46#if defined(RT_ARCH_AMD64)
47# include <iprt/x86.h>
48#elif defined(RT_ARCH_ARM64)
49# include <iprt/armv8.h>
50#endif
51
52
53#include "IEMInline.h"
54#include "IEMThreadedFunctions.h"
55#include "IEMN8veRecompiler.h"
56#include "IEMN8veRecompilerEmit.h"
57
58
59
60/*********************************************************************************************************************************
61* TB Helper Functions *
62*********************************************************************************************************************************/
63#ifdef RT_ARCH_AMD64
64DECLASM(void) iemNativeHlpAsmSafeWrapLogCpuState(void);
65#endif
66
67
68
69/*********************************************************************************************************************************
70* Builtin functions *
71*********************************************************************************************************************************/
72
73/**
74 * Built-in function that does nothing.
75 *
76 * Whether this is called or not can be controlled by the entry in the
77 * IEMThreadedGenerator.katBltIns table. This can be useful to determine
78 * why behaviour changes when enabling the LogCpuState builtins, i.e.
79 * whether it's the reduced call count in the TBs or the threaded calls flushing
80 * register state.
81 */
82IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_Nop)
83{
84 RT_NOREF(pReNative, pCallEntry);
85 return off;
86}
87
88
89/**
90 * Emits code for LogCpuState.
91 *
92 * This shouldn't have any relevant impact on the recompiler state.
93 */
94IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_LogCpuState)
95{
96#ifdef RT_ARCH_AMD64
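 /* Sketch of the instruction sequence emitted below (AMD64 only so far):
        push  rax                 ; preserve rax
        push  imm32               ; pCallEntry->auParams[0]
        mov   rax, iemNativeHlpAsmSafeWrapLogCpuState
        call  rax
        pop   rax                 ; discard the pushed parameter
        pop   rax                 ; restore rax */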
97 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
98 /* push rax */
99 pbCodeBuf[off++] = 0x50 + X86_GREG_xAX;
100 /* push imm32 */
101 pbCodeBuf[off++] = 0x68;
102 pbCodeBuf[off++] = RT_BYTE1(pCallEntry->auParams[0]);
103 pbCodeBuf[off++] = RT_BYTE2(pCallEntry->auParams[0]);
104 pbCodeBuf[off++] = RT_BYTE3(pCallEntry->auParams[0]);
105 pbCodeBuf[off++] = RT_BYTE4(pCallEntry->auParams[0]);
106 /* mov rax, iemNativeHlpAsmSafeWrapLogCpuState */
107 pbCodeBuf[off++] = X86_OP_REX_W;
108 pbCodeBuf[off++] = 0xb8 + X86_GREG_xAX;
109 *(uint64_t *)&pbCodeBuf[off] = (uintptr_t)iemNativeHlpAsmSafeWrapLogCpuState;
110 off += sizeof(uint64_t);
111 /* call rax */
112 pbCodeBuf[off++] = 0xff;
113 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
114 /* pop rax - discards the imm32 parameter pushed above */
115 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
116 /* pop rax - restores the original rax value */
117 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
118 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
119
120#else
121 /** @todo Implement this */
122 AssertFailed();
123 RT_NOREF(pReNative, pCallEntry);
124#endif
125 return off;
126}
127
128
129/**
130 * Built-in function that calls a C-implementation function taking zero arguments.
131 */
132IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
133{
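 /* Parameter layout used by this built-in (mirrors the assignments below):
    auParams[0] = C-impl function pointer, auParams[1] = instruction length,
    auParams[2] = guest shadow register flush mask. */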
134 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
135 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
136 uint64_t const fGstShwFlush = pCallEntry->auParams[2];
137 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
138}
139
140
141/**
142 * Built-in function that checks for pending interrupts that can be delivered or
143 * forced action flags.
144 *
145 * This triggers after the completion of an instruction, so EIP is already at
146 * the next instruction. If an IRQ or important FF is pending, this will return
147 * a non-zero status that stops TB execution.
148 */
149IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
150{
151 RT_NOREF(pCallEntry);
152
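 /* Rough outline of the native code emitted below (a sketch; see the actual emitters):
      1. tmp = pVCpu->fLocalForcedActions masked down to the relevant VMCPU_FF bits;
         if zero, jump ahead to the VM-wide check.
      2. If bits other than the APIC/PIC interrupt FFs remain set, return VINF_IEM_REEXEC_BREAK.
      3. If EFLAGS.IF is clear, the IRQ cannot be delivered now, so jump to the VM-wide check.
      4. If no interrupt shadow is active, or uRipInhibitInt does not match the current PC,
         return VINF_IEM_REEXEC_BREAK so the pending IRQ can be serviced.
      5. VM-wide check: if any VM_FF_ALL_MASK bit is set in pVM->fGlobalForcedActions,
         return VINF_IEM_REEXEC_BREAK; otherwise continue executing the TB. */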
153 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
154 and I'm too lazy to create a 'Fixed' version of that one. */
155 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
156 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
157
158 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
159
160 /* Again, we need to load the extended EFLAGS before we actually need them
161 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
162 loaded them inside the check, as the shadow state would not be correct
163 when the code branches before the load. Ditto PC. */
164 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
165 kIemNativeGstRegUse_ReadOnly);
166
167 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
168
169 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
170
171 /*
172 * Start by checking the local forced actions of the EMT we're on for IRQs
173 * and other FFs that need servicing.
174 */
175 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
176 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
177 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
178 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
179 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
180 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
181 | VMCPU_FF_TLB_FLUSH
182 | VMCPU_FF_UNHALT ),
183 true /*fSetFlags*/);
184 /* If we end up with ZERO in idxTmpReg, there is nothing to do. */
185 uint32_t const offFixupJumpToVmCheck1 = off;
186 off = iemNativeEmitJzToFixed(pReNative, off, 0);
187
188 /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
189 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
190 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
191 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
192 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
193 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
194
195 /* So, it's only interrupt related FFs and we need to see if IRQs are being
196 suppressed by the CPU or not. */
197 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
198 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
199 idxLabelReturnBreak);
200
201 /* We've got shadow flags set, so we must check that the PC they are valid
202 for matches our current PC value. */
203 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
204 * a register. */
205 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
206 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
207
208 /*
209 * Now check the force flags of the VM.
210 */
211 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
212 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
213 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
214 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
215 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
216 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
217
218 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
219
220 /*
221 * We're good, no IRQs or FFs pending.
222 */
223 iemNativeRegFreeTmp(pReNative, idxTmpReg);
224 iemNativeRegFreeTmp(pReNative, idxEflReg);
225 iemNativeRegFreeTmp(pReNative, idxPcReg);
226
227 return off;
228}
229
230
231/**
232 * Built-in function that checks if IEMCPU::fExec has the expected value.
233 */
234IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
235{
236 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
237 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
238
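 /* Sketch: load iem.s.fExec, mask it with IEMTB_F_KEY_MASK and compare it against the
    expected value (masked the same way); on mismatch jump to the ReturnBreak label so
    the TB is exited with VINF_IEM_REEXEC_BREAK. */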
239 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
240 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
241 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
242 kIemNativeLabelType_ReturnBreak);
243 iemNativeRegFreeTmp(pReNative, idxTmpReg);
244 return off;
245}
246
247
248/**
249 * Sets idxTbCurInstr in preparation of raising an exception.
250 */
251/** @todo Optimize this, so we don't set the same value more than once. Just
252 * needs some tracking. */
253#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
254# define BODY_SET_CUR_INSTR() \
255 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr))
256#else
257# define BODY_SET_CUR_INSTR() ((void)0)
258#endif
259
260
261/**
262 * Macro that emits the 16/32-bit CS.LIM check.
263 */
264#define BODY_CHECK_CS_LIM(a_cbInstr) \
265 off = iemNativeEmitBltInCheckCsLim(pReNative, off, (a_cbInstr))
266
267DECL_FORCE_INLINE(uint32_t)
268iemNativeEmitBltInCheckCsLim(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
269{
270 Assert(cbInstr > 0);
271 Assert(cbInstr < 16);
272#ifdef VBOX_STRICT
273 off = iemNativeEmitMarker(pReNative, off, 0x80000001);
274#endif
275
276 /*
277 * We need CS.LIM and RIP here. When cbInstr is larger than 1, we also need
278 * a temporary register for calculating the last address of the instruction.
279 *
280 * The calculation and comparisons are 32-bit. We ASSUME that the incoming
281 * RIP isn't totally invalid, i.e. that any jump/call/ret/iret instruction
282 * that last updated EIP here checked it already, and that we're therefore
283 * safe in the 32-bit wrap-around scenario to only check that the last byte
284 * is within CS.LIM. In the case of instruction-by-instruction advancing
285 * up to an EIP wrap-around, we know that CS.LIM is 4G-1 because the limit
286 * must be using 4KB granularity and the previous instruction was fine.
287 */
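 /* In C terms the check emitted below is roughly (sketch; unsigned 32-bit compare):
        if ((uint32_t)(pc + cbInstr - 1) > cs.limit)    // cbInstr == 1: pc is compared directly
            goto RaiseGp0; */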
288 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
289 kIemNativeGstRegUse_ReadOnly);
290 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
291 kIemNativeGstRegUse_ReadOnly);
292#ifdef RT_ARCH_AMD64
293 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
294#elif defined(RT_ARCH_ARM64)
295 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
296#else
297# error "Port me"
298#endif
299
300 if (cbInstr != 1)
301 {
302 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
303
304 /*
305 * 1. idxRegTmp = idxRegPc + cbInstr;
306 * 2. if idxRegTmp > idxRegCsLim then raise #GP(0).
307 */
308#ifdef RT_ARCH_AMD64
309 /* 1. lea tmp32, [Pc + cbInstr - 1] */
310 if (idxRegTmp >= 8 || idxRegPc >= 8)
311 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegPc < 8 ? 0 : X86_OP_REX_B);
312 pbCodeBuf[off++] = 0x8d;
313 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, idxRegTmp & 7, idxRegPc & 7);
314 if ((idxRegPc & 7) == X86_GREG_xSP)
315 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegPc & 7, 4 /*no index*/, 0);
316 pbCodeBuf[off++] = cbInstr - 1;
317
318 /* 2. cmp tmp32(r), CsLim(r/m). */
319 if (idxRegTmp >= 8 || idxRegCsLim >= 8)
320 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
321 pbCodeBuf[off++] = 0x3b;
322 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegTmp & 7, idxRegCsLim & 7);
323
324#elif defined(RT_ARCH_ARM64)
325 /* 1. add tmp32, Pc, #cbInstr-1 */
326 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegTmp, idxRegPc, cbInstr - 1, false /*f64Bit*/);
327 /* 2. cmp tmp32, CsLim */
328 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegTmp, idxRegCsLim,
329 false /*f64Bit*/, true /*fSetFlags*/);
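 /* Note: CMP is an alias for SUBS with XZR as the destination register, which is what
    the instruction built above encodes. */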
330
331#endif
332 iemNativeRegFreeTmp(pReNative, idxRegTmp);
333 }
334 else
335 {
336 /*
337 * Here we can skip step 1 and compare PC and CS.LIM directly.
338 */
339#ifdef RT_ARCH_AMD64
340 /* 2. cmp eip(r), CsLim(r/m). */
341 if (idxRegPc >= 8 || idxRegCsLim >= 8)
342 pbCodeBuf[off++] = (idxRegPc < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
343 pbCodeBuf[off++] = 0x3b;
344 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegPc & 7, idxRegCsLim & 7);
345
346#elif defined(RT_ARCH_ARM64)
347 /* 2. cmp Pc, CsLim */
348 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegPc, idxRegCsLim,
349 false /*f64Bit*/, true /*fSetFlags*/);
350
351#endif
352 }
353
354 /* 3. Jump if greater. */
355 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
356
357 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
358 iemNativeRegFreeTmp(pReNative, idxRegPc);
359 return off;
360}
361
362
363/**
364 * Macro that considers whether we need CS.LIM checking after a branch or
365 * crossing over to a new page.
366 */
367#define BODY_CONSIDER_CS_LIM_CHECKING(a_pTb, a_cbInstr) \
368 RT_NOREF(a_cbInstr); \
369 off = iemNativeEmitBltInConsiderLimChecking(pReNative, off)
370
371DECL_FORCE_INLINE(uint32_t)
372iemNativeEmitBltInConsiderLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off)
373{
374#ifdef VBOX_STRICT
375 off = iemNativeEmitMarker(pReNative, off, 0x80000002);
376#endif
377
378 /*
379 * This check must match the one in iemGetTbFlagsForCurrentPc
380 * exactly:
381 *
382 * int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
383 * if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
384 * return fRet;
385 * return fRet | IEMTB_F_CS_LIM_CHECKS;
386 *
387 *
388 * We need EIP, CS.LIM and CS.BASE here.
389 */
390
391 /* Calculate the offFromLim first: */
392 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
393 kIemNativeGstRegUse_ReadOnly);
394 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
395 kIemNativeGstRegUse_ReadOnly);
396 uint8_t const idxRegLeft = iemNativeRegAllocTmp(pReNative, &off);
397
398#ifdef RT_ARCH_ARM64
399 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
400 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegLeft, idxRegCsLim, idxRegPc);
401 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
402#else
403 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegLeft, idxRegCsLim);
404 off = iemNativeEmitSubTwoGprs(pReNative, off, idxRegLeft, idxRegPc);
405#endif
406
407 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
408 iemNativeRegFreeTmp(pReNative, idxRegPc);
409
410 /* Calculate the threshold level (right side). */
411 uint8_t const idxRegCsBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
412 kIemNativeGstRegUse_ReadOnly);
413 uint8_t const idxRegRight = iemNativeRegAllocTmp(pReNative, &off);
414
415#ifdef RT_ARCH_ARM64
416 pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
417 Assert(Armv8A64ConvertImmRImmS2Mask32(11, 0) == GUEST_PAGE_OFFSET_MASK);
418 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegRight, idxRegCsBase, 11, 0, false /*f64Bit*/);
419 pu32CodeBuf[off++] = Armv8A64MkInstrNeg(idxRegRight);
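 /* X86_PAGE_SIZE + 16 (4112) does not fit in the 12-bit unsigned ADD immediate,
    so it is added in two halves of 2056 below. */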
420 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
421 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
422 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
423
424#else
425 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegRight, GUEST_PAGE_OFFSET_MASK);
426 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegRight, idxRegCsBase);
427 off = iemNativeEmitNegGpr(pReNative, off, idxRegRight);
428 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRight, X86_PAGE_SIZE + 16);
429#endif
430
431 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
432
433 /* Compare the two and jump out if we're too close to the limit. */
434 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegLeft, idxRegRight);
435 off = iemNativeEmitJlToNewLabel(pReNative, off, kIemNativeLabelType_NeedCsLimChecking);
436
437 iemNativeRegFreeTmp(pReNative, idxRegRight);
438 iemNativeRegFreeTmp(pReNative, idxRegLeft);
439 return off;
440}
441
442
443
444/**
445 * Macro that implements opcode (re-)checking.
446 */
447#define BODY_CHECK_OPCODES(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
448 RT_NOREF(a_cbInstr); \
449 off = iemNativeEmitBltInCheckOpcodes(pReNative, off, (a_pTb), (a_idxRange), (a_offRange))
450
451DECL_FORCE_INLINE(uint32_t)
452iemNativeEmitBltInCheckOpcodes(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint16_t offRange)
453{
454 Assert(idxRange < pTb->cRanges && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges));
455 Assert(offRange < pTb->aRanges[idxRange].cbOpcodes);
456#ifdef VBOX_STRICT
457 off = iemNativeEmitMarker(pReNative, off, 0x80000003);
458#endif
459
460 uint32_t const idxLabelObsoleteTb = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ObsoleteTb);
461
462 /*
463 * Where to start and how much to compare.
464 *
465 * Looking at the ranges produced when r160746 was running a DOS VM with TB
466 * logging, the ranges can be anything from 1 byte to at least 0x197 bytes,
467 * with the 6, 5, 4, 7, 8, 40, 3, 2, 9 and 10 being the top 10 in the sample.
468 *
469 * The top 10 for the early boot phase of a 64-bit debian 9.4 VM: 5, 9, 8,
470 * 12, 10, 11, 6, 13, 15 and 16. Max 0x359 bytes. Same revision as above.
471 */
472 uint16_t offPage = pTb->aRanges[idxRange].offPhysPage + offRange;
473 uint16_t cbLeft = pTb->aRanges[idxRange].cbOpcodes - offRange;
474 Assert(cbLeft > 0);
475 uint8_t const *pbOpcodes = &pTb->pabOpcodes[pTb->aRanges[idxRange].offOpcodes];
476 uint32_t offConsolidatedJump = UINT32_MAX;
477
478#ifdef RT_ARCH_AMD64
479 /* AMD64/x86 offers a bunch of options. Smaller stuff can be
480 completely inlined, for larger we use REPE CMPS. */
481# define CHECK_OPCODES_CMP_IMMXX(a_idxReg, a_bOpcode) /* cost: 3 bytes */ do { \
482 pbCodeBuf[off++] = a_bOpcode; \
483 Assert(offPage < 127); \
484 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 7, a_idxReg); \
485 pbCodeBuf[off++] = RT_BYTE1(offPage); \
486 } while (0)
487
488# define CHECK_OPCODES_CMP_JMP() /* cost: 7 bytes first time, then 2 bytes */ do { \
489 if (offConsolidatedJump != UINT32_MAX) \
490 { \
491 int32_t const offDisp = (int32_t)offConsolidatedJump - (int32_t)(off + 2); \
492 Assert(offDisp >= -128); \
493 pbCodeBuf[off++] = 0x75; /* jnz near */ \
494 pbCodeBuf[off++] = (uint8_t)offDisp; \
495 } \
496 else \
497 { \
498 pbCodeBuf[off++] = 0x74; /* jz near +5 */ \
499 pbCodeBuf[off++] = 0x05 /*+ 1*/; \
500 offConsolidatedJump = off; \
501 /*pbCodeBuf[off++] = 0xcc; */ \
502 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */ \
503 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_Rel32, -4); \
504 pbCodeBuf[off++] = 0x00; \
505 pbCodeBuf[off++] = 0x00; \
506 pbCodeBuf[off++] = 0x00; \
507 pbCodeBuf[off++] = 0x00; \
508 } \
509 } while (0)
510
511# define CHECK_OPCODES_CMP_IMM32(a_idxReg) /* cost: 3+4+2 = 9 */ do { \
512 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
513 pbCodeBuf[off++] = *pbOpcodes++; \
514 pbCodeBuf[off++] = *pbOpcodes++; \
515 pbCodeBuf[off++] = *pbOpcodes++; \
516 pbCodeBuf[off++] = *pbOpcodes++; \
517 cbLeft -= 4; \
518 offPage += 4; \
519 CHECK_OPCODES_CMP_JMP(); \
520 } while (0)
521
522# define CHECK_OPCODES_CMP_IMM16(a_idxReg) /* cost: 1+3+2+2 = 8 */ do { \
523 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; \
524 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
525 pbCodeBuf[off++] = *pbOpcodes++; \
526 pbCodeBuf[off++] = *pbOpcodes++; \
527 cbLeft -= 2; \
528 offPage += 2; \
529 CHECK_OPCODES_CMP_JMP(); \
530 } while (0)
531
532# define CHECK_OPCODES_CMP_IMM8(a_idxReg) /* cost: 3+1+2 = 6 */ do { \
533 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x80); \
534 pbCodeBuf[off++] = *pbOpcodes++; \
535 cbLeft -= 1; \
536 offPage += 1; \
537 CHECK_OPCODES_CMP_JMP(); \
538 } while (0)
539
540# define CHECK_OPCODES_CMPSX(a_bOpcode, a_cbToSubtract, a_bPrefix) /* cost: 2+2 = 4 */ do { \
541 if (a_bPrefix) \
542 pbCodeBuf[off++] = (a_bPrefix); \
543 pbCodeBuf[off++] = (a_bOpcode); \
544 CHECK_OPCODES_CMP_JMP(); \
545 cbLeft -= (a_cbToSubtract); \
546 } while (0)
547
548# define CHECK_OPCODES_ECX_IMM(a_uValue) /* cost: 5 */ do { \
549 pbCodeBuf[off++] = 0xb8 + X86_GREG_xCX; \
550 pbCodeBuf[off++] = RT_BYTE1(a_uValue); \
551 pbCodeBuf[off++] = RT_BYTE2(a_uValue); \
552 pbCodeBuf[off++] = RT_BYTE3(a_uValue); \
553 pbCodeBuf[off++] = RT_BYTE4(a_uValue); \
554 } while (0)
555
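 /* Strategy sketch: for 24 bytes or less the opcode bytes are compared inline against
    pbInstrBuf using CMP m8/m16/m32,imm; longer ranges set up RSI/RDI/RCX and use REPE CMPS.
    Either way a mismatch ends up branching to the ObsoleteTb label via CHECK_OPCODES_CMP_JMP. */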
556 if (cbLeft <= 24)
557 {
558 uint8_t const idxRegTmp = iemNativeRegAllocTmpEx(pReNative, &off,
559 ( RT_BIT_32(X86_GREG_xAX)
560 | RT_BIT_32(X86_GREG_xCX)
561 | RT_BIT_32(X86_GREG_xDX)
562 | RT_BIT_32(X86_GREG_xBX)
563 | RT_BIT_32(X86_GREG_xSI)
564 | RT_BIT_32(X86_GREG_xDI))
565 & ~IEMNATIVE_REG_FIXED_MASK); /* pick reg not requiring rex prefix */
566 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf));
567 if (offPage >= 128 - cbLeft)
568 {
569 off = iemNativeEmitAddGprImm(pReNative, off, idxRegTmp, offPage & ~(uint16_t)3);
570 offPage &= 3;
571 }
572
573 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 14 + 54 + 8 + 6 /* = 87 */);
574
575 if (cbLeft > 8)
576 switch (offPage & 3)
577 {
578 case 0:
579 break;
580 case 1: /* cost: 6 + 8 = 14 */
581 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
582 RT_FALL_THRU();
583 case 2: /* cost: 8 */
584 CHECK_OPCODES_CMP_IMM16(idxRegTmp);
585 break;
586 case 3: /* cost: 6 */
587 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
588 break;
589 }
590
591 while (cbLeft >= 4)
592 CHECK_OPCODES_CMP_IMM32(idxRegTmp); /* max iteration: 24/4 = 6; --> cost: 6 * 9 = 54 */
593
594 if (cbLeft >= 2)
595 CHECK_OPCODES_CMP_IMM16(idxRegTmp); /* cost: 8 */
596 if (cbLeft)
597 CHECK_OPCODES_CMP_IMM8(idxRegTmp); /* cost: 6 */
598
599 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
600 iemNativeRegFreeTmp(pReNative, idxRegTmp);
601 }
602 else
603 {
604 /* RDI = &pbInstrBuf[offPage] */
605 uint8_t const idxRegDi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xDI));
606 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegDi, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
607 if (offPage != 0)
608 off = iemNativeEmitAddGprImm(pReNative, off, idxRegDi, offPage);
609
610 /* RSI = pbOpcodes */
611 uint8_t const idxRegSi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xSI));
612 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegSi, (uintptr_t)pbOpcodes);
613
614 /* RCX = counts. */
615 uint8_t const idxRegCx = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xCX));
616
617 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 10 + 5 + 5 + 3 + 4 + 3 /*= 35*/);
618
619 /** @todo profile and optimize this further. Maybe an idea to align by
620 * offPage if the two cannot be reconciled. */
621 /* Align by the page offset, so that at least one of the accesses is naturally aligned. */
622 switch (offPage & 7) /* max cost: 10 */
623 {
624 case 0:
625 break;
626 case 1: /* cost: 3+4+3 = 10 */
627 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
628 RT_FALL_THRU();
629 case 2: /* cost: 4+3 = 7 */
630 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
631 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
632 break;
633 case 3: /* cost: 3+3 = 6 */
634 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
635 RT_FALL_THRU();
636 case 4: /* cost: 3 */
637 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
638 break;
639 case 5: /* cost: 3+4 = 7 */
640 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
641 RT_FALL_THRU();
642 case 6: /* cost: 4 */
643 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
644 break;
645 case 7: /* cost: 3 */
646 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
647 break;
648 }
649
650 /* Compare qwords: */
651 uint32_t const cQWords = cbLeft >> 3;
652 CHECK_OPCODES_ECX_IMM(cQWords); /* cost: 5 */
653
654 pbCodeBuf[off++] = X86_OP_PRF_REPZ; /* cost: 5 */
655 CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_REX_W);
656 cbLeft &= 7;
657
658 if (cbLeft & 4)
659 CHECK_OPCODES_CMPSX(0xa7, 4, 0); /* cost: 3 */
660 if (cbLeft & 2)
661 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP); /* cost: 4 */
662 if (cbLeft & 1)
663 CHECK_OPCODES_CMPSX(0xa6, 1, 0); /* cost: 3 */
664
665 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
666 iemNativeRegFreeTmp(pReNative, idxRegCx);
667 iemNativeRegFreeTmp(pReNative, idxRegSi);
668 iemNativeRegFreeTmp(pReNative, idxRegDi);
669 }
670
671#elif defined(RT_ARCH_ARM64)
672 /* We need pbInstrBuf in a register, whatever we do. */
673 uint8_t const idxRegSrc1Ptr = iemNativeRegAllocTmp(pReNative, &off);
674 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegSrc1Ptr, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
675
676 /* We also need at least one more register for holding bytes & words we
677 load via pbInstrBuf. */
678 uint8_t const idxRegSrc1Val = iemNativeRegAllocTmp(pReNative, &off);
679
680 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
681
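 /* Strategy sketch for ARM64: first align the pbInstrBuf read offset using byte/halfword/word
    compares, then either compare the rest against immediate constants (less than 16 bytes left)
    or switch to a memcmp-style sequence against the opcode copy, looping over 32-byte chunks
    when 64 or more bytes remain. The compares are chained with CCMP so that, mostly, a single
    B.NE to the ObsoleteTb label handles a mismatch. */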
682 /* One byte compare can be done with the opcode byte as an immediate. We'll
683 do this to uint16_t align src1. */
684 bool fPendingJmp = RT_BOOL(offPage & 1);
685 if (fPendingJmp)
686 {
687 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
688 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, *pbOpcodes++, false /*f64Bit*/);
689 offPage += 1;
690 cbLeft -= 1;
691 }
692
693 if (cbLeft > 0)
694 {
695 /* We need a register for holding the opcode bytes we're comparing with,
696 as CCMP only has a 5-bit immediate form and thus cannot hold bytes. */
697 uint8_t const idxRegSrc2Val = iemNativeRegAllocTmp(pReNative, &off);
698
699 /* Word (uint32_t) aligning the src1 pointer is best done using a 16-bit constant load. */
700 if ((offPage & 3) && cbLeft >= 2)
701 {
702 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 2);
703 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
704 if (fPendingJmp)
705 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
706 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
707 else
708 {
709 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
710 fPendingJmp = true;
711 }
712 pbOpcodes += 2;
713 offPage += 2;
714 cbLeft -= 2;
715 }
716
717 /* DWord (uint64_t) aligning the src2 pointer. We use a 32-bit constant here for simplicity. */
718 if ((offPage & 7) && cbLeft >= 4)
719 {
720 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 4);
721 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
722 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
723 if (fPendingJmp)
724 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
725 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
726 else
727 {
728 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
729 fPendingJmp = true;
730 }
731 pbOpcodes += 4;
732 offPage += 4;
733 cbLeft -= 4;
734 }
735
736 /*
737 * If we've got 16 bytes or more left, switch to memcmp-style.
738 */
739 if (cbLeft >= 16)
740 {
741 /* We need a pointer to the copy of the original opcode bytes. */
742 uint8_t const idxRegSrc2Ptr = iemNativeRegAllocTmp(pReNative, &off);
743 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Ptr, (uintptr_t)pbOpcodes);
744
745 /* If there are more than 32 bytes to compare we create a loop, for
746 which we'll need a loop register. */
747 if (cbLeft >= 64)
748 {
749 if (fPendingJmp)
750 {
751 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
752 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
753 fPendingJmp = false;
754 }
755
756 uint8_t const idxRegLoop = iemNativeRegAllocTmp(pReNative, &off);
757 uint16_t const cLoops = cbLeft / 32;
758 cbLeft = cbLeft % 32;
759 pbOpcodes += cLoops * 32;
760 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegLoop, cLoops);
761
762 if (offPage != 0) /** @todo optimize out this instruction. */
763 {
764 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, offPage);
765 offPage = 0;
766 }
767
768 uint32_t const offLoopStart = off;
769 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 0);
770 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 0);
771 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
772
773 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 1);
774 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 1);
775 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
776 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
777
778 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 2);
779 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 2);
780 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
781 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
782
783 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 3);
784 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 3);
785 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
786 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
787
788 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
789 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
790
791 /* Advance and loop. */
792 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, 0x20);
793 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc2Ptr, idxRegSrc2Ptr, 0x20);
794 pu32CodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegLoop, idxRegLoop, 1, false /*f64Bit*/, true /*fSetFlags*/);
795 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, (int32_t)offLoopStart - (int32_t)off);
796
797 iemNativeRegFreeTmp(pReNative, idxRegLoop);
798 }
799
800 /* Deal with any remaining dwords (uint64_t). There can be up to
801 three if we looped and four if we didn't. */
802 uint32_t offSrc2 = 0;
803 while (cbLeft >= 8)
804 {
805 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val,
806 idxRegSrc1Ptr, offPage / 8);
807 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val,
808 idxRegSrc2Ptr, offSrc2 / 8);
809 if (fPendingJmp)
810 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
811 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
812 else
813 {
814 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
815 fPendingJmp = true;
816 }
817 pbOpcodes += 8;
818 offPage += 8;
819 offSrc2 += 8;
820 cbLeft -= 8;
821 }
822
823 iemNativeRegFreeTmp(pReNative, idxRegSrc2Ptr);
824 /* max cost thus far: memcmp-loop=43 vs memcmp-no-loop=30 */
825 }
826 /*
827 * Otherwise, we compare with constants and merge with the general mop-up.
828 */
829 else
830 {
831 while (cbLeft >= 8)
832 {
833 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr,
834 offPage / 8);
835 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Val,
836 RT_MAKE_U64_FROM_MSB_U8(pbOpcodes[7], pbOpcodes[6], pbOpcodes[5], pbOpcodes[4],
837 pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
838 if (fPendingJmp)
839 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
840 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, true /*f64Bit*/);
841 else
842 {
843 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, true /*f64Bit*/);
844 fPendingJmp = true;
845 }
846 pbOpcodes += 8;
847 offPage += 8;
848 cbLeft -= 8;
849 }
850 /* max cost thus far: 21 */
851 }
852
853 /* Deal with any remaining bytes (7 or less). */
854 Assert(cbLeft < 8);
855 if (cbLeft >= 4)
856 {
857 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr,
858 offPage / 4);
859 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
860 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
861 if (fPendingJmp)
862 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
863 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
864 else
865 {
866 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
867 fPendingJmp = true;
868 }
869 pbOpcodes += 4;
870 offPage += 4;
871 cbLeft -= 4;
872
873 }
874
875 if (cbLeft >= 2)
876 {
877 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr,
878 offPage / 2);
879 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
880 if (fPendingJmp)
881 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
882 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
883 else
884 {
885 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
886 fPendingJmp = true;
887 }
888 pbOpcodes += 2;
889 offPage += 2;
890 cbLeft -= 2;
891 }
892
893 if (cbLeft > 0)
894 {
895 Assert(cbLeft == 1);
896 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
897 if (fPendingJmp)
898 {
899 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, pbOpcodes[0]);
900 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
901 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
902 }
903 else
904 {
905 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, pbOpcodes[0], false /*f64Bit*/);
906 fPendingJmp = true;
907 }
908 pbOpcodes += 1;
909 offPage += 1;
910 cbLeft -= 1;
911 }
912
913 iemNativeRegFreeTmp(pReNative, idxRegSrc2Val);
914 }
915 Assert(cbLeft == 0);
916
917 /*
918 * Finally, the branch on difference.
919 */
920 if (fPendingJmp)
921 {
922 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
923 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
924 }
925 RT_NOREF(pu32CodeBuf, cbLeft, offPage, pbOpcodes, offConsolidatedJump, idxLabelObsoleteTb);
926
927 /* max costs: memcmp-loop=54; memcmp-no-loop=41; only-src1-ptr=32 */
928 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
929 iemNativeRegFreeTmp(pReNative, idxRegSrc1Val);
930 iemNativeRegFreeTmp(pReNative, idxRegSrc1Ptr);
931
932#else
933# error "Port me"
934#endif
935 return off;
936}
937
938
939/** Duplicated in IEMAllThrdFuncsBltIn.cpp. */
940DECL_FORCE_INLINE(RTGCPHYS) iemTbGetRangePhysPageAddr(PCIEMTB pTb, uint8_t idxRange)
941{
942 Assert(idxRange < RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges)));
943 uint8_t const idxPage = pTb->aRanges[idxRange].idxPhysPage;
944 Assert(idxPage <= RT_ELEMENTS(pTb->aGCPhysPages));
945 if (idxPage == 0)
946 return pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
947 Assert(!(pTb->aGCPhysPages[idxPage - 1] & GUEST_PAGE_OFFSET_MASK));
948 return pTb->aGCPhysPages[idxPage - 1];
949}
950
951
952/**
953 * Macro that implements PC check after a conditional branch.
954 */
955#define BODY_CHECK_PC_AFTER_BRANCH(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
956 RT_NOREF(a_cbInstr); \
957 off = iemNativeEmitBltInCheckPcAfterBranch(pReNative, off, a_pTb, a_idxRange, a_offRange)
958
959DECL_FORCE_INLINE(uint32_t)
960iemNativeEmitBltInCheckPcAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb,
961 uint8_t idxRange, uint16_t offRange)
962{
963#ifdef VBOX_STRICT
964 off = iemNativeEmitMarker(pReNative, off, 0x80000004);
965#endif
966
967 /*
968 * The GCPhysRangePageWithOffset value in the threaded function is a fixed
969 * constant for us here.
970 *
971 * We can pretend that iem.s.cbInstrBufTotal is X86_PAGE_SIZE here, because
972 * it serves no purpose as a CS.LIM check (if that's needed we've just performed
973 * it), and as long as we don't implement code TLB reload code here there is
974 * no point in checking that the TLB data we're using is still valid.
975 *
976 * What we need to do is:
977 * 1. Calculate the FLAT PC (RIP + CS.BASE).
978 * 2. Subtract iem.s.uInstrBufPc from it, getting 'off'.
979 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal or
980 * we're in the wrong spot and need to find a new TB.
981 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
982 * GCPhysRangePageWithOffset constant mentioned above.
983 *
984 * The adding of CS.BASE to RIP can be skipped in the first step if we're
985 * in 64-bit code or flat 32-bit.
986 */
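 /* The intended check, in C terms (a sketch of steps 1-4 above):
        uint64_t offNew = (pc + cs.base) - pVCpu->iem.s.uInstrBufPc;  // cs.base only when not flat
        if (offNew >= X86_PAGE_SIZE)
            goto CheckBranchMiss;
        if (pVCpu->iem.s.GCPhysInstrBuf + offNew != GCPhysRangePageWithOffset)
            goto CheckBranchMiss; */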
987
988 /* Allocate registers for step 1. Get the shadowed stuff before allocating
989 the temp register, so we don't accidentally clobber something we'll be
990 needing again immediately. This is why we get idxRegCsBase here. */
991 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
992 kIemNativeGstRegUse_ReadOnly);
993 uint8_t const idxRegCsBase = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX
994 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
995 kIemNativeGstRegUse_ReadOnly);
996
997 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
998
999#ifdef VBOX_STRICT
1000 /* Do assertions before idxRegTmp contains anything. */
1001 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1002# ifdef RT_ARCH_AMD64
1003 {
1004 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1005 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1006 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1007 {
1008 /* cmp r/m64, imm8 */
1009 pbCodeBuf[off++] = X86_OP_REX_W;
1010 pbCodeBuf[off++] = 0x83;
1011 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1012 pbCodeBuf[off++] = 0;
1013 /* je rel8 */
1014 pbCodeBuf[off++] = 0x74;
1015 pbCodeBuf[off++] = 1;
1016 /* int3 */
1017 pbCodeBuf[off++] = 0xcc;
1018
1019 }
1020
1021 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1022 /* test r/m64, imm32 */
1023 pbCodeBuf[off++] = X86_OP_REX_W;
1024 pbCodeBuf[off++] = 0xf7;
1025 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1026 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1027 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1028 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1029 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1030 /* jz rel8 */
1031 pbCodeBuf[off++] = 0x74;
1032 pbCodeBuf[off++] = 1;
1033 /* int3 */
1034 pbCodeBuf[off++] = 0xcc;
1035 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1036 }
1037# else
1038off = iemNativeEmitBrk(pReNative, off, 0x1234);
1039
1040 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1041 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1042 {
1043 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1044# ifdef RT_ARCH_ARM64
1045 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1046 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 1, idxRegTmp);
1047 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2004);
1048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1049# else
1050# error "Port me!"
1051# endif
1052 }
1053# endif
1054
1055#endif /* VBOX_STRICT */
1056
1057 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1058 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1059 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1060 {
1061#ifdef RT_ARCH_ARM64
1062 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1063 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1064 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1065#else
1066 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1067 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1068#endif
1069 }
1070 else
1071 {
1072#ifdef RT_ARCH_ARM64
1073 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1074 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1075 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1076 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1077#else
1078 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1079 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1080 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1081#endif
1082 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1083 }
1084 iemNativeRegFreeTmp(pReNative, idxRegPc);
1085
1086 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal. */
1087 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1088 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_CheckBranchMiss);
1089
1090 /* 4. Add iem.s.GCPhysInstrBuf and compare with GCPhysRangePageWithOffset. */
1091#ifdef RT_ARCH_AMD64
1092 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1093 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1094 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1095 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1096 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1097
1098#elif defined(RT_ARCH_ARM64)
1099 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
1100
1101 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1102 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1103 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1104 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1105
1106# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1107 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1108 off = iemNativeEmitJzToFixed(pReNative, off, 1);
1109 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1110# endif
1111 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1112#else
1113# error "Port me"
1114#endif
1115
1116 RTGCPHYS const GCPhysRangePageWithOffset = ( iemTbGetRangePhysPageAddr(pTb, idxRange)
1117 | pTb->aRanges[idxRange].offPhysPage)
1118 + offRange;
1119 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(pReNative, off, idxRegTmp, GCPhysRangePageWithOffset,
1120 kIemNativeLabelType_CheckBranchMiss);
1121
1122 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1123 return off;
1124}
1125
1126
1127#ifdef BODY_CHECK_CS_LIM
1128/**
1129 * Built-in function that checks that EIP/IP + uParam0 is within CS.LIM,
1130 * raising a \#GP(0) if this isn't the case.
1131 */
1132IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
1133{
1134 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1135 BODY_SET_CUR_INSTR();
1136 BODY_CHECK_CS_LIM(cbInstr);
1137 return off;
1138}
1139#endif
1140
1141
1142#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_CS_LIM)
1143/**
1144 * Built-in function for re-checking opcodes and CS.LIM after an instruction
1145 * that may have modified them.
1146 */
1147IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
1148{
1149 PCIEMTB const pTb = pReNative->pTbOrg;
1150 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1151 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1152 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1153 BODY_SET_CUR_INSTR();
1154 BODY_CHECK_CS_LIM(cbInstr);
1155 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1156 return off;
1157}
1158#endif
1159
1160
1161#if defined(BODY_CHECK_OPCODES)
1162/**
1163 * Built-in function for re-checking opcodes after an instruction that may have
1164 * modified them.
1165 */
1166IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
1167{
1168 PCIEMTB const pTb = pReNative->pTbOrg;
1169 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1170 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1171 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1172 BODY_SET_CUR_INSTR();
1173 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1174 return off;
1175}
1176#endif
1177
1178
1179#if defined(BODY_CHECK_OPCODES) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1180/**
1181 * Built-in function for re-checking opcodes and considering the need for CS.LIM
1182 * checking after an instruction that may have modified them.
1183 */
1184IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
1185{
1186 PCIEMTB const pTb = pReNative->pTbOrg;
1187 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1188 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1189 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1190 BODY_SET_CUR_INSTR();
1191 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1192 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1193 return off;
1194}
1195#endif
1196
1197
1198/*
1199 * Post-branching checkers.
1200 */
1201
1202#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1203/**
1204 * Built-in function for checking CS.LIM, checking the PC and checking opcodes
1205 * after conditional branching within the same page.
1206 *
1207 * @see iemThreadedFunc_BltIn_CheckPcAndOpcodes
1208 */
1209IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
1210{
1211 PCIEMTB const pTb = pReNative->pTbOrg;
1212 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1213 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1214 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1215 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1216 BODY_SET_CUR_INSTR();
1217 BODY_CHECK_CS_LIM(cbInstr);
1218 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1219 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1220 //LogFunc(("okay\n"));
1221 return off;
1222}
1223#endif
1224
1225
1226#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH)
1227/**
1228 * Built-in function for checking the PC and checking opcodes after conditional
1229 * branching within the same page.
1230 *
1231 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1232 */
1233IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
1234{
1235 PCIEMTB const pTb = pReNative->pTbOrg;
1236 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1237 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1238 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1239 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1240 BODY_SET_CUR_INSTR();
1241 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1242 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1243 //LogFunc(("okay\n"));
1244 return off;
1245}
1246#endif
1247
1248
1249#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1250/**
1251 * Built-in function for checking the PC and checking opcodes and considering
1252 * the need for CS.LIM checking after conditional branching within the same
1253 * page.
1254 *
1255 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1256 */
1257IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
1258{
1259 PCIEMTB const pTb = pReNative->pTbOrg;
1260 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1261 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1262 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1263 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1264 BODY_SET_CUR_INSTR();
1265 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1266 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1267 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1268 //LogFunc(("okay\n"));
1269 return off;
1270}
1271#endif
1272
1273
1274#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1275/**
1276 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1277 * transitioning to a different code page.
1278 *
1279 * The code page transition can either happen naturally over onto the next page (with
1280 * the instruction starting at page offset zero) or by means of branching.
1281 *
1282 * @see iemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1283 */
1284IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
1285{
1286 PCIEMTB const pTb = pReNative->pTbOrg;
1287 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1288 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1289 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1290 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1291 BODY_SET_CUR_INSTR();
1292 BODY_CHECK_CS_LIM(cbInstr);
1293 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1294 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1295 //LogFunc(("okay\n"));
1296 return off;
1297}
1298#endif
1299
1300
1301#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH)
1302/**
1303 * Built-in function for loading TLB and checking opcodes when transitioning to
1304 * a different code page.
1305 *
1306 * The code page transition can either happen naturally over onto the next page (with
1307 * the instruction starting at page offset zero) or by means of branching.
1308 *
1309 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1310 */
1311IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
1312{
1313 PCIEMTB const pTb = pReNative->pTbOrg;
1314 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1315 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1316 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1317 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1318 BODY_SET_CUR_INSTR();
1319 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1320 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1321 //LogFunc(("okay\n"));
1322 return off;
1323}
1324#endif
1325
1326
1327#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1328/**
1329 * Built-in function for loading TLB and checking opcodes and considering the
1330 * need for CS.LIM checking when transitioning to a different code page.
1331 *
1332 * The code page transition can either happen naturally over onto the next page (with
1333 * the instruction starting at page offset zero) or by means of branching.
1334 *
1335 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1336 */
1337IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
1338{
1339 PCIEMTB const pTb = pReNative->pTbOrg;
1340 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1341 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1342 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1343 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1344 BODY_SET_CUR_INSTR();
1345 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1346 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1347 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1348 //LogFunc(("okay\n"));
1349 return off;
1350}
1351#endif
1352
1353
1354
1355/*
1356 * Natural page crossing checkers.
1357 */
1358
1359#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1360/**
1361 * Built-in function for checking CS.LIM, loading TLB and checking opcodes on
1362 * both pages when transitioning to a different code page.
1363 *
1364 * This is used when the previous instruction requires revalidation of opcode
1365 * bytes and the current instruction straddles a page boundary with opcode bytes
1366 * in both the old and new page.
1367 *
1368 * @see iemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1369 */
1370IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
1371{
1372 PCIEMTB const pTb = pReNative->pTbOrg;
1373 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1374 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1375 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1376 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1377 uint32_t const idxRange2 = idxRange1 + 1;
1378 BODY_SET_CUR_INSTR();
1379 BODY_CHECK_CS_LIM(cbInstr);
1380 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1381 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1382 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1383 return off;
1384}
1385#endif
1386
1387
1388#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1389/**
1390 * Built-in function for loading TLB and checking opcodes on both pages when
1391 * transitioning to a different code page.
1392 *
1393 * This is used when the previous instruction requires revalidation of opcode
1394 * bytes and the current instruction straddles a page boundary with opcode bytes
1395 * in both the old and the new page.
1396 *
1397 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1398 */
1399IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
1400{
1401 PCIEMTB const pTb = pReNative->pTbOrg;
1402 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1403 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1404 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1405 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1406 uint32_t const idxRange2 = idxRange1 + 1;
1407 BODY_SET_CUR_INSTR();
1408 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1409 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1410 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1411 return off;
1412}
1413#endif
1414
1415
1416#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1417/**
1418 * Built-in function for loading TLB and checking opcodes on both pages and
1419 * considering the need for CS.LIM checking when transitioning to a different
1420 * code page.
1421 *
1422 * This is used when the previous instruction requires revalidation of opcode
1423 * bytes and the current instruction straddles a page boundary with opcode bytes
1424 * in both the old and the new page.
1425 *
1426 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1427 */
1428IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
1429{
1430 PCIEMTB const pTb = pReNative->pTbOrg;
1431 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1432 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1433 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1434 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1435 uint32_t const idxRange2 = idxRange1 + 1;
1436 BODY_SET_CUR_INSTR();
1437 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1438 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1439 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1440 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1441 return off;
1442}
1443#endif
1444
1445
1446#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1447/**
1448 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1449 * advancing naturally to a different code page.
1450 *
1451 * Only opcodes on the new page are checked.
1452 *
1453 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1454 */
1455IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
1456{
1457 PCIEMTB const pTb = pReNative->pTbOrg;
1458 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1459 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1460 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1461    //uint32_t const offRange1   = (uint32_t)pCallEntry->auParams[2];
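    /* offRange1 is not needed here since only the opcodes on the new page are checked. */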
1462 uint32_t const idxRange2 = idxRange1 + 1;
1463 BODY_SET_CUR_INSTR();
1464 BODY_CHECK_CS_LIM(cbInstr);
1465 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1466 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1467 return off;
1468}
1469#endif
1470
1471
1472#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1473/**
1474 * Built-in function for loading TLB and checking opcodes when advancing
1475 * naturally to a different code page.
1476 *
1477 * Only opcodes on the new page are checked.
1478 *
1479 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1480 */
1481IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
1482{
1483 PCIEMTB const pTb = pReNative->pTbOrg;
1484 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1485 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1486 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1487 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1488 uint32_t const idxRange2 = idxRange1 + 1;
1489 BODY_SET_CUR_INSTR();
1490 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1491 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1492 return off;
1493}
1494#endif
1495
1496
1497#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1498/**
1499 * Built-in function for loading TLB and checking opcodes and considering the
1500 * need for CS.LIM checking when advancing naturally to a different code page.
1501 *
1502 * Only opcodes on the new page are checked.
1503 *
1504 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1505 */
1506IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
1507{
1508 PCIEMTB const pTb = pReNative->pTbOrg;
1509 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1510 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1511 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1512 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1513 uint32_t const idxRange2 = idxRange1 + 1;
1514 BODY_SET_CUR_INSTR();
1515 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1516 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1517 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1518 return off;
1519}
1520#endif
1521
1522
1523#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1524/**
1525 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1526 * advancing naturally to a different code page with first instr at byte 0.
1527 *
1528 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1529 */
1530IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
1531{
1532 PCIEMTB const pTb = pReNative->pTbOrg;
1533 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1534 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1535 BODY_SET_CUR_INSTR();
1536 BODY_CHECK_CS_LIM(cbInstr);
1537 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1538 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1539 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1540 return off;
1541}
1542#endif
1543
1544
1545#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1546/**
1547 * Built-in function for loading TLB and checking opcodes when advancing
1548 * naturally to a different code page with first instr at byte 0.
1549 *
1550 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1551 */
1552IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
1553{
1554 PCIEMTB const pTb = pReNative->pTbOrg;
1555 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1556 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1557 BODY_SET_CUR_INSTR();
1558 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1559 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1560 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1561 return off;
1562}
1563#endif
1564
1565
1566#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1567/**
1568 * Built-in function for loading TLB and checking opcodes and considering the
1569 * need for CS.LIM checking when advancing naturally to a different code page
1570 * with first instr at byte 0.
1571 *
1572 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1573 */
1574IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
1575{
1576 PCIEMTB const pTb = pReNative->pTbOrg;
1577 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1578 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1579 BODY_SET_CUR_INSTR();
1580 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1581 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1582 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1583 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1584 return off;
1585}
1586#endif
1587