VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompBltIn.cpp@102683

Last change on this file since 102683 was 102683, checked in by vboxsync, 16 months ago

VMM/IEM: Fixed bug in native opcode comparison, caused lots of incorrect obsoletion when a check didn't start at the first byte in an opcode range. bugref:10371

1/* $Id: IEMAllN8veRecompBltIn.cpp 102683 2023-12-21 19:18:59Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, Emitters for Built-In Threaded Functions.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#include <VBox/vmm/iem.h>
37#include <VBox/vmm/cpum.h>
38#include <VBox/vmm/dbgf.h>
39#include "IEMInternal.h"
40#include <VBox/vmm/vmcc.h>
41#include <VBox/log.h>
42#include <VBox/err.h>
43#include <VBox/param.h>
44#include <iprt/assert.h>
45#include <iprt/string.h>
46#if defined(RT_ARCH_AMD64)
47# include <iprt/x86.h>
48#elif defined(RT_ARCH_ARM64)
49# include <iprt/armv8.h>
50#endif
51
52
53#include "IEMInline.h"
54#include "IEMThreadedFunctions.h"
55#include "IEMN8veRecompiler.h"
56#include "IEMN8veRecompilerEmit.h"
57
58
59
60/*********************************************************************************************************************************
61* TB Helper Functions *
62*********************************************************************************************************************************/
63#ifdef RT_ARCH_AMD64
64DECLASM(void) iemNativeHlpAsmSafeWrapLogCpuState(void);
65#endif
66
67
68
69/*********************************************************************************************************************************
70* Builtin functions *
71*********************************************************************************************************************************/
72
73/**
74 * Built-in function that does nothing.
75 *
76 * Whether this is called or not can be controlled by the entry in the
77 * IEMThreadedGenerator.katBltIns table. This can be useful for determining
78 * why behaviour changes when enabling the LogCpuState builtin, i.e. whether
79 * it's the reduced call count in the TBs or the threaded calls flushing
80 * register state.
81 */
82IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_Nop)
83{
84 RT_NOREF(pReNative, pCallEntry);
85 return off;
86}
87
88
89/**
90 * Emits code for LogCpuState.
91 *
92 * This shouldn't have any relevant impact on the recompiler state.
93 */
94IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_LogCpuState)
95{
96#ifdef RT_ARCH_AMD64
97 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
98 /* push rax */
99 pbCodeBuf[off++] = 0x50 + X86_GREG_xAX;
100 /* push imm32 */
101 pbCodeBuf[off++] = 0x68;
102 pbCodeBuf[off++] = RT_BYTE1(pCallEntry->auParams[0]);
103 pbCodeBuf[off++] = RT_BYTE2(pCallEntry->auParams[0]);
104 pbCodeBuf[off++] = RT_BYTE3(pCallEntry->auParams[0]);
105 pbCodeBuf[off++] = RT_BYTE4(pCallEntry->auParams[0]);
106 /* mov rax, iemNativeHlpAsmSafeWrapLogCpuState */
107 pbCodeBuf[off++] = X86_OP_REX_W;
108 pbCodeBuf[off++] = 0xb8 + X86_GREG_xAX;
109 *(uint64_t *)&pbCodeBuf[off] = (uintptr_t)iemNativeHlpAsmSafeWrapLogCpuState;
110 off += sizeof(uint64_t);
111 /* call rax */
112 pbCodeBuf[off++] = 0xff;
113 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
114 /* pop rax - discards the pushed imm32 parameter */
115 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
116 /* pop rax - restores the original rax value */
117 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
118 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
119
120#else
121 /** @todo Implement this */
122 AssertFailed();
123 RT_NOREF(pReNative, pCallEntry);
124#endif
125 return off;
126}
127
128
129/**
130 * Built-in function that calls a C-implementation function taking zero arguments.
131 */
132IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
133{
134 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
135 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
136 uint64_t const fGstShwFlush = pCallEntry->auParams[2];
137 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
138}
139
140
141/**
142 * Built-in function that checks for pending interrupts that can be delivered or
143 * forced action flags.
144 *
145 * This triggers after the completion of an instruction, so EIP is already at
146 * the next instruction. If an IRQ or important FF is pending, this will return
147 * a non-zero status that stops TB execution.
148 */
149IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
150{
151 RT_NOREF(pCallEntry);
152
153 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
154 and I'm too lazy to create a 'Fixed' version of that one. */
155 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
156 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
157
158 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
159
160 /* Again, we need to load the extended EFLAGS before we actually need them
161 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
162 loaded them inside the check, as the shadow state would not be correct
163 when the code branches before the load. Ditto PC. */
164 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
165 kIemNativeGstRegUse_ReadOnly);
166
167 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
168
169 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
170
171 /*
172 * Start by checking the local forced actions of the EMT we're on for IRQs
173 * and other FFs that need servicing.
174 */
175 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
176 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
177 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
178 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
179 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
180 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
181 | VMCPU_FF_TLB_FLUSH
182 | VMCPU_FF_UNHALT ),
183 true /*fSetFlags*/);
184 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
185 uint32_t const offFixupJumpToVmCheck1 = off;
186 off = iemNativeEmitJzToFixed(pReNative, off, 0);
187
188 /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
189 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
190 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
191 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
192 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
193 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
194
195 /* So, it's only interrupt related FFs and we need to see if IRQs are being
196 suppressed by the CPU or not. */
197 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
198 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
199 idxLabelReturnBreak);
200
201 /* We've got shadow flags set, so we must check that the PC they are valid
202 for matches our current PC value. */
203 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
204 * a register. */
205 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
206 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
207
208 /*
209 * Now check the force flags of the VM.
210 */
211 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
212 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
213 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
214 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
215 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
216 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
217
218 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
219
220 /*
221 * We're good, no IRQs or FFs pending.
222 */
223 iemNativeRegFreeTmp(pReNative, idxTmpReg);
224 iemNativeRegFreeTmp(pReNative, idxEflReg);
225 iemNativeRegFreeTmp(pReNative, idxPcReg);
226
227 return off;
228}
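/*
 * Illustration only (not part of the original source): the native code emitted
 * by the CheckIrq builtin above corresponds roughly to the following C logic.
 * This is a sketch; the exact field spellings (e.g. eflags.uBoth) are assumed
 * rather than taken from this file.
 *
 *   uint64_t fFlags = pVCpu->fLocalForcedActions
 *                   & VMCPU_FF_ALL_MASK
 *                   & ~(  VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
 *                       | VMCPU_FF_TLB_FLUSH    | VMCPU_FF_UNHALT);
 *   if (fFlags)
 *   {
 *       if (fFlags & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
 *           return VINF_IEM_REEXEC_BREAK;                    // non-IRQ FF pending
 *       if (pVCpu->cpum.GstCtx.eflags.uBoth & X86_EFL_IF)    // IRQs not masked?
 *           if (   !(pVCpu->cpum.GstCtx.eflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
 *               || pVCpu->cpum.GstCtx.uRipInhibitInt != pVCpu->cpum.GstCtx.rip)
 *               return VINF_IEM_REEXEC_BREAK;                // deliverable IRQ
 *   }
 *   if (pVCpu->CTX_SUFF(pVM)->fGlobalForcedActions & VM_FF_ALL_MASK)
 *       return VINF_IEM_REEXEC_BREAK;                        // VM-wide FF pending
 *   // otherwise: keep executing the TB
 */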
229
230
231/**
232 * Built-in function that checks if IEMCPU::fExec has the expected value.
233 */
234IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
235{
236 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
237 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
238
239 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
240 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
241 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
242 kIemNativeLabelType_ReturnBreak);
243 iemNativeRegFreeTmp(pReNative, idxTmpReg);
244 return off;
245}
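/*
 * Illustration only (not part of the original source): the CheckMode builtin
 * above boils down to this C-level test, with fExpectedExec taken from the
 * call entry parameter:
 *
 *   if ((pVCpu->iem.s.fExec & IEMTB_F_KEY_MASK) != (fExpectedExec & IEMTB_F_KEY_MASK))
 *       return VINF_IEM_REEXEC_BREAK;
 */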
246
247
248/**
249 * Sets idxTbCurInstr in preparation of raising an exception.
250 */
251/** @todo Optimize this, so we don't set the same value more than once. Just
252 * needs some tracking. */
253#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
254# define BODY_SET_CUR_INSTR() \
255 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr))
256#else
257# define BODY_SET_CUR_INSTR() ((void)0)
258#endif
259
260
261/**
262 * Macro that emits the 16/32-bit CS.LIM check.
263 */
264#define BODY_CHECK_CS_LIM(a_cbInstr) \
265 off = iemNativeEmitBltInCheckCsLim(pReNative, off, (a_cbInstr))
266
267DECL_FORCE_INLINE(uint32_t)
268iemNativeEmitBltInCheckCsLim(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
269{
270 Assert(cbInstr > 0);
271 Assert(cbInstr < 16);
272#ifdef VBOX_STRICT
273 off = iemNativeEmitMarker(pReNative, off, 0x80000001);
274#endif
275
276 /*
277 * We need CS.LIM and RIP here. When cbInstr is larger than 1, we also need
278 * a temporary register for calculating the last address of the instruction.
279 *
280 * The calculation and comparisons are 32-bit. We ASSUME that the incoming
281 * RIP isn't totally invalid, i.e. that any jump/call/ret/iret instruction
282 * that last updated EIP here checked it already, and that we're therefore
283 * safe in the 32-bit wrap-around scenario to only check that the last byte
284 * is within CS.LIM. In the case of instruction-by-instruction advancing
285 * up to an EIP wrap-around, we know that CS.LIM is 4G-1 because the limit
286 * must be using 4KB granularity and the previous instruction was fine.
287 */
288 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
289 kIemNativeGstRegUse_ReadOnly);
290 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
291 kIemNativeGstRegUse_ReadOnly);
292#ifdef RT_ARCH_AMD64
293 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
294#elif defined(RT_ARCH_ARM64)
295 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
296#else
297# error "Port me"
298#endif
299
300 if (cbInstr != 1)
301 {
302 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
303
304 /*
305 * 1. idxRegTmp = idxRegPc + cbInstr;
306 * 2. if idxRegTmp > idxRegCsLim then raise #GP(0).
307 */
308#ifdef RT_ARCH_AMD64
309 /* 1. lea tmp32, [Pc + cbInstr - 1] */
310 if (idxRegTmp >= 8 || idxRegPc >= 8)
311 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegPc < 8 ? 0 : X86_OP_REX_B);
312 pbCodeBuf[off++] = 0x8d;
313 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, idxRegTmp & 7, idxRegPc & 7);
314 if ((idxRegPc & 7) == X86_GREG_xSP)
315 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegPc & 7, 4 /*no index*/, 0);
316 pbCodeBuf[off++] = cbInstr - 1;
317
318 /* 2. cmp tmp32(r), CsLim(r/m). */
319 if (idxRegTmp >= 8 || idxRegCsLim >= 8)
320 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
321 pbCodeBuf[off++] = 0x3b;
322 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegTmp & 7, idxRegCsLim & 7);
323
324#elif defined(RT_ARCH_ARM64)
325 /* 1. add tmp32, Pc, #cbInstr-1 */
326 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegTmp, idxRegPc, cbInstr - 1, false /*f64Bit*/);
327 /* 2. cmp tmp32, CsLim */
328 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegTmp, idxRegCsLim,
329 false /*f64Bit*/, true /*fSetFlags*/);
330
331#endif
332 iemNativeRegFreeTmp(pReNative, idxRegTmp);
333 }
334 else
335 {
336 /*
337 * Here we can skip step 1 and compare PC and CS.LIM directly.
338 */
339#ifdef RT_ARCH_AMD64
340 /* 2. cmp eip(r), CsLim(r/m). */
341 if (idxRegPc >= 8 || idxRegCsLim >= 8)
342 pbCodeBuf[off++] = (idxRegPc < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
343 pbCodeBuf[off++] = 0x3b;
344 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegPc & 7, idxRegCsLim & 7);
345
346#elif defined(RT_ARCH_ARM64)
347 /* 2. cmp Pc, CsLim */
348 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegPc, idxRegCsLim,
349 false /*f64Bit*/, true /*fSetFlags*/);
350
351#endif
352 }
353
354 /* 3. Jump if greater. */
355 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
356
357 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
358 iemNativeRegFreeTmp(pReNative, idxRegPc);
359 return off;
360}
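/*
 * Illustration only (not part of the original source): in C terms the check
 * emitted above is roughly the following, using 32-bit arithmetic as the
 * comment at the top of the function explains:
 *
 *   uint32_t const uLastByte = (uint32_t)pVCpu->cpum.GstCtx.eip + cbInstr - 1;
 *   if (uLastByte > pVCpu->cpum.GstCtx.cs.u32Limit)
 *       // branch to the RaiseGp0 label, i.e. raise #GP(0)
 */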
361
362
363/**
364 * Macro that considers whether we need CS.LIM checking after a branch or
365 * crossing over to a new page.
366 */
367#define BODY_CONSIDER_CS_LIM_CHECKING(a_pTb, a_cbInstr) \
368 RT_NOREF(a_cbInstr); \
369 off = iemNativeEmitBltInConsiderLimChecking(pReNative, off)
370
371DECL_FORCE_INLINE(uint32_t)
372iemNativeEmitBltInConsiderLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off)
373{
374#ifdef VBOX_STRICT
375 off = iemNativeEmitMarker(pReNative, off, 0x80000002);
376#endif
377
378 /*
379 * This check must match the one in iemGetTbFlagsForCurrentPc
380 * exactly:
381 *
382 * int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
383 * if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
384 * return fRet;
385 * return fRet | IEMTB_F_CS_LIM_CHECKS;
386 *
387 *
388 * We need EIP, CS.LIM and CS.BASE here.
389 */
390
391 /* Calculate the offFromLim first: */
392 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
393 kIemNativeGstRegUse_ReadOnly);
394 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
395 kIemNativeGstRegUse_ReadOnly);
396 uint8_t const idxRegLeft = iemNativeRegAllocTmp(pReNative, &off);
397
398#ifdef RT_ARCH_ARM64
399 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
400 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegLeft, idxRegCsLim, idxRegPc);
401 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
402#else
403 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegLeft, idxRegCsLim);
404 off = iemNativeEmitSubTwoGprs(pReNative, off, idxRegLeft, idxRegPc);
405#endif
406
407 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
408 iemNativeRegFreeTmp(pReNative, idxRegPc);
409
410 /* Calculate the threshold level (right side). */
411 uint8_t const idxRegCsBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
412 kIemNativeGstRegUse_ReadOnly);
413 uint8_t const idxRegRight = iemNativeRegAllocTmp(pReNative, &off);
414
415#ifdef RT_ARCH_ARM64
416 pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
417 Assert(Armv8A64ConvertImmRImmS2Mask32(11, 0) == GUEST_PAGE_OFFSET_MASK);
418 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegRight, idxRegCsBase, 11, 0, false /*f64Bit*/);
419 pu32CodeBuf[off++] = Armv8A64MkInstrNeg(idxRegRight);
420 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
421 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
422 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
423
424#else
425 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegRight, GUEST_PAGE_OFFSET_MASK);
426 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegRight, idxRegCsBase);
427 off = iemNativeEmitNegGpr(pReNative, off, idxRegRight);
428 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRight, X86_PAGE_SIZE + 16);
429#endif
430
431 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
432
433 /* Compare the two and jump out if we're too close to the limit. */
434 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegLeft, idxRegRight);
435 off = iemNativeEmitJlToNewLabel(pReNative, off, kIemNativeLabelType_NeedCsLimChecking);
436
437 iemNativeRegFreeTmp(pReNative, idxRegRight);
438 iemNativeRegFreeTmp(pReNative, idxRegLeft);
439 return off;
440}
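/*
 * Worked example (illustration only, not part of the original source): with
 * cs.u64Base = 0x1000, cs.u32Limit = 0x5000 and eip = 0x4f00, the left side is
 * offFromLim = 0x5000 - 0x4f00 = 0x100 and the right side (threshold) is
 * X86_PAGE_SIZE + 16 - (0x1000 & GUEST_PAGE_OFFSET_MASK) = 0x1010. Since
 * 0x100 < 0x1010, the code emitted above jumps to the NeedCsLimChecking
 * label, matching the IEMTB_F_CS_LIM_CHECKS path in iemGetTbFlagsForCurrentPc.
 */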
441
442
443
444/**
445 * Macro that implements opcode (re-)checking.
446 */
447#define BODY_CHECK_OPCODES(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
448 RT_NOREF(a_cbInstr); \
449 off = iemNativeEmitBltInCheckOpcodes(pReNative, off, (a_pTb), (a_idxRange), (a_offRange))
450
451#if 0 /* debugging aid */
452bool g_fBpOnObsoletion = false;
453# define BP_ON_OBSOLETION g_fBpOnObsoletion
454#else
455# define BP_ON_OBSOLETION 0
456#endif
457
458DECL_FORCE_INLINE(uint32_t)
459iemNativeEmitBltInCheckOpcodes(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint16_t offRange)
460{
461 Assert(idxRange < pTb->cRanges && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges));
462 Assert(offRange < pTb->aRanges[idxRange].cbOpcodes);
463#ifdef VBOX_STRICT
464 off = iemNativeEmitMarker(pReNative, off, 0x80000003);
465#endif
466
467 uint32_t const idxLabelObsoleteTb = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ObsoleteTb);
468
469 /*
470 * Where to start and how much to compare.
471 *
472 * Looking at the ranges produced when r160746 was running a DOS VM with TB
473 * logging, the ranges can be anything from 1 byte to at least 0x197 bytes,
474 * with the 6, 5, 4, 7, 8, 40, 3, 2, 9 and 10 being the top 10 in the sample.
475 *
476 * The top 10 for the early boot phase of a 64-bit debian 9.4 VM: 5, 9, 8,
477 * 12, 10, 11, 6, 13, 15 and 16. Max 0x359 bytes. Same revision as above.
478 */
479 uint16_t offPage = pTb->aRanges[idxRange].offPhysPage + offRange;
480 uint16_t cbLeft = pTb->aRanges[idxRange].cbOpcodes - offRange;
481 Assert(cbLeft > 0);
482 uint8_t const *pbOpcodes = &pTb->pabOpcodes[pTb->aRanges[idxRange].offOpcodes + offRange];
483 uint32_t offConsolidatedJump = UINT32_MAX;
484
485#ifdef RT_ARCH_AMD64
486 /* AMD64/x86 offers a bunch of options. Smaller stuff can be completely
487 inlined, for larger blocks we use REPE CMPS. */
488# define CHECK_OPCODES_CMP_IMMXX(a_idxReg, a_bOpcode) /* cost: 3 bytes */ do { \
489 pbCodeBuf[off++] = a_bOpcode; \
490 Assert(offPage < 127); \
491 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 7, a_idxReg); \
492 pbCodeBuf[off++] = RT_BYTE1(offPage); \
493 } while (0)
494
495# define CHECK_OPCODES_CMP_JMP() /* cost: 7 bytes first time, then 2 bytes */ do { \
496 if (offConsolidatedJump != UINT32_MAX) \
497 { \
498 int32_t const offDisp = (int32_t)offConsolidatedJump - (int32_t)(off + 2); \
499 Assert(offDisp >= -128); \
500 pbCodeBuf[off++] = 0x75; /* jnz near */ \
501 pbCodeBuf[off++] = (uint8_t)offDisp; \
502 } \
503 else \
504 { \
505 pbCodeBuf[off++] = 0x74; /* jz near +5 */ \
506 pbCodeBuf[off++] = 0x05 + BP_ON_OBSOLETION; \
507 offConsolidatedJump = off; \
508 if (BP_ON_OBSOLETION) pbCodeBuf[off++] = 0xcc; \
509 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */ \
510 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_Rel32, -4); \
511 pbCodeBuf[off++] = 0x00; \
512 pbCodeBuf[off++] = 0x00; \
513 pbCodeBuf[off++] = 0x00; \
514 pbCodeBuf[off++] = 0x00; \
515 } \
516 } while (0)
517
518# define CHECK_OPCODES_CMP_IMM32(a_idxReg) /* cost: 3+4+2 = 9 */ do { \
519 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
520 pbCodeBuf[off++] = *pbOpcodes++; \
521 pbCodeBuf[off++] = *pbOpcodes++; \
522 pbCodeBuf[off++] = *pbOpcodes++; \
523 pbCodeBuf[off++] = *pbOpcodes++; \
524 cbLeft -= 4; \
525 offPage += 4; \
526 CHECK_OPCODES_CMP_JMP(); \
527 } while (0)
528
529# define CHECK_OPCODES_CMP_IMM16(a_idxReg) /* cost: 1+3+2+2 = 8 */ do { \
530 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; \
531 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
532 pbCodeBuf[off++] = *pbOpcodes++; \
533 pbCodeBuf[off++] = *pbOpcodes++; \
534 cbLeft -= 2; \
535 offPage += 2; \
536 CHECK_OPCODES_CMP_JMP(); \
537 } while (0)
538
539# define CHECK_OPCODES_CMP_IMM8(a_idxReg) /* cost: 3+1+2 = 6 */ do { \
540 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x80); \
541 pbCodeBuf[off++] = *pbOpcodes++; \
542 cbLeft -= 1; \
543 offPage += 1; \
544 CHECK_OPCODES_CMP_JMP(); \
545 } while (0)
546
547# define CHECK_OPCODES_CMPSX(a_bOpcode, a_cbToSubtract, a_bPrefix) /* cost: 2+2 = 4 */ do { \
548 if (a_bPrefix) \
549 pbCodeBuf[off++] = (a_bPrefix); \
550 pbCodeBuf[off++] = (a_bOpcode); \
551 CHECK_OPCODES_CMP_JMP(); \
552 cbLeft -= (a_cbToSubtract); \
553 } while (0)
554
555# define CHECK_OPCODES_ECX_IMM(a_uValue) /* cost: 5 */ do { \
556 pbCodeBuf[off++] = 0xb8 + X86_GREG_xCX; \
557 pbCodeBuf[off++] = RT_BYTE1(a_uValue); \
558 pbCodeBuf[off++] = RT_BYTE2(a_uValue); \
559 pbCodeBuf[off++] = RT_BYTE3(a_uValue); \
560 pbCodeBuf[off++] = RT_BYTE4(a_uValue); \
561 } while (0)
562
563 if (cbLeft <= 24)
564 {
565 uint8_t const idxRegTmp = iemNativeRegAllocTmpEx(pReNative, &off,
566 ( RT_BIT_32(X86_GREG_xAX)
567 | RT_BIT_32(X86_GREG_xCX)
568 | RT_BIT_32(X86_GREG_xDX)
569 | RT_BIT_32(X86_GREG_xBX)
570 | RT_BIT_32(X86_GREG_xSI)
571 | RT_BIT_32(X86_GREG_xDI))
572 & ~IEMNATIVE_REG_FIXED_MASK); /* pick reg not requiring rex prefix */
573 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf));
574 if (offPage >= 128 - cbLeft)
575 {
576 off = iemNativeEmitAddGprImm(pReNative, off, idxRegTmp, offPage & ~(uint16_t)3);
577 offPage &= 3;
578 }
579
580 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 14 + 54 + 8 + 6 + BP_ON_OBSOLETION /* = 87 */);
581
582 if (cbLeft > 8)
583 switch (offPage & 3)
584 {
585 case 0:
586 break;
587 case 1: /* cost: 6 + 8 = 14 */
588 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
589 RT_FALL_THRU();
590 case 2: /* cost: 8 */
591 CHECK_OPCODES_CMP_IMM16(idxRegTmp);
592 break;
593 case 3: /* cost: 6 */
594 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
595 break;
596 }
597
598 while (cbLeft >= 4)
599 CHECK_OPCODES_CMP_IMM32(idxRegTmp); /* max iteration: 24/4 = 6; --> cost: 6 * 9 = 54 */
600
601 if (cbLeft >= 2)
602 CHECK_OPCODES_CMP_IMM16(idxRegTmp); /* cost: 8 */
603 if (cbLeft)
604 CHECK_OPCODES_CMP_IMM8(idxRegTmp); /* cost: 6 */
605
606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
607 iemNativeRegFreeTmp(pReNative, idxRegTmp);
608 }
609 else
610 {
611 /* RDI = &pbInstrBuf[offPage] */
612 uint8_t const idxRegDi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xDI));
613 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegDi, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
614 if (offPage != 0)
615 off = iemNativeEmitAddGprImm(pReNative, off, idxRegDi, offPage);
616
617 /* RSI = pbOpcodes */
618 uint8_t const idxRegSi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xSI));
619 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegSi, (uintptr_t)pbOpcodes);
620
621 /* RCX = counts. */
622 uint8_t const idxRegCx = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xCX));
623
624 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 10 + 5 + 5 + 3 + 4 + 3 + BP_ON_OBSOLETION /*= 35*/);
625
626 /** @todo profile and optimize this further. Maybe an idea to align by
627 * offPage if the two cannot be reconciled. */
628 /* Align by the page offset, so that at least one of the accesses is naturally aligned. */
629 switch (offPage & 7) /* max cost: 10 */
630 {
631 case 0:
632 break;
633 case 1: /* cost: 3+4+3 = 10 */
634 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
635 RT_FALL_THRU();
636 case 2: /* cost: 4+3 = 7 */
637 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
638 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
639 break;
640 case 3: /* cost: 3+3 = 6 */
641 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
642 RT_FALL_THRU();
643 case 4: /* cost: 3 */
644 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
645 break;
646 case 5: /* cost: 3+4 = 7 */
647 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
648 RT_FALL_THRU();
649 case 6: /* cost: 4 */
650 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
651 break;
652 case 7: /* cost: 3 */
653 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
654 break;
655 }
656
657 /* Compare qwords: */
658 uint32_t const cQWords = cbLeft >> 3;
659 CHECK_OPCODES_ECX_IMM(cQWords); /* cost: 5 */
660
661 pbCodeBuf[off++] = X86_OP_PRF_REPZ; /* cost: 5 */
662 CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_REX_W);
663 cbLeft &= 7;
664
665 if (cbLeft & 4)
666 CHECK_OPCODES_CMPSX(0xa7, 4, 0); /* cost: 3 */
667 if (cbLeft & 2)
668 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP); /* cost: 4 */
669 if (cbLeft & 1)
670 CHECK_OPCODES_CMPSX(0xa6, 1, 0); /* cost: 3 */
671
672 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
673 iemNativeRegFreeTmp(pReNative, idxRegCx);
674 iemNativeRegFreeTmp(pReNative, idxRegSi);
675 iemNativeRegFreeTmp(pReNative, idxRegDi);
676 }
677
678#elif defined(RT_ARCH_ARM64)
679 /* We need pbInstrBuf in a register, whatever we do. */
680 uint8_t const idxRegSrc1Ptr = iemNativeRegAllocTmp(pReNative, &off);
681 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegSrc1Ptr, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
682
683 /* We also need at least one more register for holding bytes & words we
684 load via pbInstrBuf. */
685 uint8_t const idxRegSrc1Val = iemNativeRegAllocTmp(pReNative, &off);
686
687 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
688
689 /* One byte compare can be done with the opcode byte as an immediate. We'll
690 do this to uint16_t align src1. */
691 bool fPendingJmp = RT_BOOL(offPage & 1);
692 if (fPendingJmp)
693 {
694 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
695 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, *pbOpcodes++, false /*f64Bit*/);
696 offPage += 1;
697 cbLeft -= 1;
698 }
699
700 if (cbLeft > 0)
701 {
702 /* We need a register for holding the opcode bytes we're comparing with,
703 as CCMP only has a 5-bit immediate form and thus cannot hold bytes. */
704 uint8_t const idxRegSrc2Val = iemNativeRegAllocTmp(pReNative, &off);
705
706 /* Word (uint32_t) aligning the src1 pointer is best done using a 16-bit constant load. */
707 if ((offPage & 3) && cbLeft >= 2)
708 {
709 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 2);
710 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
711 if (fPendingJmp)
712 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
713 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
714 else
715 {
716 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
717 fPendingJmp = true;
718 }
719 pbOpcodes += 2;
720 offPage += 2;
721 cbLeft -= 2;
722 }
723
724 /* DWord (uint64_t) aligning the src1 pointer. We use a 32-bit constant here for simplicity. */
725 if ((offPage & 7) && cbLeft >= 4)
726 {
727 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 4);
728 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
729 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
730 if (fPendingJmp)
731 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
732 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
733 else
734 {
735 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
736 fPendingJmp = true;
737 }
738 pbOpcodes += 4;
739 offPage += 4;
740 cbLeft -= 4;
741 }
742
743 /*
744 * If we've got 16 bytes or more left, switch to memcmp-style.
745 */
746 if (cbLeft >= 16)
747 {
748 /* We need a pointer to the copy of the original opcode bytes. */
749 uint8_t const idxRegSrc2Ptr = iemNativeRegAllocTmp(pReNative, &off);
750 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Ptr, (uintptr_t)pbOpcodes);
751
752 /* If there are more than 32 bytes to compare we create a loop, for
753 which we'll need a loop register. */
754 if (cbLeft >= 64)
755 {
756 if (fPendingJmp)
757 {
758 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
759 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
760 fPendingJmp = false;
761 }
762
763 uint8_t const idxRegLoop = iemNativeRegAllocTmp(pReNative, &off);
764 uint16_t const cLoops = cbLeft / 32;
765 cbLeft = cbLeft % 32;
766 pbOpcodes += cLoops * 32;
767 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegLoop, cLoops);
768
769 if (offPage != 0) /** @todo optimize out this instruction. */
770 {
771 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, offPage);
772 offPage = 0;
773 }
774
775 uint32_t const offLoopStart = off;
776 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 0);
777 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 0);
778 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
779
780 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 1);
781 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 1);
782 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
783 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
784
785 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 2);
786 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 2);
787 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
788 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
789
790 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 3);
791 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 3);
792 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
793 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
794
795 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
796 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
797
798 /* Advance and loop. */
799 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, 0x20);
800 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc2Ptr, idxRegSrc2Ptr, 0x20);
801 pu32CodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegLoop, idxRegLoop, 1, false /*f64Bit*/, true /*fSetFlags*/);
802 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, (int32_t)offLoopStart - (int32_t)off);
803
804 iemNativeRegFreeTmp(pReNative, idxRegLoop);
805 }
806
807 /* Deal with any remaining dwords (uint64_t). There can be up to
808 three if we looped and four if we didn't. */
809 uint32_t offSrc2 = 0;
810 while (cbLeft >= 8)
811 {
812 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val,
813 idxRegSrc1Ptr, offPage / 8);
814 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val,
815 idxRegSrc2Ptr, offSrc2 / 8);
816 if (fPendingJmp)
817 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
818 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
819 else
820 {
821 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
822 fPendingJmp = true;
823 }
824 pbOpcodes += 8;
825 offPage += 8;
826 offSrc2 += 8;
827 cbLeft -= 8;
828 }
829
830 iemNativeRegFreeTmp(pReNative, idxRegSrc2Ptr);
831 /* max cost thus far: memcmp-loop=43 vs memcmp-no-loop=30 */
832 }
833 /*
834 * Otherwise, we compare with constants and merge with the general mop-up.
835 */
836 else
837 {
838 while (cbLeft >= 8)
839 {
840 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr,
841 offPage / 8);
842 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Val,
843 RT_MAKE_U64_FROM_MSB_U8(pbOpcodes[7], pbOpcodes[6], pbOpcodes[5], pbOpcodes[4],
844 pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
845 if (fPendingJmp)
846 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
847 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, true /*f64Bit*/);
848 else
849 {
850 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, true /*f64Bit*/);
851 fPendingJmp = true;
852 }
853 pbOpcodes += 8;
854 offPage += 8;
855 cbLeft -= 8;
856 }
857 /* max cost thus far: 21 */
858 }
859
860 /* Deal with any remaining bytes (7 or less). */
861 Assert(cbLeft < 8);
862 if (cbLeft >= 4)
863 {
864 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr,
865 offPage / 4);
866 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
867 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
868 if (fPendingJmp)
869 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
870 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
871 else
872 {
873 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
874 fPendingJmp = true;
875 }
876 pbOpcodes += 4;
877 offPage += 4;
878 cbLeft -= 4;
879
880 }
881
882 if (cbLeft >= 2)
883 {
884 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr,
885 offPage / 2);
886 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
887 if (fPendingJmp)
888 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
889 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
890 else
891 {
892 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
893 fPendingJmp = true;
894 }
895 pbOpcodes += 2;
896 offPage += 2;
897 cbLeft -= 2;
898 }
899
900 if (cbLeft > 0)
901 {
902 Assert(cbLeft == 1);
903 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
904 if (fPendingJmp)
905 {
906 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, pbOpcodes[0]);
907 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
908 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
909 }
910 else
911 {
912 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, pbOpcodes[0], false /*f64Bit*/);
913 fPendingJmp = true;
914 }
915 pbOpcodes += 1;
916 offPage += 1;
917 cbLeft -= 1;
918 }
919
920 iemNativeRegFreeTmp(pReNative, idxRegSrc2Val);
921 }
922 Assert(cbLeft == 0);
923
924 /*
925 * Finally, the branch on difference.
926 */
927 if (fPendingJmp)
928 {
929 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
930 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
931 }
932 RT_NOREF(pu32CodeBuf, cbLeft, offPage, pbOpcodes, offConsolidatedJump, idxLabelObsoleteTb);
933
934 /* max costs: memcmp-loop=54; memcmp-no-loop=41; only-src1-ptr=32 */
935 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
936 iemNativeRegFreeTmp(pReNative, idxRegSrc1Val);
937 iemNativeRegFreeTmp(pReNative, idxRegSrc1Ptr);
938
939#else
940# error "Port me"
941#endif
942 return off;
943}
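/*
 * Illustration only (not part of the original source): for a small range on
 * AMD64, e.g. cbLeft = 6 at a dword-aligned offPage of 0x10, the inlined path
 * above emits code along these lines (the consolidated-jump mechanics are
 * simplified here to a plain branch to the ObsoleteTb label):
 *
 *   mov   rTmp, [pVCpu + offsetof(VMCPUCC, iem.s.pbInstrBuf)]
 *   cmp   dword [rTmp + 10h], <opcode bytes 0..3>
 *   jnz   ObsoleteTb
 *   cmp   word  [rTmp + 14h], <opcode bytes 4..5>
 *   jnz   ObsoleteTb
 */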
944
945
946/** Duplicated in IEMAllThrdFuncsBltIn.cpp. */
947DECL_FORCE_INLINE(RTGCPHYS) iemTbGetRangePhysPageAddr(PCIEMTB pTb, uint8_t idxRange)
948{
949 Assert(idxRange < RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges)));
950 uint8_t const idxPage = pTb->aRanges[idxRange].idxPhysPage;
951 Assert(idxPage <= RT_ELEMENTS(pTb->aGCPhysPages));
952 if (idxPage == 0)
953 return pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
954 Assert(!(pTb->aGCPhysPages[idxPage - 1] & GUEST_PAGE_OFFSET_MASK));
955 return pTb->aGCPhysPages[idxPage - 1];
956}
957
958
959/**
960 * Macro that implements PC check after a conditional branch.
961 */
962#define BODY_CHECK_PC_AFTER_BRANCH(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
963 RT_NOREF(a_cbInstr); \
964 off = iemNativeEmitBltInCheckPcAfterBranch(pReNative, off, a_pTb, a_idxRange, a_offRange)
965
966DECL_FORCE_INLINE(uint32_t)
967iemNativeEmitBltInCheckPcAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb,
968 uint8_t idxRange, uint16_t offRange)
969{
970#ifdef VBOX_STRICT
971 off = iemNativeEmitMarker(pReNative, off, 0x80000004);
972#endif
973
974 /*
975 * The GCPhysRangePageWithOffset value in the threaded function is a fixed
976 * constant for us here.
977 *
978 * We can pretend that iem.s.cbInstrBufTotal is X86_PAGE_SIZE here, because
979 * it serves no purpose as a CS.LIM check (if that's needed we've just
980 * performed it), and as long as we don't implement code TLB reloading here
981 * there is no point in checking that the TLB data we're using is still valid.
982 *
983 * What we do is:
984 * 1. Calculate the FLAT PC (RIP + CS.BASE).
985 * 2. Subtract iem.s.uInstrBufPc from it, getting 'off'.
986 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal or
987 * we're in the wrong spot and need to find a new TB.
988 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
989 * GCPhysRangePageWithOffset constant mentioned above.
990 *
991 * The adding of CS.BASE to RIP can be skipped in the first step if we're
992 * in 64-bit code or flat 32-bit.
993 */
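 /*
  * Illustration only (not part of the original source): in C terms the code
  * emitted below amounts roughly to this, where fFlat means 64-bit or flat
  * 32-bit mode and GCPhysRangePageWithOffset is the constant computed at the
  * end of this function:
  *
  *   uint64_t const uFlatPc = pVCpu->cpum.GstCtx.rip
  *                          + (fFlat ? 0 : pVCpu->cpum.GstCtx.cs.u64Base);
  *   uint64_t const offBuf  = uFlatPc - pVCpu->iem.s.uInstrBufPc;
  *   if (offBuf > X86_PAGE_SIZE - 1)
  *       // branch to the CheckBranchMiss label
  *   if (pVCpu->iem.s.GCPhysInstrBuf + offBuf != GCPhysRangePageWithOffset)
  *       // branch to the CheckBranchMiss label
  */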
994
995 /* Allocate registers for step 1. Get the shadowed stuff before allocating
996 the temp register, so we don't accidentally clobber something we'll be
997 needing again immediately. This is why we get idxRegCsBase here. */
998 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
999 kIemNativeGstRegUse_ReadOnly);
1000 uint8_t const idxRegCsBase = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX
1001 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
1002 kIemNativeGstRegUse_ReadOnly);
1003
1004 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1005
1006#ifdef VBOX_STRICT
1007 /* Do assertions before idxRegTmp contains anything. */
1008 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1009# ifdef RT_ARCH_AMD64
1010 {
1011 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1012 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1013 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1014 {
1015 /* cmp r/m64, imm8 */
1016 pbCodeBuf[off++] = X86_OP_REX_W;
1017 pbCodeBuf[off++] = 0x83;
1018 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1019 pbCodeBuf[off++] = 0;
1020 /* je rel8 */
1021 pbCodeBuf[off++] = 0x74;
1022 pbCodeBuf[off++] = 1;
1023 /* int3 */
1024 pbCodeBuf[off++] = 0xcc;
1025
1026 }
1027
1028 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1029 /* test r/m64, imm32 */
1030 pbCodeBuf[off++] = X86_OP_REX_W;
1031 pbCodeBuf[off++] = 0xf7;
1032 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1033 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1034 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1035 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1036 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1037 /* jz rel8 */
1038 pbCodeBuf[off++] = 0x74;
1039 pbCodeBuf[off++] = 1;
1040 /* int3 */
1041 pbCodeBuf[off++] = 0xcc;
1042 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1043 }
1044# else
1045off = iemNativeEmitBrk(pReNative, off, 0x1234);
1046
1047 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1048 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1049 {
1050 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1051# ifdef RT_ARCH_ARM64
1052 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1053 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 1, idxRegTmp);
1054 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2004);
1055 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1056# else
1057# error "Port me!"
1058# endif
1059 }
1060# endif
1061
1062#endif /* VBOX_STRICT */
1063
1064 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1065 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1066 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1067 {
1068#ifdef RT_ARCH_ARM64
1069 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1070 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1071 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1072#else
1073 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1074 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1075#endif
1076 }
1077 else
1078 {
1079#ifdef RT_ARCH_ARM64
1080 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1081 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1082 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1083 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1084#else
1085 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1086 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1087 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1088#endif
1089 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1090 }
1091 iemNativeRegFreeTmp(pReNative, idxRegPc);
1092
1093 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal. */
1094 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1095 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_CheckBranchMiss);
1096
1097 /* 4. Add iem.s.GCPhysInstrBuf and compare with GCPhysRangePageWithOffset. */
1098#ifdef RT_ARCH_AMD64
1099 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1100 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1101 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1102 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1103 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1104
1105#elif defined(RT_ARCH_ARM64)
1106 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
1107
1108 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1109 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1110 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1111 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1112
1113# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1114 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1115 off = iemNativeEmitJzToFixed(pReNative, off, 1);
1116 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1117# endif
1118 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1119#else
1120# error "Port me"
1121#endif
1122
1123 RTGCPHYS const GCPhysRangePageWithOffset = ( iemTbGetRangePhysPageAddr(pTb, idxRange)
1124 | pTb->aRanges[idxRange].offPhysPage)
1125 + offRange;
1126 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(pReNative, off, idxRegTmp, GCPhysRangePageWithOffset,
1127 kIemNativeLabelType_CheckBranchMiss);
1128
1129 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1130 return off;
1131}
1132
1133
1134#ifdef BODY_CHECK_CS_LIM
1135/**
1136 * Built-in function that checks that EIP/IP + uParam0 is within CS.LIM,
1137 * raising a \#GP(0) if this isn't the case.
1138 */
1139IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
1140{
1141 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1142 BODY_SET_CUR_INSTR();
1143 BODY_CHECK_CS_LIM(cbInstr);
1144 return off;
1145}
1146#endif
1147
1148
1149#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_CS_LIM)
1150/**
1151 * Built-in function for re-checking opcodes and CS.LIM after an instruction
1152 * that may have modified them.
1153 */
1154IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
1155{
1156 PCIEMTB const pTb = pReNative->pTbOrg;
1157 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1158 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1159 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1160 BODY_SET_CUR_INSTR();
1161 BODY_CHECK_CS_LIM(cbInstr);
1162 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1163 return off;
1164}
1165#endif
1166
1167
1168#if defined(BODY_CHECK_OPCODES)
1169/**
1170 * Built-in function for re-checking opcodes after an instruction that may have
1171 * modified them.
1172 */
1173IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
1174{
1175 PCIEMTB const pTb = pReNative->pTbOrg;
1176 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1177 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1178 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1179 BODY_SET_CUR_INSTR();
1180 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1181 return off;
1182}
1183#endif
1184
1185
1186#if defined(BODY_CHECK_OPCODES) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1187/**
1188 * Built-in function for re-checking opcodes and considering the need for CS.LIM
1189 * checking after an instruction that may have modified them.
1190 */
1191IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
1192{
1193 PCIEMTB const pTb = pReNative->pTbOrg;
1194 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1195 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1196 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1197 BODY_SET_CUR_INSTR();
1198 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1199 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1200 return off;
1201}
1202#endif
1203
1204
1205/*
1206 * Post-branching checkers.
1207 */
1208
1209#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1210/**
1211 * Built-in function for checking CS.LIM, checking the PC and checking opcodes
1212 * after conditional branching within the same page.
1213 *
1214 * @see iemThreadedFunc_BltIn_CheckPcAndOpcodes
1215 */
1216IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
1217{
1218 PCIEMTB const pTb = pReNative->pTbOrg;
1219 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1220 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1221 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1222 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1223 BODY_SET_CUR_INSTR();
1224 BODY_CHECK_CS_LIM(cbInstr);
1225 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1226 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1227 //LogFunc(("okay\n"));
1228 return off;
1229}
1230#endif
1231
1232
1233#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH)
1234/**
1235 * Built-in function for checking the PC and checking opcodes after conditional
1236 * branching within the same page.
1237 *
1238 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1239 */
1240IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
1241{
1242 PCIEMTB const pTb = pReNative->pTbOrg;
1243 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1244 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1245 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1246 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1247 BODY_SET_CUR_INSTR();
1248 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1249 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1250 //LogFunc(("okay\n"));
1251 return off;
1252}
1253#endif
1254
1255
1256#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1257/**
1258 * Built-in function for checking the PC and checking opcodes and considering
1259 * the need for CS.LIM checking after conditional branching within the same
1260 * page.
1261 *
1262 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1263 */
1264IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
1265{
1266 PCIEMTB const pTb = pReNative->pTbOrg;
1267 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1268 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1269 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1270 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1271 BODY_SET_CUR_INSTR();
1272 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1273 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1274 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1275 //LogFunc(("okay\n"));
1276 return off;
1277}
1278#endif
1279
1280
1281#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1282/**
1283 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1284 * transitioning to a different code page.
1285 *
1286 * The code page transition can either be a natural flow over onto the next page
1287 * (with the instruction starting at page offset zero) or happen by means of branching.
1288 *
1289 * @see iemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1290 */
1291IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
1292{
1293 PCIEMTB const pTb = pReNative->pTbOrg;
1294 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1295 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1296 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1297 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1298 BODY_SET_CUR_INSTR();
1299 BODY_CHECK_CS_LIM(cbInstr);
1300 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1301 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1302 //LogFunc(("okay\n"));
1303 return off;
1304}
1305#endif
1306
1307
1308#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH)
1309/**
1310 * Built-in function for loading TLB and checking opcodes when transitioning to
1311 * a different code page.
1312 *
1313 * The code page transition can either be a natural flow over onto the next page
1314 * (with the instruction starting at page offset zero) or happen by means of branching.
1315 *
1316 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1317 */
1318IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
1319{
1320 PCIEMTB const pTb = pReNative->pTbOrg;
1321 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1322 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1323 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1324 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1325 BODY_SET_CUR_INSTR();
1326 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1327 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1328 //LogFunc(("okay\n"));
1329 return off;
1330}
1331#endif
1332
1333
1334#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1335/**
1336 * Built-in function for loading TLB and checking opcodes and considering the
1337 * need for CS.LIM checking when transitioning to a different code page.
1338 *
1339 * The code page transition can either be a natural flow over onto the next page
1340 * (with the instruction starting at page offset zero) or happen by means of branching.
1341 *
1342 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1343 */
1344IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
1345{
1346 PCIEMTB const pTb = pReNative->pTbOrg;
1347 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1348 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1349 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1350 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1351 BODY_SET_CUR_INSTR();
1352 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1353 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
1354 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1355 //LogFunc(("okay\n"));
1356 return off;
1357}
1358#endif
1359
1360
1361
1362/*
1363 * Natural page crossing checkers.
1364 */
1365
1366#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1367/**
1368 * Built-in function for checking CS.LIM, loading TLB and checking opcodes on
1369 * both pages when transitioning to a different code page.
1370 *
1371 * This is used when the previous instruction requires revalidation of opcode
1372 * bytes and the current instruction straddles a page boundary with opcode bytes
1373 * in both the old and new page.
1374 *
1375 * @see iemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1376 */
1377IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
1378{
1379 PCIEMTB const pTb = pReNative->pTbOrg;
1380 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1381 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1382 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1383 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1384 uint32_t const idxRange2 = idxRange1 + 1;
1385 BODY_SET_CUR_INSTR();
1386 BODY_CHECK_CS_LIM(cbInstr);
1387 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1388 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1389 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1390 return off;
1391}
1392#endif
1393
1394
1395#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1396/**
1397 * Built-in function for loading TLB and checking opcodes on both pages when
1398 * transitioning to a different code page.
1399 *
1400 * This is used when the previous instruction requires revalidation of opcode
1401 * bytes and the current instruction straddles a page boundary with opcode
1402 * bytes in both the old and the new page.
1403 *
1404 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1405 */
1406IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
1407{
1408 PCIEMTB const pTb = pReNative->pTbOrg;
1409 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1410 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1411 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1412 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1413 uint32_t const idxRange2 = idxRange1 + 1;
1414 BODY_SET_CUR_INSTR();
1415 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1416 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1417 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1418 return off;
1419}
1420#endif
1421
1422
1423#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1424/**
1425 * Built-in function for loading TLB and checking opcodes on both pages and
1426 * considering the need for CS.LIM checking when transitioning to a different
1427 * code page.
1428 *
1429 * This is used when the previous instruction requires revalidation of opcode
1430 * bytes and the current instruction straddles a page boundary with opcode
1431 * bytes in both the old and the new page.
1432 *
1433 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1434 */
1435IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
1436{
1437 PCIEMTB const pTb = pReNative->pTbOrg;
1438 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1439 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1440 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1441 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1442 uint32_t const idxRange2 = idxRange1 + 1;
1443 BODY_SET_CUR_INSTR();
1444 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1445 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
1446 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1447 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1448 return off;
1449}
1450#endif
1451
1452
1453#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1454/**
1455 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1456 * advancing naturally to a different code page.
1457 *
1458 * Only opcodes on the new page are checked.
1459 *
1460 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1461 */
1462IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
1463{
1464 PCIEMTB const pTb = pReNative->pTbOrg;
1465 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1466 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1467 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1468 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1469 uint32_t const idxRange2 = idxRange1 + 1;
1470 BODY_SET_CUR_INSTR();
1471 BODY_CHECK_CS_LIM(cbInstr);
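    /* Only idxRange2 (the new page) is verified below; the opcode bytes still on
       the old page are presumably covered by checks emitted for the preceding
       range, which is why offRange1 is left unused in this variant. */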
1472 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1473 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1474 return off;
1475}
1476#endif
1477
1478
1479#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1480/**
1481 * Built-in function for loading TLB and checking opcodes when advancing
1482 * naturally to a different code page.
1483 *
1484 * Only opcodes on the new page are checked.
1485 *
1486 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1487 */
1488IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
1489{
1490 PCIEMTB const pTb = pReNative->pTbOrg;
1491 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1492 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1493 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1494 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1495 uint32_t const idxRange2 = idxRange1 + 1;
1496 BODY_SET_CUR_INSTR();
1497 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1498 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1499 return off;
1500}
1501#endif
1502
1503
1504#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1505/**
1506 * Built-in function for loading TLB and checking opcodes and considering the
1507 * need for CS.LIM checking when advancing naturally to a different code page.
1508 *
1509 * Only opcodes on the new page are checked.
1510 *
1511 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1512 */
1513IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
1514{
1515 PCIEMTB const pTb = pReNative->pTbOrg;
1516 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1517 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
1518 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
1519 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
1520 uint32_t const idxRange2 = idxRange1 + 1;
1521 BODY_SET_CUR_INSTR();
1522 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1523 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
1524 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
1525 return off;
1526}
1527#endif
1528
1529
1530#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
1531/**
1532 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1533 * advancing naturally to a different code page with first instr at byte 0.
1534 *
1535 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1536 */
1537IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
1538{
1539 PCIEMTB const pTb = pReNative->pTbOrg;
1540 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1541 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1542 BODY_SET_CUR_INSTR();
1543 BODY_CHECK_CS_LIM(cbInstr);
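    /* The second argument to BODY_LOAD_TLB_FOR_NEW_PAGE is 0 here: the
       instruction begins at the very first byte of the new page (cf. the
       commented-out assertion below), so the range is checked from offset 0. */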
1544 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1545 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1546 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1547 return off;
1548}
1549#endif
1550
1551
1552#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
1553/**
1554 * Built-in function for loading TLB and checking opcodes when advancing
1555 * naturally to a different code page with first instr at byte 0.
1556 *
1557 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1558 */
1559IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
1560{
1561 PCIEMTB const pTb = pReNative->pTbOrg;
1562 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1563 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1564 BODY_SET_CUR_INSTR();
1565 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1566 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1567 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1568 return off;
1569}
1570#endif
1571
1572
1573#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1574/**
1575 * Built-in function for loading TLB and checking opcodes and considering the
1576 * need for CS.LIM checking when advancing naturally to a different code page
1577 * with first instr at byte 0.
1578 *
1579 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1580 */
1581IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
1582{
1583 PCIEMTB const pTb = pReNative->pTbOrg;
1584 uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
1585 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1586 BODY_SET_CUR_INSTR();
1587 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1588 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
1589 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
1590 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
1591 return off;
1592}
1593#endif
1594