1 | /* $Id: IEMAllN8veRecompBltIn.cpp 102634 2023-12-18 14:24:23Z vboxsync $ */
|
---|
2 | /** @file
|
---|
3 | * IEM - Native Recompiler, Emitters for Built-In Threaded Functions.
|
---|
4 | */
|
---|
5 |
|
---|
6 | /*
|
---|
7 | * Copyright (C) 2023 Oracle and/or its affiliates.
|
---|
8 | *
|
---|
9 | * This file is part of VirtualBox base platform packages, as
|
---|
10 | * available from https://www.virtualbox.org.
|
---|
11 | *
|
---|
12 | * This program is free software; you can redistribute it and/or
|
---|
13 | * modify it under the terms of the GNU General Public License
|
---|
14 | * as published by the Free Software Foundation, in version 3 of the
|
---|
15 | * License.
|
---|
16 | *
|
---|
17 | * This program is distributed in the hope that it will be useful, but
|
---|
18 | * WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
20 | * General Public License for more details.
|
---|
21 | *
|
---|
22 | * You should have received a copy of the GNU General Public License
|
---|
23 | * along with this program; if not, see <https://www.gnu.org/licenses>.
|
---|
24 | *
|
---|
25 | * SPDX-License-Identifier: GPL-3.0-only
|
---|
26 | */
|
---|
27 |
|
---|
28 |
|
---|
29 | /*********************************************************************************************************************************
|
---|
30 | * Header Files *
|
---|
31 | *********************************************************************************************************************************/
|
---|
32 | #define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
|
---|
33 | #define IEM_WITH_OPAQUE_DECODER_STATE
|
---|
34 | #define VMCPU_INCL_CPUM_GST_CTX
|
---|
35 | #define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
|
---|
36 | #include <VBox/vmm/iem.h>
|
---|
37 | #include <VBox/vmm/cpum.h>
|
---|
38 | #include <VBox/vmm/dbgf.h>
|
---|
39 | #include "IEMInternal.h"
|
---|
40 | #include <VBox/vmm/vmcc.h>
|
---|
41 | #include <VBox/log.h>
|
---|
42 | #include <VBox/err.h>
|
---|
43 | #include <VBox/param.h>
|
---|
44 | #include <iprt/assert.h>
|
---|
45 | #include <iprt/string.h>
|
---|
46 | #if defined(RT_ARCH_AMD64)
|
---|
47 | # include <iprt/x86.h>
|
---|
48 | #elif defined(RT_ARCH_ARM64)
|
---|
49 | # include <iprt/armv8.h>
|
---|
50 | #endif
|
---|
51 |
|
---|
52 |
|
---|
53 | #include "IEMInline.h"
|
---|
54 | #include "IEMThreadedFunctions.h"
|
---|
55 | #include "IEMN8veRecompiler.h"
|
---|
56 | #include "IEMN8veRecompilerEmit.h"
|
---|
57 |
|
---|
58 |
|
---|
59 |
|
---|
60 | /*********************************************************************************************************************************
|
---|
61 | * TB Helper Functions *
|
---|
62 | *********************************************************************************************************************************/
|
---|
63 |
|
---|
64 |
|
---|
65 |
|
---|
66 | /*********************************************************************************************************************************
|
---|
67 | * Builtin functions *
|
---|
68 | *********************************************************************************************************************************/
|
---|
69 |
|
---|
70 | /**
|
---|
71 | * Built-in function that calls a C-implemention function taking zero arguments.
|
---|
72 | */
|
---|
73 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
|
---|
74 | {
|
---|
75 | PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
|
---|
76 | uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
|
---|
77 | uint64_t const fGstShwFlush = (uint8_t)pCallEntry->auParams[2];
|
---|
78 | return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
|
---|
79 | }
|
---|
80 |
|
---|
81 |
|
---|
82 | /**
|
---|
83 | * Built-in function that checks for pending interrupts that can be delivered or
|
---|
84 | * forced action flags.
|
---|
85 | *
|
---|
86 | * This triggers after the completion of an instruction, so EIP is already at
|
---|
87 | * the next instruction. If an IRQ or important FF is pending, this will return
|
---|
88 | * a non-zero status that stops TB execution.
|
---|
89 | */
|
---|
90 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
|
---|
91 | {
|
---|
92 | RT_NOREF(pCallEntry);
|
---|
93 |
|
---|
94 | /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
|
---|
95 | and I'm too lazy to create a 'Fixed' version of that one. */
|
---|
96 | uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
|
---|
97 | UINT32_MAX, pReNative->uCheckIrqSeqNo++);
|
---|
98 |
|
---|
99 | uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
|
---|
100 |
|
---|
101 | /* Again, we need to load the extended EFLAGS before we actually need them
|
---|
102 | in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
|
---|
103 | loaded them inside the check, as the shadow state would not be correct
|
---|
104 | when the code branches before the load. Ditto PC. */
|
---|
105 | uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
|
---|
106 | kIemNativeGstRegUse_ReadOnly);
|
---|
107 |
|
---|
108 | uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
|
---|
109 |
|
---|
110 | uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
|
---|
111 |
|
---|
112 | /*
|
---|
113 | * Start by checking the local forced actions of the EMT we're on for IRQs
|
---|
114 | * and other FFs that needs servicing.
|
---|
115 | */
|
---|
116 | /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
|
---|
117 | /* Load FFs in to idxTmpReg and AND with all relevant flags. */
|
---|
118 | off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
|
---|
119 | off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
|
---|
120 | VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
|
---|
121 | | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
|
---|
122 | | VMCPU_FF_TLB_FLUSH
|
---|
123 | | VMCPU_FF_UNHALT ),
|
---|
124 | true /*fSetFlags*/);
|
---|
125 | /* If we end up with ZERO in idxTmpReg there is nothing to do.*/
|
---|
126 | uint32_t const offFixupJumpToVmCheck1 = off;
|
---|
127 | off = iemNativeEmitJzToFixed(pReNative, off, 0);
|
---|
128 |
|
---|
129 | /* Some relevant FFs are set, but if's only APIC or/and PIC being set,
|
---|
130 | these may be supressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
|
---|
131 | off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
|
---|
132 | ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
|
---|
133 | /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
|
---|
134 | off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
|
---|
135 |
|
---|
136 | /* So, it's only interrupt releated FFs and we need to see if IRQs are being
|
---|
137 | suppressed by the CPU or not. */
|
---|
138 | off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
|
---|
139 | off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
|
---|
140 | idxLabelReturnBreak);
|
---|
141 |
|
---|
142 | /* We've got shadow flags set, so we must check that the PC they are valid
|
---|
143 | for matches our current PC value. */
|
---|
144 | /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
|
---|
145 | * a register. */
|
---|
146 | off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
|
---|
147 | off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
|
---|
148 |
|
---|
149 | /*
|
---|
150 | * Now check the force flags of the VM.
|
---|
151 | */
|
---|
152 | iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
|
---|
153 | iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
|
---|
154 | off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
|
---|
155 | off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
|
---|
156 | off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
|
---|
157 | off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
|
---|
158 |
|
---|
159 | /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
|
---|
160 |
|
---|
161 | /*
|
---|
162 | * We're good, no IRQs or FFs pending.
|
---|
163 | */
|
---|
164 | iemNativeRegFreeTmp(pReNative, idxTmpReg);
|
---|
165 | iemNativeRegFreeTmp(pReNative, idxEflReg);
|
---|
166 | iemNativeRegFreeTmp(pReNative, idxPcReg);
|
---|
167 |
|
---|
168 | return off;
|
---|
169 | }
|
---|
170 |
|
---|
171 |
|
---|
172 | /**
|
---|
173 | * Built-in function checks if IEMCPU::fExec has the expected value.
|
---|
174 | */
|
---|
175 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
|
---|
176 | {
|
---|
177 | uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
|
---|
178 | uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
|
---|
179 |
|
---|
180 | off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
|
---|
181 | off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
|
---|
182 | off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
|
---|
183 | kIemNativeLabelType_ReturnBreak);
|
---|
184 | iemNativeRegFreeTmp(pReNative, idxTmpReg);
|
---|
185 | return off;
|
---|
186 | }
|
---|
187 |
|
---|
188 |
|
---|
/**
 * Sets idxTbCurInstr in preparation of raising an exception.
 *
 * Expands to a store of pCallEntry->idxInstr into iem.s.idxTbCurInstr when
 * IEMNATIVE_WITH_INSTRUCTION_COUNTING is defined, and to a no-op otherwise.
 * Relies on pReNative, off and pCallEntry being in scope at the point of use.
 */
/** @todo Optimize this, so we don't set the same value more than once.  Just
 *        needs some tracking. */
#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
# define BODY_SET_CUR_INSTR() ((void)0)
#else
# define BODY_SET_CUR_INSTR() \
    off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr))
#endif
|
---|
200 |
|
---|
201 |
|
---|
202 | /**
|
---|
203 | * Macro that emits the 16/32-bit CS.LIM check.
|
---|
204 | */
|
---|
205 | #define BODY_CHECK_CS_LIM(a_cbInstr) \
|
---|
206 | off = iemNativeEmitBltInCheckCsLim(pReNative, off, (a_cbInstr))
|
---|
207 |
|
---|
208 | DECL_FORCE_INLINE(uint32_t)
|
---|
209 | iemNativeEmitBltInCheckCsLim(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
|
---|
210 | {
|
---|
211 | Assert(cbInstr > 0);
|
---|
212 | Assert(cbInstr < 16);
|
---|
213 |
|
---|
214 | /*
|
---|
215 | * We need CS.LIM and RIP here. When cbInstr is larger than 1, we also need
|
---|
216 | * a temporary register for calculating the last address of the instruction.
|
---|
217 | *
|
---|
218 | * The calculation and comparisons are 32-bit. We ASSUME that the incoming
|
---|
219 | * RIP isn't totally invalid, i.e. that any jump/call/ret/iret instruction
|
---|
220 | * that last updated EIP here checked it already, and that we're therefore
|
---|
221 | * safe in the 32-bit wrap-around scenario to only check that the last byte
|
---|
222 | * is within CS.LIM. In the case of instruction-by-instruction advancing
|
---|
223 | * up to a EIP wrap-around, we know that CS.LIM is 4G-1 because the limit
|
---|
224 | * must be using 4KB granularity and the previous instruction was fine.
|
---|
225 | */
|
---|
226 | uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
|
---|
227 | kIemNativeGstRegUse_ReadOnly);
|
---|
228 | uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
|
---|
229 | kIemNativeGstRegUse_ReadOnly);
|
---|
230 | #ifdef RT_ARCH_AMD64
|
---|
231 | uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
|
---|
232 | #elif defined(RT_ARCH_ARM64)
|
---|
233 | uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
|
---|
234 | #else
|
---|
235 | # error "Port me"
|
---|
236 | #endif
|
---|
237 |
|
---|
238 | if (cbInstr != 1)
|
---|
239 | {
|
---|
240 | uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
|
---|
241 |
|
---|
242 | /*
|
---|
243 | * 1. idxRegTmp = idxRegPc + cbInstr;
|
---|
244 | * 2. if idxRegTmp > idxRegCsLim then raise #GP(0).
|
---|
245 | */
|
---|
246 | #ifdef RT_ARCH_AMD64
|
---|
247 | /* 1. lea tmp32, [Pc + cbInstr - 1] */
|
---|
248 | if (idxRegTmp >= 8 || idxRegPc >= 8)
|
---|
249 | pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegPc < 8 ? 0 : X86_OP_REX_B);
|
---|
250 | pbCodeBuf[off++] = 0x8d;
|
---|
251 | pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, idxRegTmp & 7, idxRegPc & 7);
|
---|
252 | if ((idxRegPc & 7) == X86_GREG_xSP)
|
---|
253 | pbCodeBuf[off++] = X86_SIB_MAKE(idxRegPc & 7, 4 /*no index*/, 0);
|
---|
254 | pbCodeBuf[off++] = cbInstr - 1;
|
---|
255 |
|
---|
256 | /* 2. cmp tmp32(r), CsLim(r/m). */
|
---|
257 | if (idxRegTmp >= 8 || idxRegCsLim >= 8)
|
---|
258 | pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
|
---|
259 | pbCodeBuf[off++] = 0x3b;
|
---|
260 | pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegTmp & 7, idxRegCsLim & 7);
|
---|
261 |
|
---|
262 | #elif defined(RT_ARCH_ARM64)
|
---|
263 | /* 1. add tmp32, Pc, #cbInstr-1 */
|
---|
264 | pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegTmp, idxRegPc, cbInstr - 1, false /*f64Bit*/);
|
---|
265 | /* 2. cmp tmp32, CsLim */
|
---|
266 | pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegTmp, idxRegCsLim,
|
---|
267 | false /*f64Bit*/, true /*fSetFlags*/);
|
---|
268 |
|
---|
269 | #endif
|
---|
270 | iemNativeRegFreeTmp(pReNative, idxRegTmp);
|
---|
271 | }
|
---|
272 | else
|
---|
273 | {
|
---|
274 | /*
|
---|
275 | * Here we can skip step 1 and compare PC and CS.LIM directly.
|
---|
276 | */
|
---|
277 | #ifdef RT_ARCH_AMD64
|
---|
278 | /* 2. cmp eip(r), CsLim(r/m). */
|
---|
279 | if (idxRegPc >= 8 || idxRegCsLim >= 8)
|
---|
280 | pbCodeBuf[off++] = (idxRegPc < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
|
---|
281 | pbCodeBuf[off++] = 0x3b;
|
---|
282 | pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegPc & 7, idxRegCsLim & 7);
|
---|
283 |
|
---|
284 | #elif defined(RT_ARCH_ARM64)
|
---|
285 | /* 2. cmp Pc, CsLim */
|
---|
286 | pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegPc, idxRegCsLim,
|
---|
287 | false /*f64Bit*/, true /*fSetFlags*/);
|
---|
288 |
|
---|
289 | #endif
|
---|
290 | }
|
---|
291 |
|
---|
292 | /* 3. Jump if greater. */
|
---|
293 | off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
|
---|
294 |
|
---|
295 | iemNativeRegFreeTmp(pReNative, idxRegCsLim);
|
---|
296 | iemNativeRegFreeTmp(pReNative, idxRegPc);
|
---|
297 | return off;
|
---|
298 | }
|
---|
299 |
|
---|
300 |
|
---|
/**
 * Macro that considers whether we need CS.LIM checking after a branch or
 * crossing over to a new page.
 *
 * Relies on pReNative and off being in scope at the point of use.  Neither
 * argument is needed by the emitter: a_pTb is not referenced at all and
 * a_cbInstr is only marked as unused.  The macro references its own
 * a_cbInstr parameter for that, not a variable that merely happens to be
 * named 'cbInstr' in the invoking function.
 */
#define BODY_CONSIDER_CS_LIM_CHECKING(a_pTb, a_cbInstr) \
    RT_NOREF(a_cbInstr); \
    off = iemNativeEmitBltInConsiderLimChecking(pReNative, off)
|
---|
308 |
|
---|
309 | DECL_FORCE_INLINE(uint32_t)
|
---|
310 | iemNativeEmitBltInConsiderLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off)
|
---|
311 | {
|
---|
312 | /*
|
---|
313 | * This check must match the ones in the iem in iemGetTbFlagsForCurrentPc
|
---|
314 | * exactly:
|
---|
315 | *
|
---|
316 | * int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
|
---|
317 | * if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
|
---|
318 | * return fRet;
|
---|
319 | * return fRet | IEMTB_F_CS_LIM_CHECKS;
|
---|
320 | *
|
---|
321 | *
|
---|
322 | * We need EIP, CS.LIM and CS.BASE here.
|
---|
323 | */
|
---|
324 |
|
---|
325 | /* Calculate the offFromLim first: */
|
---|
326 | uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
|
---|
327 | kIemNativeGstRegUse_ReadOnly);
|
---|
328 | uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
|
---|
329 | kIemNativeGstRegUse_ReadOnly);
|
---|
330 | uint8_t const idxRegLeft = iemNativeRegAllocTmp(pReNative, &off);
|
---|
331 |
|
---|
332 | #ifdef RT_ARCH_ARM64
|
---|
333 | uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
|
---|
334 | pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegLeft, idxRegCsLim, idxRegPc);
|
---|
335 | IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
|
---|
336 | #else
|
---|
337 | off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegLeft, idxRegCsLim);
|
---|
338 | off = iemNativeEmitSubTwoGprs(pReNative, off, idxRegLeft, idxRegPc);
|
---|
339 | #endif
|
---|
340 |
|
---|
341 | iemNativeRegFreeTmp(pReNative, idxRegCsLim);
|
---|
342 | iemNativeRegFreeTmp(pReNative, idxRegPc);
|
---|
343 |
|
---|
344 | /* Calculate the threshold level (right side). */
|
---|
345 | uint8_t const idxRegCsBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
|
---|
346 | kIemNativeGstRegUse_ReadOnly);
|
---|
347 | uint8_t const idxRegRight = iemNativeRegAllocTmp(pReNative, &off);
|
---|
348 |
|
---|
349 | #ifdef RT_ARCH_ARM64
|
---|
350 | pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
|
---|
351 | Assert(Armv8A64ConvertImmRImmS2Mask32(11, 0) == GUEST_PAGE_OFFSET_MASK);
|
---|
352 | pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegRight, idxRegCsBase, 11, 0, false /*f64Bit*/);
|
---|
353 | pu32CodeBuf[off++] = Armv8A64MkInstrNeg(idxRegRight);
|
---|
354 | pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
|
---|
355 | pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
|
---|
356 | IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
|
---|
357 |
|
---|
358 | #else
|
---|
359 | off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegRight, GUEST_PAGE_OFFSET_MASK);
|
---|
360 | off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegRight, idxRegCsBase);
|
---|
361 | off = iemNativeEmitNegGpr(pReNative, off, idxRegRight);
|
---|
362 | off = iemNativeEmitAddGprImm(pReNative, off, idxRegRight, X86_PAGE_SIZE + 16);
|
---|
363 | #endif
|
---|
364 |
|
---|
365 | iemNativeRegFreeTmp(pReNative, idxRegCsBase);
|
---|
366 |
|
---|
367 | /* Compare the two and jump out if we're too close to the limit. */
|
---|
368 | off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegLeft, idxRegRight);
|
---|
369 | off = iemNativeEmitJlToNewLabel(pReNative, off, kIemNativeLabelType_NeedCsLimChecking);
|
---|
370 |
|
---|
371 | iemNativeRegFreeTmp(pReNative, idxRegRight);
|
---|
372 | iemNativeRegFreeTmp(pReNative, idxRegLeft);
|
---|
373 | return off;
|
---|
374 | }
|
---|
375 |
|
---|
376 |
|
---|
377 |
|
---|
/**
 * Macro that implements opcode (re-)checking.
 *
 * Relies on pReNative and off being in scope at the point of use.  The
 * instruction length is not needed by the emitter and is only marked as
 * unused.  The macro references its own a_cbInstr parameter for that, not a
 * variable that merely happens to be named 'cbInstr' in the invoking function.
 */
#define BODY_CHECK_OPCODES(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
    RT_NOREF(a_cbInstr); \
    off = iemNativeEmitBltInCheckOpcodes(pReNative, off, (a_pTb), (a_idxRange), (a_offRange))
|
---|
384 |
|
---|
385 | DECL_FORCE_INLINE(uint32_t)
|
---|
386 | iemNativeEmitBltInCheckOpcodes(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint16_t offRange)
|
---|
387 | {
|
---|
388 | Assert(idxRange < pTb->cRanges && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges));
|
---|
389 | Assert(offRange < pTb->aRanges[idxRange].cbOpcodes);
|
---|
390 |
|
---|
391 | uint32_t const idxLabelObsoleteTb = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ObsoleteTb);
|
---|
392 |
|
---|
393 | /*
|
---|
394 | * Where to start and how much to compare.
|
---|
395 | *
|
---|
396 | * Looking at the ranges produced when r160746 was running a DOS VM with TB
|
---|
397 | * logging, the ranges can be anything from 1 byte to at least 0x197 bytes,
|
---|
398 | * with the 6, 5, 4, 7, 8, 40, 3, 2, 9 and 10 being the top 10 in the sample.
|
---|
399 | *
|
---|
400 | * The top 10 for the early boot phase of a 64-bit debian 9.4 VM: 5, 9, 8,
|
---|
401 | * 12, 10, 11, 6, 13, 15 and 16. Max 0x359 bytes. Same revision as above.
|
---|
402 | */
|
---|
403 | uint16_t offPage = pTb->aRanges[idxRange].offPhysPage + offRange;
|
---|
404 | uint16_t cbLeft = pTb->aRanges[idxRange].cbOpcodes - offRange;
|
---|
405 | Assert(cbLeft > 0);
|
---|
406 | uint8_t const *pbOpcodes = &pTb->pabOpcodes[pTb->aRanges[idxRange].offOpcodes];
|
---|
407 | uint32_t offConsolidatedJump = UINT32_MAX;
|
---|
408 |
|
---|
409 | #ifdef RT_ARCH_AMD64
|
---|
410 | /* AMD64/x86 offers a bunch of options. Smaller stuff will can be
|
---|
411 | completely inlined, for larger we use REPE CMPS. */
|
---|
412 | # define CHECK_OPCODES_CMP_IMMXX(a_idxReg, a_bOpcode) /* cost: 3 bytes */ do { \
|
---|
413 | pbCodeBuf[off++] = a_bOpcode; \
|
---|
414 | Assert(offPage < 127); \
|
---|
415 | pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 7, a_idxReg); \
|
---|
416 | pbCodeBuf[off++] = RT_BYTE1(offPage); \
|
---|
417 | } while (0)
|
---|
418 |
|
---|
419 | # define CHECK_OPCODES_CMP_JMP() /* cost: 7 bytes first time, then 2 bytes */ do { \
|
---|
420 | if (offConsolidatedJump != UINT32_MAX) \
|
---|
421 | { \
|
---|
422 | int32_t const offDisp = (int32_t)offConsolidatedJump - (int32_t)(off + 2); \
|
---|
423 | Assert(offDisp >= -128); \
|
---|
424 | pbCodeBuf[off++] = 0x75; /* jnz near */ \
|
---|
425 | pbCodeBuf[off++] = (uint8_t)offDisp; \
|
---|
426 | } \
|
---|
427 | else \
|
---|
428 | { \
|
---|
429 | pbCodeBuf[off++] = 0x74; /* jz near +5 */ \
|
---|
430 | pbCodeBuf[off++] = 0x05; \
|
---|
431 | offConsolidatedJump = off; \
|
---|
432 | pbCodeBuf[off++] = 0xe9; /* jmp rel32 */ \
|
---|
433 | iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_Rel32, -4); \
|
---|
434 | pbCodeBuf[off++] = 0x00; \
|
---|
435 | pbCodeBuf[off++] = 0x00; \
|
---|
436 | pbCodeBuf[off++] = 0x00; \
|
---|
437 | pbCodeBuf[off++] = 0x00; \
|
---|
438 | } \
|
---|
439 | } while (0)
|
---|
440 |
|
---|
441 | # define CHECK_OPCODES_CMP_IMM32(a_idxReg) /* cost: 3+4+2 = 9 */ do { \
|
---|
442 | CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
|
---|
443 | pbCodeBuf[off++] = *pbOpcodes++; \
|
---|
444 | pbCodeBuf[off++] = *pbOpcodes++; \
|
---|
445 | pbCodeBuf[off++] = *pbOpcodes++; \
|
---|
446 | pbCodeBuf[off++] = *pbOpcodes++; \
|
---|
447 | cbLeft -= 4; \
|
---|
448 | offPage += 4; \
|
---|
449 | CHECK_OPCODES_CMP_JMP(); \
|
---|
450 | } while (0)
|
---|
451 |
|
---|
452 | # define CHECK_OPCODES_CMP_IMM16(a_idxReg) /* cost: 1+3+2+2 = 8 */ do { \
|
---|
453 | pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; \
|
---|
454 | CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
|
---|
455 | pbCodeBuf[off++] = *pbOpcodes++; \
|
---|
456 | pbCodeBuf[off++] = *pbOpcodes++; \
|
---|
457 | cbLeft -= 2; \
|
---|
458 | offPage += 2; \
|
---|
459 | CHECK_OPCODES_CMP_JMP(); \
|
---|
460 | } while (0)
|
---|
461 |
|
---|
462 | # define CHECK_OPCODES_CMP_IMM8(a_idxReg) /* cost: 3+1+2 = 6 */ do { \
|
---|
463 | CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x80); \
|
---|
464 | pbCodeBuf[off++] = *pbOpcodes++; \
|
---|
465 | cbLeft -= 1; \
|
---|
466 | offPage += 1; \
|
---|
467 | CHECK_OPCODES_CMP_JMP(); \
|
---|
468 | } while (0)
|
---|
469 |
|
---|
470 | # define CHECK_OPCODES_CMPSX(a_bOpcode, a_cbToSubtract, a_bPrefix) /* cost: 2+2 = 4 */ do { \
|
---|
471 | if (a_bPrefix) \
|
---|
472 | pbCodeBuf[off++] = (a_bPrefix); \
|
---|
473 | pbCodeBuf[off++] = (a_bOpcode); \
|
---|
474 | CHECK_OPCODES_CMP_JMP(); \
|
---|
475 | cbLeft -= (a_cbToSubtract); \
|
---|
476 | } while (0)
|
---|
477 |
|
---|
478 | # define CHECK_OPCODES_ECX_IMM(a_uValue) /* cost: 5 */ do { \
|
---|
479 | pbCodeBuf[off++] = 0xb8 + X86_GREG_xCX; \
|
---|
480 | pbCodeBuf[off++] = RT_BYTE1(a_uValue); \
|
---|
481 | pbCodeBuf[off++] = RT_BYTE2(a_uValue); \
|
---|
482 | pbCodeBuf[off++] = RT_BYTE3(a_uValue); \
|
---|
483 | pbCodeBuf[off++] = RT_BYTE4(a_uValue); \
|
---|
484 | } while (0)
|
---|
485 |
|
---|
486 | if (cbLeft <= 24)
|
---|
487 | {
|
---|
488 | uint8_t const idxRegTmp = iemNativeRegAllocTmpEx(pReNative, &off,
|
---|
489 | ( RT_BIT_32(X86_GREG_xAX)
|
---|
490 | | RT_BIT_32(X86_GREG_xCX)
|
---|
491 | | RT_BIT_32(X86_GREG_xDX)
|
---|
492 | | RT_BIT_32(X86_GREG_xBX)
|
---|
493 | | RT_BIT_32(X86_GREG_xSI)
|
---|
494 | | RT_BIT_32(X86_GREG_xDI))
|
---|
495 | & ~IEMNATIVE_REG_FIXED_MASK); /* pick reg not requiring rex prefix */
|
---|
496 | off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf));
|
---|
497 | if (offPage >= 128 - cbLeft)
|
---|
498 | {
|
---|
499 | off = iemNativeEmitAddGprImm(pReNative, off, idxRegTmp, offPage & ~(uint16_t)3);
|
---|
500 | offPage &= 3;
|
---|
501 | }
|
---|
502 |
|
---|
503 | uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 14 + 54 + 8 + 6 /* = 87 */);
|
---|
504 |
|
---|
505 | if (cbLeft > 8)
|
---|
506 | switch (offPage & 3)
|
---|
507 | {
|
---|
508 | case 0:
|
---|
509 | break;
|
---|
510 | case 1: /* cost: 6 + 8 = 14 */
|
---|
511 | CHECK_OPCODES_CMP_IMM8(idxRegTmp);
|
---|
512 | RT_FALL_THRU();
|
---|
513 | case 2: /* cost: 8 */
|
---|
514 | CHECK_OPCODES_CMP_IMM16(idxRegTmp);
|
---|
515 | break;
|
---|
516 | case 3: /* cost: 6 */
|
---|
517 | CHECK_OPCODES_CMP_IMM8(idxRegTmp);
|
---|
518 | break;
|
---|
519 | }
|
---|
520 |
|
---|
521 | while (cbLeft >= 4)
|
---|
522 | CHECK_OPCODES_CMP_IMM32(idxRegTmp); /* max iteration: 24/4 = 6; --> cost: 6 * 9 = 54 */
|
---|
523 |
|
---|
524 | if (cbLeft >= 2)
|
---|
525 | CHECK_OPCODES_CMP_IMM16(idxRegTmp); /* cost: 8 */
|
---|
526 | if (cbLeft)
|
---|
527 | CHECK_OPCODES_CMP_IMM8(idxRegTmp); /* cost: 6 */
|
---|
528 |
|
---|
529 | IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
|
---|
530 | iemNativeRegFreeTmp(pReNative, idxRegTmp);
|
---|
531 | }
|
---|
532 | else
|
---|
533 | {
|
---|
534 | /* RDI = &pbInstrBuf[offPage] */
|
---|
535 | uint8_t const idxRegDi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xDI));
|
---|
536 | off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegDi, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
|
---|
537 | if (offPage != 0)
|
---|
538 | off = iemNativeEmitAddGprImm(pReNative, off, idxRegDi, offPage);
|
---|
539 |
|
---|
540 | /* RSI = pbOpcodes */
|
---|
541 | uint8_t const idxRegSi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xSI));
|
---|
542 | off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegSi, (uintptr_t)pbOpcodes);
|
---|
543 |
|
---|
544 | /* RCX = counts. */
|
---|
545 | uint8_t const idxRegCx = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xCX));
|
---|
546 |
|
---|
547 | uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 10 + 5 + 5 + 3 + 4 + 3 /*= 35*/);
|
---|
548 |
|
---|
549 | /** @todo profile and optimize this further. Maybe an idea to align by
|
---|
550 | * offPage if the two cannot be reconsidled. */
|
---|
551 | /* Align by the page offset, so that at least one of the accesses are naturally aligned. */
|
---|
552 | switch (offPage & 7) /* max cost: 10 */
|
---|
553 | {
|
---|
554 | case 0:
|
---|
555 | break;
|
---|
556 | case 1: /* cost: 3+4+3 = 10 */
|
---|
557 | CHECK_OPCODES_CMPSX(0xa6, 1, 0);
|
---|
558 | RT_FALL_THRU();
|
---|
559 | case 2: /* cost: 4+3 = 7 */
|
---|
560 | CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
|
---|
561 | CHECK_OPCODES_CMPSX(0xa7, 4, 0);
|
---|
562 | break;
|
---|
563 | case 3: /* cost: 3+3 = 6 */
|
---|
564 | CHECK_OPCODES_CMPSX(0xa6, 1, 0);
|
---|
565 | RT_FALL_THRU();
|
---|
566 | case 4: /* cost: 3 */
|
---|
567 | CHECK_OPCODES_CMPSX(0xa7, 4, 0);
|
---|
568 | break;
|
---|
569 | case 5: /* cost: 3+4 = 7 */
|
---|
570 | CHECK_OPCODES_CMPSX(0xa6, 1, 0);
|
---|
571 | RT_FALL_THRU();
|
---|
572 | case 6: /* cost: 4 */
|
---|
573 | CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
|
---|
574 | break;
|
---|
575 | case 7: /* cost: 3 */
|
---|
576 | CHECK_OPCODES_CMPSX(0xa6, 1, 0);
|
---|
577 | break;
|
---|
578 | }
|
---|
579 |
|
---|
580 | /* Compare qwords: */
|
---|
581 | uint32_t const cQWords = cbLeft >> 3;
|
---|
582 | CHECK_OPCODES_ECX_IMM(cQWords); /* cost: 5 */
|
---|
583 |
|
---|
584 | pbCodeBuf[off++] = X86_OP_PRF_REPZ; /* cost: 5 */
|
---|
585 | CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_REX_W);
|
---|
586 | cbLeft &= 7;
|
---|
587 |
|
---|
588 | if (cbLeft & 4)
|
---|
589 | CHECK_OPCODES_CMPSX(0xa7, 0, 0); /* cost: 3 */
|
---|
590 | if (cbLeft & 2)
|
---|
591 | CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_PRF_SIZE_OP); /* cost: 4 */
|
---|
592 | if (cbLeft & 2)
|
---|
593 | CHECK_OPCODES_CMPSX(0xa6, 0, 0); /* cost: 3 */
|
---|
594 |
|
---|
595 | IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
|
---|
596 | iemNativeRegFreeTmp(pReNative, idxRegCx);
|
---|
597 | iemNativeRegFreeTmp(pReNative, idxRegSi);
|
---|
598 | iemNativeRegFreeTmp(pReNative, idxRegDi);
|
---|
599 | }
|
---|
600 |
|
---|
601 | #elif defined(RT_ARCH_ARM64)
|
---|
602 | /* We need pbInstrBuf in a register, whatever we do. */
|
---|
603 | uint8_t const idxRegSrc1Ptr = iemNativeRegAllocTmp(pReNative, &off);
|
---|
604 | off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegSrc1Ptr, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
|
---|
605 |
|
---|
606 | /* We also need at least one more register for holding bytes & words we
|
---|
607 | load via pbInstrBuf. */
|
---|
608 | uint8_t const idxRegSrc1Val = iemNativeRegAllocTmp(pReNative, &off);
|
---|
609 |
|
---|
610 | uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
|
---|
611 |
|
---|
612 | /* One byte compare can be done with the opcode byte as an immediate. We'll
|
---|
613 | do this to uint16_t align src1. */
|
---|
614 | bool fPendingJmp = RT_BOOL(offPage & 1);
|
---|
615 | if (fPendingJmp)
|
---|
616 | {
|
---|
617 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
|
---|
618 | pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, *pbOpcodes++, false /*f64Bit*/);
|
---|
619 | offPage += 1;
|
---|
620 | cbLeft -= 1;
|
---|
621 | }
|
---|
622 |
|
---|
623 | if (cbLeft > 0)
|
---|
624 | {
|
---|
625 | /* We need a register for holding the opcode bytes we're comparing with,
|
---|
626 | as CCMP only has a 5-bit immediate form and thus cannot hold bytes. */
|
---|
627 | uint8_t const idxRegSrc2Val = iemNativeRegAllocTmp(pReNative, &off);
|
---|
628 |
|
---|
629 | /* Word (uint32_t) aligning the src1 pointer is best done using a 16-bit constant load. */
|
---|
630 | if ((offPage & 3) && cbLeft >= 2)
|
---|
631 | {
|
---|
632 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 2);
|
---|
633 | pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
|
---|
634 | if (fPendingJmp)
|
---|
635 | pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
|
---|
636 | ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
|
---|
637 | else
|
---|
638 | {
|
---|
639 | pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
|
---|
640 | fPendingJmp = true;
|
---|
641 | }
|
---|
642 | pbOpcodes += 2;
|
---|
643 | offPage += 2;
|
---|
644 | cbLeft -= 2;
|
---|
645 | }
|
---|
646 |
|
---|
647 | /* DWord (uint64_t) aligning the src1 pointer. We use a 32-bit constant here for simplicity. */
|
---|
648 | if ((offPage & 7) && cbLeft >= 4)
|
---|
649 | {
|
---|
650 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 4);
|
---|
651 | off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
|
---|
652 | RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
|
---|
653 | if (fPendingJmp)
|
---|
654 | pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
|
---|
655 | ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
|
---|
656 | else
|
---|
657 | {
|
---|
658 | pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
|
---|
659 | fPendingJmp = true;
|
---|
660 | }
|
---|
661 | pbOpcodes += 4;
|
---|
662 | offPage += 4;
|
---|
663 | cbLeft -= 4;
|
---|
664 | }
|
---|
665 |
|
---|
666 | /*
|
---|
667 | * If we've got 16 bytes or more left, switch to memcmp-style.
|
---|
668 | */
|
---|
669 | if (cbLeft >= 16)
|
---|
670 | {
|
---|
671 | /* We need a pointer to the copy of the original opcode bytes. */
|
---|
672 | uint8_t const idxRegSrc2Ptr = iemNativeRegAllocTmp(pReNative, &off);
|
---|
673 | off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Ptr, (uintptr_t)pbOpcodes);
|
---|
674 |
|
---|
675 | /* If there are more than 32 bytes to compare we create a loop, for
|
---|
676 | which we'll need a loop register. */
|
---|
677 | if (cbLeft >= 64)
|
---|
678 | {
|
---|
679 | if (fPendingJmp)
|
---|
680 | {
|
---|
681 | iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
|
---|
682 | pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
|
---|
683 | fPendingJmp = false;
|
---|
684 | }
|
---|
685 |
|
---|
686 | uint8_t const idxRegLoop = iemNativeRegAllocTmp(pReNative, &off);
|
---|
687 | uint16_t const cLoops = cbLeft / 32;
|
---|
688 | cbLeft = cbLeft % 32;
|
---|
689 | pbOpcodes += cLoops * 32;
|
---|
690 | pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegLoop, cLoops);
|
---|
691 |
|
---|
692 | if (offPage != 0) /** @todo optimize out this instruction. */
|
---|
693 | {
|
---|
694 | pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, offPage);
|
---|
695 | offPage = 0;
|
---|
696 | }
|
---|
697 |
|
---|
698 | uint32_t const offLoopStart = off;
|
---|
699 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 0);
|
---|
700 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 0);
|
---|
701 | pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
|
---|
702 |
|
---|
703 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 1);
|
---|
704 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 1);
|
---|
705 | pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
|
---|
706 | ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
|
---|
707 |
|
---|
708 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 2);
|
---|
709 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 2);
|
---|
710 | pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
|
---|
711 | ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
|
---|
712 |
|
---|
713 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 3);
|
---|
714 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 3);
|
---|
715 | pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
|
---|
716 | ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
|
---|
717 |
|
---|
718 | iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
|
---|
719 | pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
|
---|
720 |
|
---|
721 | /* Advance and loop. */
|
---|
722 | pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, 0x20);
|
---|
723 | pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc2Ptr, idxRegSrc2Ptr, 0x20);
|
---|
724 | pu32CodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegLoop, idxRegLoop, 1, false /*f64Bit*/, true /*fSetFlags*/);
|
---|
725 | pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, (int32_t)offLoopStart - (int32_t)off);
|
---|
726 |
|
---|
727 | iemNativeRegFreeTmp(pReNative, idxRegLoop);
|
---|
728 | }
|
---|
729 |
|
---|
730 | /* Deal with any remaining dwords (uint64_t). There can be up to
|
---|
731 | three if we looped and four if we didn't. */
|
---|
732 | uint32_t offSrc2 = 0;
|
---|
733 | while (cbLeft >= 8)
|
---|
734 | {
|
---|
735 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val,
|
---|
736 | idxRegSrc1Ptr, offPage / 8);
|
---|
737 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val,
|
---|
738 | idxRegSrc2Ptr, offSrc2 / 8);
|
---|
739 | if (fPendingJmp)
|
---|
740 | pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
|
---|
741 | ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
|
---|
742 | else
|
---|
743 | {
|
---|
744 | pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
|
---|
745 | fPendingJmp = true;
|
---|
746 | }
|
---|
747 | pbOpcodes += 8;
|
---|
748 | offPage += 8;
|
---|
749 | offSrc2 += 8;
|
---|
750 | cbLeft -= 8;
|
---|
751 | }
|
---|
752 |
|
---|
753 | iemNativeRegFreeTmp(pReNative, idxRegSrc2Ptr);
|
---|
754 | /* max cost thus far: memcmp-loop=43 vs memcmp-no-loop=30 */
|
---|
755 | }
|
---|
756 | /*
|
---|
757 | * Otherwise, we compare with constants and merge with the general mop-up.
|
---|
758 | */
|
---|
759 | else
|
---|
760 | {
|
---|
761 | while (cbLeft >= 8)
|
---|
762 | {
|
---|
763 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr,
|
---|
764 | offPage / 8);
|
---|
765 | off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Val,
|
---|
766 | RT_MAKE_U64_FROM_MSB_U8(pbOpcodes[7], pbOpcodes[6], pbOpcodes[5], pbOpcodes[4],
|
---|
767 | pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
|
---|
768 | if (fPendingJmp)
|
---|
769 | pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
|
---|
770 | ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, true /*f64Bit*/);
|
---|
771 | else
|
---|
772 | {
|
---|
773 | pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, true /*f64Bit*/);
|
---|
774 | fPendingJmp = true;
|
---|
775 | }
|
---|
776 | pbOpcodes += 8;
|
---|
777 | offPage += 8;
|
---|
778 | cbLeft -= 8;
|
---|
779 | }
|
---|
780 | /* max cost thus far: 21 */
|
---|
781 | }
|
---|
782 |
|
---|
783 | /* Deal with any remaining bytes (7 or less). */
|
---|
784 | Assert(cbLeft < 8);
|
---|
785 | if (cbLeft >= 4)
|
---|
786 | {
|
---|
787 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr,
|
---|
788 | offPage / 4);
|
---|
789 | off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
|
---|
790 | RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
|
---|
791 | if (fPendingJmp)
|
---|
792 | pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
|
---|
793 | ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
|
---|
794 | else
|
---|
795 | {
|
---|
796 | pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
|
---|
797 | fPendingJmp = true;
|
---|
798 | }
|
---|
799 | pbOpcodes += 4;
|
---|
800 | offPage += 4;
|
---|
801 | cbLeft -= 4;
|
---|
802 |
|
---|
803 | }
|
---|
804 |
|
---|
805 | if (cbLeft >= 2)
|
---|
806 | {
|
---|
807 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr,
|
---|
808 | offPage / 2);
|
---|
809 | pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
|
---|
810 | if (fPendingJmp)
|
---|
811 | pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
|
---|
812 | ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
|
---|
813 | else
|
---|
814 | {
|
---|
815 | pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
|
---|
816 | fPendingJmp = true;
|
---|
817 | }
|
---|
818 | pbOpcodes += 2;
|
---|
819 | offPage += 2;
|
---|
820 | cbLeft -= 2;
|
---|
821 | }
|
---|
822 |
|
---|
823 | if (cbLeft > 0)
|
---|
824 | {
|
---|
825 | Assert(cbLeft == 1);
|
---|
826 | pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
|
---|
827 | if (fPendingJmp)
|
---|
828 | {
|
---|
829 | pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, pbOpcodes[0]);
|
---|
830 | pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
|
---|
831 | ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
|
---|
832 | }
|
---|
833 | else
|
---|
834 | {
|
---|
835 | pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, pbOpcodes[0], false /*f64Bit*/);
|
---|
836 | fPendingJmp = true;
|
---|
837 | }
|
---|
838 | pbOpcodes += 1;
|
---|
839 | offPage += 1;
|
---|
840 | cbLeft -= 1;
|
---|
841 | }
|
---|
842 |
|
---|
843 | iemNativeRegFreeTmp(pReNative, idxRegSrc2Val);
|
---|
844 | }
|
---|
845 | Assert(cbLeft == 0);
|
---|
846 |
|
---|
847 | /*
|
---|
848 | * Finally, the branch on difference.
|
---|
849 | */
|
---|
850 | if (fPendingJmp)
|
---|
851 | {
|
---|
852 | iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
|
---|
853 | pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
|
---|
854 | }
|
---|
855 | RT_NOREF(pu32CodeBuf, cbLeft, offPage, pbOpcodes, offConsolidatedJump, idxLabelObsoleteTb);
|
---|
856 |
|
---|
857 | /* max costs: memcmp-loop=54; memcmp-no-loop=41; only-src1-ptr=32 */
|
---|
858 | IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
|
---|
859 | iemNativeRegFreeTmp(pReNative, idxRegSrc1Val);
|
---|
860 | iemNativeRegFreeTmp(pReNative, idxRegSrc1Ptr);
|
---|
861 |
|
---|
862 | #else
|
---|
863 | # error "Port me"
|
---|
864 | #endif
|
---|
865 | return off;
|
---|
866 | }
|
---|
867 |
|
---|
868 |
|
---|
869 | #ifdef BODY_CHECK_CS_LIM
|
---|
870 | /**
|
---|
871 | * Built-in function that checks the EIP/IP + uParam0 is within CS.LIM,
|
---|
872 | * raising a \#GP(0) if this isn't the case.  uParam0 presumably corresponds to pCallEntry->auParams[0], which is read into cbInstr below.
|
---|
873 | */
|
---|
874 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
|
---|
875 | {
|
---|
876 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Call parameter 0, cast down to uint32_t. */
|
---|
877 | BODY_SET_CUR_INSTR();
|
---|
878 | BODY_CHECK_CS_LIM(cbInstr);
|
---|
879 | return off;
|
---|
880 | }
|
---|
881 | #endif
|
---|
882 |
|
---|
883 |
|
---|
884 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_CS_LIM)
|
---|
885 | /**
|
---|
886 | * Built-in function for re-checking opcodes and CS.LIM after an instruction
|
---|
887 | * that may have modified them.  Reads cbInstr, idxRange and offRange from pCallEntry->auParams[0..2].
|
---|
888 | */
|
---|
889 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
|
---|
890 | {
|
---|
891 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
892 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Call parameter 0, cast down to uint32_t. */
|
---|
893 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
894 | uint32_t const offRange = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
895 | BODY_SET_CUR_INSTR();
|
---|
896 | BODY_CHECK_CS_LIM(cbInstr);
|
---|
897 | BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
|
---|
898 | return off;
|
---|
899 | }
|
---|
900 | #endif
|
---|
901 |
|
---|
902 |
|
---|
903 | #if defined(BODY_CHECK_OPCODES)
|
---|
904 | /**
|
---|
905 | * Built-in function for re-checking opcodes after an instruction that may have
|
---|
906 | * modified them.  Reads cbInstr, idxRange and offRange from pCallEntry->auParams[0..2].
|
---|
907 | */
|
---|
908 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
|
---|
909 | {
|
---|
910 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
911 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Call parameter 0, cast down to uint32_t. */
|
---|
912 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
913 | uint32_t const offRange = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
914 | BODY_SET_CUR_INSTR();
|
---|
915 | BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
|
---|
916 | return off;
|
---|
917 | }
|
---|
918 | #endif
|
---|
919 |
|
---|
920 |
|
---|
921 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
|
---|
922 | /**
|
---|
923 | * Built-in function for re-checking opcodes and considering the need for CS.LIM
|
---|
924 | * checking after an instruction that may have modified them.  Reads cbInstr, idxRange and offRange from pCallEntry->auParams[0..2].
|
---|
925 | */
|
---|
926 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
|
---|
927 | {
|
---|
928 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
929 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Call parameter 0, cast down to uint32_t. */
|
---|
930 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
931 | uint32_t const offRange = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
932 | BODY_SET_CUR_INSTR();
|
---|
933 | BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
|
---|
934 | BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
|
---|
935 | return off;
|
---|
936 | }
|
---|
937 | #endif
|
---|
938 |
|
---|
939 |
|
---|
940 | /*
|
---|
941 | * Post-branching checkers.
|
---|
942 | */
|
---|
943 |
|
---|
944 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
|
---|
945 | /**
|
---|
946 | * Built-in function for checking CS.LIM, checking the PC and checking opcodes
|
---|
947 | * after conditional branching within the same page.  Reads cbInstr, idxRange and offRange from pCallEntry->auParams[0..2].
|
---|
948 | *
|
---|
949 | * @see iemThreadedFunc_BltIn_CheckPcAndOpcodes
|
---|
950 | */
|
---|
951 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
|
---|
952 | {
|
---|
953 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
954 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Call parameter 0, cast down to uint32_t. */
|
---|
955 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
956 | uint32_t const offRange = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
957 | //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
|
---|
958 | BODY_SET_CUR_INSTR();
|
---|
959 | BODY_CHECK_CS_LIM(cbInstr);
|
---|
960 | BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, cbInstr);
|
---|
961 | BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
|
---|
962 | //LogFunc(("okay\n"));
|
---|
963 | return off;
|
---|
964 | }
|
---|
965 | #endif
|
---|
966 |
|
---|
967 |
|
---|
968 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH)
|
---|
969 | /**
|
---|
970 | * Built-in function for checking the PC and checking opcodes after conditional
|
---|
971 | * branching within the same page.  Reads cbInstr, idxRange and offRange from pCallEntry->auParams[0..2].
|
---|
972 | *
|
---|
973 | * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
|
---|
974 | */
|
---|
975 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
|
---|
976 | {
|
---|
977 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
978 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Call parameter 0, cast down to uint32_t. */
|
---|
979 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
980 | uint32_t const offRange = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
981 | //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
|
---|
982 | BODY_SET_CUR_INSTR();
|
---|
983 | BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, cbInstr);
|
---|
984 | BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
|
---|
985 | //LogFunc(("okay\n"));
|
---|
986 | return off;
|
---|
987 | }
|
---|
988 | #endif
|
---|
989 |
|
---|
990 |
|
---|
991 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
|
---|
992 | /**
|
---|
993 | * Built-in function for checking the PC and checking opcodes and considering
|
---|
994 | * the need for CS.LIM checking after conditional branching within the same
|
---|
995 | * page.  Reads cbInstr, idxRange and offRange from pCallEntry->auParams[0..2].
|
---|
996 | *
|
---|
997 | * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
|
---|
998 | */
|
---|
999 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
|
---|
1000 | {
|
---|
1001 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
1002 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Call parameter 0, cast down to uint32_t. */
|
---|
1003 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
1004 | uint32_t const offRange = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
1005 | //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
|
---|
1006 | BODY_SET_CUR_INSTR();
|
---|
1007 | BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
|
---|
1008 | BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, cbInstr);
|
---|
1009 | BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
|
---|
1010 | //LogFunc(("okay\n"));
|
---|
1011 | return off;
|
---|
1012 | }
|
---|
1013 | #endif
|
---|
1014 |
|
---|
1015 |
|
---|
1016 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
|
---|
1017 | /**
|
---|
1018 | * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
|
---|
1019 | * transitioning to a different code page.
|
---|
1020 | *
|
---|
1021 | * The code page transition can either be natural over onto the next page (with
|
---|
1022 | * the instruction starting at page offset zero) or by means of branching.  Reads cbInstr, idxRange and offRange from pCallEntry->auParams[0..2].
|
---|
1023 | *
|
---|
1024 | * @see iemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
|
---|
1025 | */
|
---|
1026 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
|
---|
1027 | {
|
---|
1028 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
1029 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Call parameter 0, cast down to uint32_t. */
|
---|
1030 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
1031 | uint32_t const offRange = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
1032 | //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
|
---|
1033 | BODY_SET_CUR_INSTR();
|
---|
1034 | BODY_CHECK_CS_LIM(cbInstr);
|
---|
1035 | BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
|
---|
1036 | BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
|
---|
1037 | //LogFunc(("okay\n"));
|
---|
1038 | return off;
|
---|
1039 | }
|
---|
1040 | #endif
|
---|
1041 |
|
---|
1042 |
|
---|
1043 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH)
|
---|
1044 | /**
|
---|
1045 | * Built-in function for loading TLB and checking opcodes when transitioning to
|
---|
1046 | * a different code page.
|
---|
1047 | *
|
---|
1048 | * The code page transition can either be natural over onto the next page (with
|
---|
1049 | * the instruction starting at page offset zero) or by means of branching.  Reads cbInstr, idxRange and offRange from pCallEntry->auParams[0..2].
|
---|
1050 | *
|
---|
1051 | * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
|
---|
1052 | */
|
---|
1053 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
|
---|
1054 | {
|
---|
1055 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
1056 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Call parameter 0, cast down to uint32_t. */
|
---|
1057 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
1058 | uint32_t const offRange = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
1059 | //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
|
---|
1060 | BODY_SET_CUR_INSTR();
|
---|
1061 | BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
|
---|
1062 | BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
|
---|
1063 | //LogFunc(("okay\n"));
|
---|
1064 | return off;
|
---|
1065 | }
|
---|
1066 | #endif
|
---|
1067 |
|
---|
1068 |
|
---|
1069 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
|
---|
1070 | /**
|
---|
1071 | * Built-in function for loading TLB and checking opcodes and considering the
|
---|
1072 | * need for CS.LIM checking when transitioning to a different code page.
|
---|
1073 | *
|
---|
1074 | * The code page transition can either be natural over onto the next page (with
|
---|
1075 | * the instruction starting at page offset zero) or by means of branching.  Reads cbInstr, idxRange and offRange from pCallEntry->auParams[0..2].
|
---|
1076 | *
|
---|
1077 | * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
|
---|
1078 | */
|
---|
1079 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
|
---|
1080 | {
|
---|
1081 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
1082 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Call parameter 0, cast down to uint32_t. */
|
---|
1083 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
1084 | uint32_t const offRange = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
1085 | //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
|
---|
1086 | BODY_SET_CUR_INSTR();
|
---|
1087 | BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
|
---|
1088 | BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
|
---|
1089 | BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
|
---|
1090 | //LogFunc(("okay\n"));
|
---|
1091 | return off;
|
---|
1092 | }
|
---|
1093 | #endif
|
---|
1094 |
|
---|
1095 |
|
---|
1096 |
|
---|
1097 | /*
|
---|
1098 | * Natural page crossing checkers.
|
---|
1099 | */
|
---|
1100 |
|
---|
1101 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
|
---|
1102 | /**
|
---|
1103 | * Built-in function for checking CS.LIM, loading TLB and checking opcodes on
|
---|
1104 | * both pages when transitioning to a different code page.
|
---|
1105 | *
|
---|
1106 | * This is used when the previous instruction requires revalidation of opcode
|
---|
1107 | * bytes and the current instruction straddles a page boundary with opcode bytes
|
---|
1108 | * in both the old and new page.  Call parameter 0 packs cbInstr (low 32 bits) and cbStartPage (high 32 bits); parameters 1 and 2 give idxRange1 and offRange1.
|
---|
1109 | *
|
---|
1110 | * @see iemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
|
---|
1111 | */
|
---|
1112 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
|
---|
1113 | {
|
---|
1114 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
1115 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Low 32 bits of call parameter 0. */
|
---|
1116 | uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32); /* High 32 bits of call parameter 0. */
|
---|
1117 | uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
1118 | uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
1119 | uint32_t const idxRange2 = idxRange1 + 1; /* The range right after idxRange1; used for the TLB load and second opcode check. */
|
---|
1120 | BODY_SET_CUR_INSTR();
|
---|
1121 | BODY_CHECK_CS_LIM(cbInstr);
|
---|
1122 | BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr); /* First range, from offRange1. */
|
---|
1123 | BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
|
---|
1124 | BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr); /* Second range, with 0 as the range offset. */
|
---|
1125 | return off;
|
---|
1126 | }
|
---|
1127 | #endif
|
---|
1128 |
|
---|
1129 |
|
---|
1130 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
|
---|
1131 | /**
|
---|
1132 | * Built-in function for loading TLB and checking opcodes on both pages when
|
---|
1133 | * transitioning to a different code page.
|
---|
1134 | *
|
---|
1135 | * This is used when the previous instruction requires revalidation of opcode
|
---|
1136 | * bytes and the current instruction straddles a page boundary with opcode bytes
|
---|
1137 | * in both the old and new page.  Call parameter 0 packs cbInstr (low 32 bits) and cbStartPage (high 32 bits); parameters 1 and 2 give idxRange1 and offRange1.
|
---|
1138 | *
|
---|
1139 | * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
|
---|
1140 | */
|
---|
1141 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
|
---|
1142 | {
|
---|
1143 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
1144 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Low 32 bits of call parameter 0. */
|
---|
1145 | uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32); /* High 32 bits of call parameter 0. */
|
---|
1146 | uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
1147 | uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
1148 | uint32_t const idxRange2 = idxRange1 + 1; /* The range right after idxRange1; used for the TLB load and second opcode check. */
|
---|
1149 | BODY_SET_CUR_INSTR();
|
---|
1150 | BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr); /* First range, from offRange1. */
|
---|
1151 | BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
|
---|
1152 | BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr); /* Second range, with 0 as the range offset. */
|
---|
1153 | return off;
|
---|
1154 | }
|
---|
1155 | #endif
|
---|
1156 |
|
---|
1157 |
|
---|
1158 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
|
---|
1159 | /**
|
---|
1160 | * Built-in function for loading TLB and checking opcodes on both pages and
|
---|
1161 | * considering the need for CS.LIM checking when transitioning to a different
|
---|
1162 | * code page.
|
---|
1163 | *
|
---|
1164 | * This is used when the previous instruction requires revalidation of opcode
|
---|
1165 | * bytes and the current instruction straddles a page boundary with opcode bytes
|
---|
1166 | * in both the old and new page.  Call parameter 0 packs cbInstr (low 32 bits) and cbStartPage (high 32 bits); parameters 1 and 2 give idxRange1 and offRange1.
|
---|
1167 | *
|
---|
1168 | * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
|
---|
1169 | */
|
---|
1170 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
|
---|
1171 | {
|
---|
1172 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
1173 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Low 32 bits of call parameter 0. */
|
---|
1174 | uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32); /* High 32 bits of call parameter 0. */
|
---|
1175 | uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
1176 | uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2]; /* Call parameter 2, cast down to uint32_t. */
|
---|
1177 | uint32_t const idxRange2 = idxRange1 + 1; /* The range right after idxRange1; used for the TLB load and second opcode check. */
|
---|
1178 | BODY_SET_CUR_INSTR();
|
---|
1179 | BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
|
---|
1180 | BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr); /* First range, from offRange1. */
|
---|
1181 | BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
|
---|
1182 | BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr); /* Second range, with 0 as the range offset. */
|
---|
1183 | return off;
|
---|
1184 | }
|
---|
1185 | #endif
|
---|
1186 |
|
---|
1187 |
|
---|
1188 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
|
---|
1189 | /**
|
---|
1190 | * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
|
---|
1191 | * advancing naturally to a different code page.
|
---|
1192 | *
|
---|
1193 | * Only the opcodes on the new page are checked (range idxRange1 + 1, with 0 as the range offset).  Call parameter 0 packs cbInstr (low 32 bits) and cbStartPage (high 32 bits); parameter 1 gives idxRange1.
|
---|
1194 | *
|
---|
1195 | * @see iemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
|
---|
1196 | */
|
---|
1197 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
|
---|
1198 | {
|
---|
1199 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
1200 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Low 32 bits of call parameter 0. */
|
---|
1201 | uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32); /* High 32 bits of call parameter 0. */
|
---|
1202 | uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
1203 | //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2]; - not needed here, the first range is not checked.
|
---|
1204 | uint32_t const idxRange2 = idxRange1 + 1; /* The range right after idxRange1; the only range used below. */
|
---|
1205 | BODY_SET_CUR_INSTR();
|
---|
1206 | BODY_CHECK_CS_LIM(cbInstr);
|
---|
1207 | BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
|
---|
1208 | BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
|
---|
1209 | return off;
|
---|
1210 | }
|
---|
1211 | #endif
|
---|
1212 |
|
---|
1213 |
|
---|
1214 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
|
---|
1215 | /**
|
---|
1216 | * Built-in function for loading TLB and checking opcodes when advancing
|
---|
1217 | * naturally to a different code page.
|
---|
1218 | *
|
---|
1219 | * Only the opcodes on the new page are checked (range idxRange1 + 1, with 0 as the range offset).  Call parameter 0 packs cbInstr (low 32 bits) and cbStartPage (high 32 bits); parameter 1 gives idxRange1.
|
---|
1220 | *
|
---|
1221 | * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
|
---|
1222 | */
|
---|
1223 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
|
---|
1224 | {
|
---|
1225 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
1226 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Low 32 bits of call parameter 0. */
|
---|
1227 | uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32); /* High 32 bits of call parameter 0. */
|
---|
1228 | uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
1229 | //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2]; - not needed here, the first range is not checked.
|
---|
1230 | uint32_t const idxRange2 = idxRange1 + 1; /* The range right after idxRange1; the only range used below. */
|
---|
1231 | BODY_SET_CUR_INSTR();
|
---|
1232 | BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
|
---|
1233 | BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
|
---|
1234 | return off;
|
---|
1235 | }
|
---|
1236 | #endif
|
---|
1237 |
|
---|
1238 |
|
---|
1239 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
|
---|
1240 | /**
|
---|
1241 | * Built-in function for loading TLB and checking opcodes and considering the
|
---|
1242 | * need for CS.LIM checking when advancing naturally to a different code page.
|
---|
1243 | *
|
---|
1244 | * Only the opcodes on the new page are checked (range idxRange1 + 1, with 0 as the range offset).  Call parameter 0 packs cbInstr (low 32 bits) and cbStartPage (high 32 bits); parameter 1 gives idxRange1.
|
---|
1245 | *
|
---|
1246 | * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
|
---|
1247 | */
|
---|
1248 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
|
---|
1249 | {
|
---|
1250 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
1251 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Low 32 bits of call parameter 0. */
|
---|
1252 | uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32); /* High 32 bits of call parameter 0. */
|
---|
1253 | uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
1254 | //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2]; - not needed here, the first range is not checked.
|
---|
1255 | uint32_t const idxRange2 = idxRange1 + 1; /* The range right after idxRange1; the only range used below. */
|
---|
1256 | BODY_SET_CUR_INSTR();
|
---|
1257 | BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
|
---|
1258 | BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
|
---|
1259 | BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
|
---|
1260 | return off;
|
---|
1261 | }
|
---|
1262 | #endif
|
---|
1263 |
|
---|
1264 |
|
---|
1265 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
|
---|
1266 | /**
|
---|
1267 | * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
|
---|
1268 | * advancing naturally to a different code page with first instr at byte 0.  Reads cbInstr and idxRange from pCallEntry->auParams[0..1].
|
---|
1269 | *
|
---|
1270 | * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
|
---|
1271 | */
|
---|
1272 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
|
---|
1273 | {
|
---|
1274 | PCIEMTB const pTb = pReNative->pTbOrg; /* pTbOrg from the recompiler state; handed to the BODY_* macros below. */
|
---|
1275 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Call parameter 0, cast down to uint32_t. */
|
---|
1276 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Call parameter 1, cast down to uint32_t. */
|
---|
1277 | BODY_SET_CUR_INSTR();
|
---|
1278 | BODY_CHECK_CS_LIM(cbInstr);
|
---|
1279 | BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr); /* A literal 0 is passed as the second macro argument. */
|
---|
1280 | //Assert(pVCpu->iem.s.offCurInstrStart == 0);
|
---|
1281 | BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr); /* Range checked with 0 as the range offset. */
|
---|
1282 | return off;
|
---|
1283 | }
|
---|
1284 | #endif
|
---|
1285 |
|
---|
1286 |
|
---|
1287 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
|
---|
1288 | /**
|
---|
1289 | * Built-in function for loading TLB and checking opcodes when advancing
|
---|
1290 | * naturally to a different code page with first instr at byte 0.
|
---|
1291 | *
|
---|
1292 | * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
|
---|
1293 | */
|
---|
1294 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
|
---|
1295 | { /* NOTE(review): pReNative, pCallEntry and off are not declared here; presumably they are parameters supplied by IEM_DECL_IEMNATIVERECOMPFUNC_DEF - confirm. */
|
---|
1296 | PCIEMTB const pTb = pReNative->pTbOrg; /* The translation block taken from the recompiler state. */
|
---|
1297 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Parameter 0 truncated to 32 bits. */
|
---|
1298 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Parameter 1 truncated to 32 bits. */
|
---|
1299 | BODY_SET_CUR_INSTR(); /* The BODY_* macros are defined elsewhere; they presumably emit code and advance 'off' - confirm. */
|
---|
1300 | BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr); /* No CS.LIM macro precedes this call in this variant. Second argument is a constant 0. */
|
---|
1301 | //Assert(pVCpu->iem.s.offCurInstrStart == 0);
|
---|
1302 | BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr); /* Third argument is a constant 0; presumably the offset into the range - confirm. */
|
---|
1303 | return off; /* Hands 'off' back to the caller. */
|
---|
1304 | }
|
---|
1305 | #endif
|
---|
1306 |
|
---|
1307 |
|
---|
1308 | #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
|
---|
1309 | /**
|
---|
1310 | * Built-in function for loading TLB and checking opcodes and considering the
|
---|
1311 | * need for CS.LIM checking when advancing naturally to a different code page
|
---|
1312 | * with first instr at byte 0.
|
---|
1313 | *
|
---|
1314 | * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim
|
---|
1315 | */
|
---|
1316 | IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
|
---|
1317 | { /* NOTE(review): pReNative, pCallEntry and off are not declared here; presumably they are parameters supplied by IEM_DECL_IEMNATIVERECOMPFUNC_DEF - confirm. */
|
---|
1318 | PCIEMTB const pTb = pReNative->pTbOrg; /* The translation block taken from the recompiler state. */
|
---|
1319 | uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0]; /* Parameter 0 truncated to 32 bits. */
|
---|
1320 | uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1]; /* Parameter 1 truncated to 32 bits. */
|
---|
1321 | BODY_SET_CUR_INSTR(); /* The BODY_* macros are defined elsewhere; they presumably emit code and advance 'off' - confirm. */
|
---|
1322 | BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr); /* The 'consider' variant rather than BODY_CHECK_CS_LIM. */
|
---|
1323 | BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr); /* Second argument is a constant 0; presumably because the instruction starts at byte 0 of the new page - confirm. */
|
---|
1324 | //Assert(pVCpu->iem.s.offCurInstrStart == 0);
|
---|
1325 | BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr); /* Third argument is a constant 0; presumably the offset into the range - confirm. */
|
---|
1326 | return off; /* Hands 'off' back to the caller. */
|
---|
1327 | }
|
---|
1328 | #endif
|
---|
1329 |
|
---|