VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@101137

Last change on this file since 101137 was 101111, checked in by vboxsync, 18 months ago

VMM/IEM: Dropped IEMTB_F_STATE_XXX as we use dedicated TBs for compiling, remove obsolete TB immediately, and don't need to consider other threads. bugref:10369

1/* $Id: IEMAllThrdRecompiler.cpp 101111 2023-09-13 14:38:19Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) :
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) :
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95
96
97/*
98 * Narrow down configs here to avoid wasting time on unused configs.
99 */
100
101#ifndef IEM_WITH_CODE_TLB
102# error The code TLB must be enabled for the recompiler.
103#endif
104
105#ifndef IEM_WITH_DATA_TLB
106# error The data TLB must be enabled for the recompiler.
107#endif
108
109#ifndef IEM_WITH_SETJMP
110# error The setjmp approach must be enabled for the recompiler.
111#endif
112
113
114/*********************************************************************************************************************************
115* Internal Functions *
116*********************************************************************************************************************************/
117static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb);
118static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb);
119
120
121/**
122 * Calculates the effective address of a ModR/M memory operand, extended version
123 * for use in the recompilers.
124 *
125 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
126 *
127 * May longjmp on internal error.
128 *
129 * @return The effective address.
130 * @param pVCpu The cross context virtual CPU structure of the calling thread.
131 * @param bRm The ModRM byte.
132 * @param cbImmAndRspOffset - First byte: The size of any immediate
133 * following the effective address opcode bytes
134 * (only for RIP relative addressing).
135 * - Second byte: RSP displacement (for POP [ESP]).
136 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
137 * SIB byte (bits 39:32).
138 *
139 * @note This must be defined in a source file with matching
140 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
141 * or implemented differently...
142 */
143RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
144{
145 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
146# define SET_SS_DEF() \
147 do \
148 { \
149 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
150 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
151 } while (0)
152
153 if (!IEM_IS_64BIT_CODE(pVCpu))
154 {
155/** @todo Check the effective address size crap! */
156 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
157 {
158 uint16_t u16EffAddr;
159
160 /* Handle the disp16 form with no registers first. */
161 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
162 {
163 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
164 *puInfo = u16EffAddr;
165 }
166 else
167 {
168 /* Get the displacement. */
169 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
170 {
171 case 0: u16EffAddr = 0; break;
172 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
173 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
174 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
175 }
176 *puInfo = u16EffAddr;
177
178 /* Add the base and index registers to the disp. */
179 switch (bRm & X86_MODRM_RM_MASK)
180 {
181 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
182 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
183 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
184 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
185 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
186 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
187 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
188 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
189 }
190 }
191
192 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
193 return u16EffAddr;
194 }
195
196 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
197 uint32_t u32EffAddr;
198 uint64_t uInfo;
199
200 /* Handle the disp32 form with no registers first. */
201 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
202 {
203 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
204 uInfo = u32EffAddr;
205 }
206 else
207 {
208 /* Get the register (or SIB) value. */
209 uInfo = 0;
210 switch ((bRm & X86_MODRM_RM_MASK))
211 {
212 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
213 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
214 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
215 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
216 case 4: /* SIB */
217 {
218 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
219 uInfo = (uint64_t)bSib << 32;
220
221 /* Get the index and scale it. */
222 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
223 {
224 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
225 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
226 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
227 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
228 case 4: u32EffAddr = 0; /*none */ break;
229 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
230 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
231 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
232 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
233 }
234 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
235
236 /* add base */
237 switch (bSib & X86_SIB_BASE_MASK)
238 {
239 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
240 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
241 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
242 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
243 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
244 case 5:
245 if ((bRm & X86_MODRM_MOD_MASK) != 0)
246 {
247 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
248 SET_SS_DEF();
249 }
250 else
251 {
252 uint32_t u32Disp;
253 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
254 u32EffAddr += u32Disp;
255 uInfo |= u32Disp;
256 }
257 break;
258 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
259 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
260 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
261 }
262 break;
263 }
264 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
265 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
266 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
267 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
268 }
269
270 /* Get and add the displacement. */
271 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
272 {
273 case 0:
274 break;
275 case 1:
276 {
277 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
278 u32EffAddr += i8Disp;
279 uInfo |= (uint32_t)(int32_t)i8Disp;
280 break;
281 }
282 case 2:
283 {
284 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
285 u32EffAddr += u32Disp;
286 uInfo |= u32Disp;
287 break;
288 }
289 default:
290 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
291 }
292 }
293
294 *puInfo = uInfo;
295 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
296 return u32EffAddr;
297 }
298
299 uint64_t u64EffAddr;
300 uint64_t uInfo;
301
302 /* Handle the rip+disp32 form with no registers first. */
303 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
304 {
305 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
306 uInfo = (uint32_t)u64EffAddr;
307 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
308 }
309 else
310 {
311 /* Get the register (or SIB) value. */
312 uInfo = 0;
313 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
314 {
315 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
316 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
317 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
318 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
319 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
320 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
321 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
322 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
323 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
324 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
325 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
326 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
327 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
328 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
329 /* SIB */
330 case 4:
331 case 12:
332 {
333 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
334 uInfo = (uint64_t)bSib << 32;
335
336 /* Get the index and scale it. */
337 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
338 {
339 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
340 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
341 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
342 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
343 case 4: u64EffAddr = 0; /*none */ break;
344 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
345 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
346 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
347 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
348 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
349 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
350 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
351 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
352 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
353 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
354 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
355 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
356 }
357 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
358
359 /* add base */
360 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
361 {
362 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
363 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
364 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
365 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
366 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
367 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
368 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
369 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
370 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
371 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
372 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
373 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
374 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
375 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
376 /* complicated encodings */
377 case 5:
378 case 13:
379 if ((bRm & X86_MODRM_MOD_MASK) != 0)
380 {
381 if (!pVCpu->iem.s.uRexB)
382 {
383 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
384 SET_SS_DEF();
385 }
386 else
387 u64EffAddr += pVCpu->cpum.GstCtx.r13;
388 }
389 else
390 {
391 uint32_t u32Disp;
392 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
393 u64EffAddr += (int32_t)u32Disp;
394 uInfo |= u32Disp;
395 }
396 break;
397 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
398 }
399 break;
400 }
401 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
402 }
403
404 /* Get and add the displacement. */
405 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
406 {
407 case 0:
408 break;
409 case 1:
410 {
411 int8_t i8Disp;
412 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
413 u64EffAddr += i8Disp;
414 uInfo |= (uint32_t)(int32_t)i8Disp;
415 break;
416 }
417 case 2:
418 {
419 uint32_t u32Disp;
420 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
421 u64EffAddr += (int32_t)u32Disp;
422 uInfo |= u32Disp;
423 break;
424 }
425 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
426 }
427
428 }
429
430 *puInfo = uInfo;
431 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
432 {
433 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
434 return u64EffAddr;
435 }
436 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
437 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
438 return u64EffAddr & UINT32_MAX;
439}
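/*
 * Illustrative sketch (not part of the build): how a caller of the function
 * above might pack cbImmAndRspOffset and decode the returned uInfo value.
 * The field layout follows the doc comment; the local variable names are made
 * up for this example.
 */
#if 0
{
    uint32_t const cbImmAndRspOffset = 4        /* first byte: immediate size, for RIP-relative addressing */
                                     | (0 << 8) /* second byte: RSP displacement, only for POP [xSP] forms */;
    uint64_t       uInfo             = 0;
    RTGCPTR  const GCPtrEff          = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, cbImmAndRspOffset, &uInfo);
    uint32_t const u32Disp           = (uint32_t)uInfo;        /* displacement, bits 31:0 */
    uint8_t  const bSib              = (uint8_t)(uInfo >> 32); /* SIB byte, bits 39:32 (zero if none) */
}
#endif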
440
441
442/*********************************************************************************************************************************
443* Translation Block Cache. *
444*********************************************************************************************************************************/
445
446/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
447static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
448{
449 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
450 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
451 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
452 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
453 if (cMsSinceUse1 != cMsSinceUse2)
454 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
455 if (pTb1->cUsed != pTb2->cUsed)
456 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
457 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
458 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
459 return 0;
460}
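/* Example of the resulting order (illustrative): with pvUser = msNow, a TB last
   used 1 ms ago sorts before one last used 10 ms ago; on a tie, the TB with the
   higher cUsed count comes first; on a further tie, native TBs sort before
   threaded ones. */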
461
462#ifdef VBOX_STRICT
463/**
464 * Assertion helper that checks a collisions list count.
465 */
466static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
467{
468 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
469 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
470 while (pTb)
471 {
472 pTb = pTb->pNext;
473 cLeft--;
474 }
475 AssertMsg(cLeft == 0,
476 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
477 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
478}
479#endif
480
481
482DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
483{
484 STAM_PROFILE_START(&pTbCache->StatPrune, a);
485
486 /*
487 * First convert the collision list to an array.
488 */
489 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
490 uintptr_t cInserted = 0;
491 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
492
493 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
494
495 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
496 {
497 apSortedTbs[cInserted++] = pTbCollision;
498 pTbCollision = pTbCollision->pNext;
499 }
500
501 /* Free any excess (impossible). */
502 if (RT_LIKELY(!pTbCollision))
503 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
504 else
505 do
506 {
507 PIEMTB pTbToFree = pTbCollision;
508 pTbCollision = pTbToFree->pNext;
509 iemTbAllocatorFree(pVCpu, pTbToFree);
510 } while (pTbCollision);
511
512 /*
513 * Sort it by most recently used and usage count.
514 */
515 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
516
517 /* We keep half the list for now. Perhaps a bit aggressive... */
518 uintptr_t const cKeep = cInserted / 2;
519
520 /* First free up the TBs we don't wish to keep (before creating the new
521 list because otherwise the free code will scan the list for each one
522 without ever finding it). */
523 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
524 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
525
526 /* Then chain the new TB together with the existing ones we wish to keep
527 and insert this list into the hash table. */
528 pTbCollision = pTb;
529 for (uintptr_t idx = 0; idx < cKeep; idx++)
530 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
531 pTbCollision->pNext = NULL;
532
533 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
534#ifdef VBOX_STRICT
535 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
536#endif
537
538 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
539}
540
541
542static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
543{
544 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
545 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
546 if (!pTbOldHead)
547 {
548 pTb->pNext = NULL;
549 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
550 }
551 else
552 {
553 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
554 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
555 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
556 {
557 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
558 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
559#ifdef VBOX_STRICT
560 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
561#endif
562 }
563 else
564 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
565 }
566}
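/*
 * Illustrative sketch (not compiled): the apHash[] entries pack the collision
 * list head together with the list length, as suggested by the
 * IEMTBCACHE_PTR_MAKE / _GET_TB / _GET_COUNT usage above.
 */
#if 0
{
    PIEMTB   const pHead = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);    /* head of the collision list */
    uint32_t const cTbs  = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); /* number of TBs in that list */
    Assert(pTbCache->apHash[idxHash] == IEMTBCACHE_PTR_MAKE(pHead, cTbs));
}
#endif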
567
568
569/**
570 * Unlinks @a pTb from the hash table if found in it.
571 *
572 * @returns true if unlinked, false if not present.
573 * @param pTbCache The hash table.
574 * @param pTb The TB to remove.
575 */
576static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
577{
578 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
579 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
580 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
581
582 /*
583 * At the head of the collision list?
584 */
585 if (pTbHash == pTb)
586 {
587 if (!pTb->pNext)
588 pTbCache->apHash[idxHash] = NULL;
589 else
590 {
591 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
592 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
593#ifdef VBOX_STRICT
594 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
595#endif
596 }
597 return true;
598 }
599
600 /*
601 * Search the collision list.
602 */
603 PIEMTB const pTbHead = pTbHash;
604 while (pTbHash)
605 {
606 PIEMTB const pNextTb = pTbHash->pNext;
607 if (pNextTb == pTb)
608 {
609 pTbHash->pNext = pTb->pNext;
610 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
611#ifdef VBOX_STRICT
612 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
613#endif
614 return true;
615 }
616 pTbHash = pNextTb;
617 }
618 return false;
619}
620
621
622/**
623 * Looks up a TB for the given PC and flags in the cache.
624 *
625 * @returns Pointer to TB on success, NULL if not found.
626 * @param pVCpu The cross context virtual CPU structure of the
627 * calling thread.
628 * @param pTbCache The translation block cache.
629 * @param GCPhysPc The PC to look up a TB for.
630 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
631 * the lookup.
632 * @thread EMT(pVCpu)
633 */
634static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
635 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
636{
637 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
638 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
639 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
640#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
641 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
642#endif
643 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p L %d\n", fFlags, GCPhysPc, idxHash, pTb, cLeft));
644 while (pTb)
645 {
646 if (pTb->GCPhysPc == GCPhysPc)
647 {
648 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
649 {
650 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
651 {
652 pTb->cUsed++;
653 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
654 STAM_COUNTER_INC(&pTbCache->cLookupHits);
655 AssertMsg(cLeft > 0, ("%d\n", cLeft));
656 return pTb;
657 }
658 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
659 }
660 else
661 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
662 }
663 else
664 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
665
666 pTb = pTb->pNext;
667#ifdef VBOX_STRICT
668 cLeft--;
669#endif
670 }
671 AssertMsg(cLeft == 0, ("%d\n", cLeft));
672 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
673 return pTb;
674}
675
676
677/*********************************************************************************************************************************
678* Translation Block Allocator. *
679*********************************************************************************************************************************/
680/*
681 * Translation block allocation management.
682 */
683
684#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
685# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
686 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
687# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
688 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
689# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
690 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
691#else
692# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
693 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
694# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
695 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
696# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
697 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
698#endif
699/** Makes a TB index from a chunk index and TB index within that chunk. */
700#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
701 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
702
703
704/**
705 * Initializes the TB allocator and cache for an EMT.
706 *
707 * @returns VBox status code.
708 * @param pVM The VM handle.
709 * @param cInitialTbs The initial number of translation blocks to
710 * preallocate.
711 * @param cMaxTbs The max number of translation blocks allowed.
712 * @thread EMT
713 */
714DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs)
715{
716 PVMCPUCC pVCpu = VMMGetCpu(pVM);
717 Assert(!pVCpu->iem.s.pTbCacheR3);
718 Assert(!pVCpu->iem.s.pTbAllocatorR3);
719
720 /*
721 * Calculate the chunk size of the TB allocator.
722 * The minimum chunk size is 2MiB.
723 */
724 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
725 uint32_t cbPerChunk = _2M;
726 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
727#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
728 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
729 uint8_t cChunkShift = 21 - cTbShift;
730 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
731#endif
732 for (;;)
733 {
734 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
735 break;
736 cbPerChunk *= 2;
737 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
738#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
739 cChunkShift += 1;
740#endif
741 }
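    /* Worked example (illustrative figures only): if sizeof(IEMTB) were 256 bytes
       and aChunks had 8 entries, a 2 MiB chunk would hold 8192 TBs, so any
       cMaxTbs up to 65536 would fit without growing cbPerChunk; beyond that the
       chunk size keeps doubling until the request fits the available chunk slots. */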
742
743 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
744 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
745 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
746
747 cMaxTbs = cMaxChunks * cTbsPerChunk;
748
749 /*
750 * Allocate and initialize it.
751 */
752 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
753 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
754 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
755 if (!pTbAllocator)
756 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
757 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
758 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
759 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
760 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
761 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
762 pTbAllocator->cbPerChunk = cbPerChunk;
763 pTbAllocator->cMaxTbs = cMaxTbs;
764#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
765 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
766 pTbAllocator->cChunkShift = cChunkShift;
767 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
768#endif
769
770 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
771 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
772
773 /*
774 * Allocate the initial chunks.
775 */
776 for (uint32_t idxChunk = 0; ; idxChunk++)
777 {
778 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
779 if (!paTbs)
780 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
781 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
782 cbPerChunk, idxChunk, pVCpu->idCpu);
783
784 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
785 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
786 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
787 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
788 pTbAllocator->cTotalTbs += cTbsPerChunk;
789
790 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
791 break;
792 }
793
794 /*
795 * Calculate the size of the hash table. We double the max TB count and
796 * round it up to the nearest power of two.
797 */
798 uint32_t cCacheEntries = cMaxTbs * 2;
799 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
800 {
801 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
802 cCacheEntries = RT_BIT_32(iBitTop);
803 Assert(cCacheEntries >= cMaxTbs * 2);
804 }
805
806 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
807 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
808 if (!pTbCache)
809 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
810 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
811 cbTbCache, cCacheEntries, pVCpu->idCpu);
812
813 /*
814 * Initialize it (assumes zeroed by the allocator).
815 */
816 pTbCache->uMagic = IEMTBCACHE_MAGIC;
817 pTbCache->cHash = cCacheEntries;
818 pTbCache->uHashMask = cCacheEntries - 1;
819 Assert(pTbCache->cHash > pTbCache->uHashMask);
820 pVCpu->iem.s.pTbCacheR3 = pTbCache;
821
822 return VINF_SUCCESS;
823}
824
825
826/**
827 * Inner free worker.
828 */
829static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
830 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
831{
832 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
833 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
834 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
835 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
836
837 /*
838 * Unlink the TB from the hash table.
839 */
840 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
841
842 /*
843 * Free the TB itself.
844 */
845 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
846 {
847 case IEMTB_F_TYPE_THREADED:
848 pTbAllocator->cThreadedTbs -= 1;
849 RTMemFree(pTb->Thrd.paCalls);
850 break;
851 case IEMTB_F_TYPE_NATIVE:
852 pTbAllocator->cNativeTbs -= 1;
853 RTMemFree(pTb->Native.pbCode); /// @todo native: fix me
854 break;
855 default:
856 AssertFailed();
857 }
858 RTMemFree(pTb->pabOpcodes);
859
860 pTb->pNext = NULL;
861 pTb->fFlags = 0;
862 pTb->GCPhysPc = UINT64_MAX;
863 pTb->Gen.uPtr = 0;
864 pTb->Gen.uData = 0;
865 pTb->cbOpcodes = 0;
866 pTb->cbOpcodesAllocated = 0;
867 pTb->pabOpcodes = NULL;
868
869 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
870 Assert(pTbAllocator->cInUseTbs > 0);
871
872 pTbAllocator->cInUseTbs -= 1;
873 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
874}
875
876
877/**
878 * Frees the given TB.
879 *
880 * @param pVCpu The cross context virtual CPU structure of the calling
881 * thread.
882 * @param pTb The translation block to free.
883 * @thread EMT(pVCpu)
884 */
885static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
886{
887 /*
888 * Validate state.
889 */
890 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
891 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
892 uint8_t const idxChunk = pTb->idxAllocChunk;
893 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
894 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
895 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
896
897 /*
898 * Call inner worker.
899 */
900 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
901}
902
903
904/**
905 * Grow the translation block allocator with another chunk.
906 */
907static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
908{
909 /*
910 * Validate state.
911 */
912 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
913 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
914 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
915 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
916 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
917
918 /*
919 * Allocate a new chunk and add it to the allocator.
920 */
921 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
922 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
923 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
924
925 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
926 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
927 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
928 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
929 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
930 pTbAllocator->cTotalTbs += cTbsPerChunk;
931 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
932
933 return VINF_SUCCESS;
934}
935
936
937/**
938 * Allocates a TB from allocator with free block.
939 *
940 * This is common code to both the fast and slow allocator code paths.
941 */
942DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
943{
944 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
945
946 int idxTb;
947 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
948 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
949 pTbAllocator->cTotalTbs,
950 pTbAllocator->iStartHint & ~(uint32_t)63);
951 else
952 idxTb = -1;
953 if (idxTb < 0)
954 {
955 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
956 AssertLogRelReturn(idxTb >= 0, NULL);
957 }
958 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
959 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
960
961 /** @todo shift/mask optimization for power of two IEMTB sizes. */
962 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
963 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
964 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
965 Assert(pTb->idxAllocChunk == idxChunk);
966
967 pTbAllocator->cInUseTbs += 1;
968 if (fThreaded)
969 pTbAllocator->cThreadedTbs += 1;
970 else
971 pTbAllocator->cNativeTbs += 1;
972 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
973 return pTb;
974}
975
976
977/**
978 * Slow path for iemTbAllocatorAlloc.
979 */
980static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
981{
982 /*
983 * With some luck we can add another chunk.
984 */
985 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
986 {
987 int rc = iemTbAllocatorGrow(pVCpu);
988 if (RT_SUCCESS(rc))
989 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
990 }
991
992 /*
993 * We have to prune stuff. Sigh.
994 *
995 * This requires scanning for older TBs and kicking them out. Not sure how to
996 * best do this as we don't want to maintain any list of TBs ordered by last
997 * usage time. But one reasonably simple approach would be that each time we
998 * get here we continue a sequential scan of the allocation chunks,
999 * considering just a smallish number of TBs and freeing a fixed portion of
1000 * them. Say, we consider the next 128 TBs, freeing the least recently used
1001 * out of each group of 4 TBs, resulting in 32 free TBs.
1002 */
1003 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1004 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1005 uint32_t const cTbsToPrune = 128;
1006 uint32_t const cTbsPerGroup = 4;
1007 uint32_t cFreedTbs = 0;
1008#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1009 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1010#else
1011 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1012#endif
1013 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1014 idxTbPruneFrom = 0;
1015 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1016 {
1017 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1018 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1019 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1020 uint32_t cMsAge = msNow - pTb->msLastUsed;
1021 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1022
1023 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1024 {
1025#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1026 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1027 { /* likely */ }
1028 else
1029 {
1030 idxInChunk2 = 0;
1031 idxChunk2 += 1;
1032 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1033 idxChunk2 = 0;
1034 }
1035#endif
1036 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1037 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1038 if ( cMsAge2 > cMsAge
1039 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1040 {
1041 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1042 pTb = pTb2;
1043 idxChunk = idxChunk2;
1044 idxInChunk = idxInChunk2;
1045 cMsAge = cMsAge2;
1046 }
1047 }
1048
1049 /* Free the TB. */
1050 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1051 cFreedTbs++; /* paranoia */
1052 }
1053 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1054 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1055
1056 /*
1057 * Allocate a TB from the ones we've pruned.
1058 */
1059 if (cFreedTbs)
1060 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1061 return NULL;
1062}
1063
1064
1065/**
1066 * Allocate a translation block.
1067 *
1068 * @returns Pointer to block on success, NULL if we're out and unable to
1069 * free up an existing one (very unlikely once implemented).
1070 * @param pVCpu The cross context virtual CPU structure of the calling
1071 * thread.
1072 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1073 * For statistics.
1074 */
1075DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1076{
1077 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1078 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1079
1080 /* If the allocator is full, take the slow code path. */
1081 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1082 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1083 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1084}
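/*
 * Illustrative usage sketch (not compiled): how a recompiler path might obtain
 * a TB from the per-EMT allocator and return it again; error handling trimmed.
 */
#if 0
{
    PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
    if (pTb)
    {
        /* ... fill in the translation block ... */
        iemTbAllocatorFree(pVCpu, pTb); /* unlinks it from the cache and marks it free again */
    }
}
#endif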
1085
1086
1087/**
1088 * Allocate a translation block for threaded recompilation.
1089 *
1090 * This is allocated with maxed out call table and storage for opcode bytes,
1091 * because it's only supposed to be called once per EMT to allocate the TB
1092 * pointed to by IEMCPU::pThrdCompileTbR3.
1093 *
1094 * @returns Pointer to the translation block on success, NULL on failure.
1095 * @param pVM The cross context virtual machine structure.
1096 * @param pVCpu The cross context virtual CPU structure of the calling
1097 * thread.
1098 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1099 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1100 */
1101static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1102{
1103 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1104 if (pTb)
1105 {
1106 unsigned const cCalls = 256;
1107 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1108 if (pTb->Thrd.paCalls)
1109 {
1110 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1111 if (pTb->pabOpcodes)
1112 {
1113 pTb->Thrd.cAllocated = cCalls;
1114 pTb->cbOpcodesAllocated = cCalls * 16;
1115 pTb->Thrd.cCalls = 0;
1116 pTb->cbOpcodes = 0;
1117 pTb->pNext = NULL;
1118 pTb->cUsed = 0;
1119 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1120 pTb->idxAllocChunk = UINT8_MAX;
1121 pTb->GCPhysPc = GCPhysPc;
1122 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1123 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1124 pTb->cInstructions = 0;
1125
1126 /* Init the first opcode range. */
1127 pTb->cRanges = 1;
1128 pTb->aRanges[0].cbOpcodes = 0;
1129 pTb->aRanges[0].offOpcodes = 0;
1130 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1131 pTb->aRanges[0].u2Unused = 0;
1132 pTb->aRanges[0].idxPhysPage = 0;
1133 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1134 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1135
1136 return pTb;
1137 }
1138 RTMemFree(pTb->Thrd.paCalls);
1139 }
1140 RTMemFree(pTb);
1141 }
1142 RT_NOREF(pVM);
1143 return NULL;
1144}
1145
1146
1147/**
1148 * Called on the TB that is dedicated for recompilation before it's reused.
1149 *
1150 * @param pVCpu The cross context virtual CPU structure of the calling
1151 * thread.
1152 * @param pTb The translation block to reuse.
1153 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1154 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1155 */
1156static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1157{
1158 pTb->GCPhysPc = GCPhysPc;
1159 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1160 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1161 pTb->Thrd.cCalls = 0;
1162 pTb->cbOpcodes = 0;
1163 pTb->cInstructions = 0;
1164
1165 /* Init the first opcode range. */
1166 pTb->cRanges = 1;
1167 pTb->aRanges[0].cbOpcodes = 0;
1168 pTb->aRanges[0].offOpcodes = 0;
1169 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1170 pTb->aRanges[0].u2Unused = 0;
1171 pTb->aRanges[0].idxPhysPage = 0;
1172 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1173 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1174}
1175
1176
1177/**
1178 * Used to duplicate a threaded translation block after recompilation is done.
1179 *
1180 * @returns Pointer to the translation block on success, NULL on failure.
1181 * @param pVM The cross context virtual machine structure.
1182 * @param pVCpu The cross context virtual CPU structure of the calling
1183 * thread.
1184 * @param pTbSrc The TB to duplicate.
1185 */
1186static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1187{
1188 /*
1189 * Just using the heap for now. Will make this more efficient and
1190 * complicated later, don't worry. :-)
1191 */
1192 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1193 if (pTb)
1194 {
1195 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1196 memcpy(pTb, pTbSrc, sizeof(*pTb));
1197 pTb->idxAllocChunk = idxAllocChunk;
1198
1199 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1200 Assert(cCalls > 0);
1201 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1202 if (pTb->Thrd.paCalls)
1203 {
1204 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1205 Assert(cbOpcodes > 0);
1206 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1207 if (pTb->pabOpcodes)
1208 {
1209 pTb->Thrd.cAllocated = cCalls;
1210 pTb->cbOpcodesAllocated = cbOpcodes;
1211 pTb->pNext = NULL;
1212 pTb->cUsed = 0;
1213 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1214 pTb->fFlags = pTbSrc->fFlags;
1215
1216 return pTb;
1217 }
1218 RTMemFree(pTb->Thrd.paCalls);
1219 }
1220 iemTbAllocatorFree(pVCpu, pTb);
1221 }
1222 RT_NOREF(pVM);
1223 return NULL;
1224
1225}
1226
1227
1228/**
1229 * Adds the given TB to the hash table.
1230 *
1231 * @param pVCpu The cross context virtual CPU structure of the calling
1232 * thread.
1233 * @param pTbCache The cache to add it to.
1234 * @param pTb The translation block to add.
1235 */
1236static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1237{
1238 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1239
1240 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
1241 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1242 if (LogIs12Enabled())
1243 {
1244 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1245 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1246 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1247 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1248 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1249 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1250 pTb->aRanges[idxRange].idxPhysPage == 0
1251 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1252 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1253 }
1254}
1255
1256
1257/**
1258 * Called by opcode verifier functions when they detect a problem.
1259 */
1260void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb)
1261{
1262 iemTbAllocatorFree(pVCpu, pTb);
1263}
1264
1265
1266/*
1267 * Real code.
1268 */
1269
1270#ifdef LOG_ENABLED
1271/**
1272 * Logs the current instruction.
1273 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1274 * @param pszFunction The IEM function doing the execution.
1275 */
1276static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction) RT_NOEXCEPT
1277{
1278# ifdef IN_RING3
1279 if (LogIs2Enabled())
1280 {
1281 char szInstr[256];
1282 uint32_t cbInstr = 0;
1283 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1284 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1285 szInstr, sizeof(szInstr), &cbInstr);
1286
1287 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1288 Log2(("**** %s fExec=%x pTb=%p\n"
1289 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1290 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1291 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1292 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1293 " %s\n"
1294 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3,
1295 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1296 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1297 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1298 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1299 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1300 szInstr));
1301
1302 if (LogIs3Enabled())
1303 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL);
1304 }
1305 else
1306# endif
1307 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1308 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1309}
1310#endif /* LOG_ENABLED */
1311
1312
1313#if 0
1314static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1315{
1316 RT_NOREF(pVM, pVCpu);
1317 return rcStrict;
1318}
1319#endif
1320
1321
1322/**
1323 * Initializes the decoder state when compiling TBs.
1324 *
1325 * This presumes that fExec has already been initialized.
1326 *
1327 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1328 * to apply fixes to them as well.
1329 *
1330 * @param pVCpu The cross context virtual CPU structure of the calling
1331 * thread.
1332 * @param fReInit Clear for the first call for a TB, set for subsequent
1333 * calls from inside the compile loop where we can skip a
1334 * couple of things.
1335 * @param fExtraFlags The extra translation block flags when @a fReInit is
1336 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1337 * checked.
1338 */
1339DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1340{
1341 /* ASSUMES: That iemInitExec was already called and that anyone changing
1342 CPU state affecting the fExec bits since then will have updated fExec! */
1343 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1344 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1345
1346 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1347
1348 /* Decoder state: */
1349 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1350 pVCpu->iem.s.enmEffAddrMode = enmMode;
1351 if (enmMode != IEMMODE_64BIT)
1352 {
1353 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1354 pVCpu->iem.s.enmEffOpSize = enmMode;
1355 }
1356 else
1357 {
1358 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1359 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1360 }
1361 pVCpu->iem.s.fPrefixes = 0;
1362 pVCpu->iem.s.uRexReg = 0;
1363 pVCpu->iem.s.uRexB = 0;
1364 pVCpu->iem.s.uRexIndex = 0;
1365 pVCpu->iem.s.idxPrefix = 0;
1366 pVCpu->iem.s.uVex3rdReg = 0;
1367 pVCpu->iem.s.uVexLength = 0;
1368 pVCpu->iem.s.fEvexStuff = 0;
1369 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1370 pVCpu->iem.s.offModRm = 0;
1371 pVCpu->iem.s.iNextMapping = 0;
1372
1373 if (!fReInit)
1374 {
1375 pVCpu->iem.s.cActiveMappings = 0;
1376 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1377 pVCpu->iem.s.fEndTb = false;
1378 pVCpu->iem.s.fTbCheckOpcodes = false;
1379 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1380 pVCpu->iem.s.fTbCrossedPage = false;
1381 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1382 pVCpu->iem.s.fTbCurInstrIsSti = false;
1383 }
1384 else
1385 {
1386 Assert(pVCpu->iem.s.cActiveMappings == 0);
1387 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1388 Assert(pVCpu->iem.s.fEndTb == false);
1389 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1390 }
1391
1392#ifdef DBGFTRACE_ENABLED
1393 switch (IEM_GET_CPU_MODE(pVCpu))
1394 {
1395 case IEMMODE_64BIT:
1396 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1397 break;
1398 case IEMMODE_32BIT:
1399 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1400 break;
1401 case IEMMODE_16BIT:
1402 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1403 break;
1404 }
1405#endif
1406}
1407
1408
1409/**
1410 * Initializes the opcode fetcher when starting the compilation.
1411 *
1412 * @param pVCpu The cross context virtual CPU structure of the calling
1413 * thread.
1414 */
1415DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1416{
1417 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1418#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1419 pVCpu->iem.s.offOpcode = 0;
1420#else
1421 RT_NOREF(pVCpu);
1422#endif
1423}
1424
1425
1426/**
1427 * Re-initializes the opcode fetcher between instructions while compiling.
1428 *
1429 * @param pVCpu The cross context virtual CPU structure of the calling
1430 * thread.
1431 */
1432DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1433{
1434 if (pVCpu->iem.s.pbInstrBuf)
1435 {
1436 uint64_t off = pVCpu->cpum.GstCtx.rip;
1437 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1438 off += pVCpu->cpum.GstCtx.cs.u64Base;
1439 off -= pVCpu->iem.s.uInstrBufPc;
1440 if (off < pVCpu->iem.s.cbInstrBufTotal)
1441 {
1442 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1443 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1444 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1445 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1446 else
1447 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1448 }
1449 else
1450 {
1451 pVCpu->iem.s.pbInstrBuf = NULL;
1452 pVCpu->iem.s.offInstrNextByte = 0;
1453 pVCpu->iem.s.offCurInstrStart = 0;
1454 pVCpu->iem.s.cbInstrBuf = 0;
1455 pVCpu->iem.s.cbInstrBufTotal = 0;
1456 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1457 }
1458 }
1459 else
1460 {
1461 pVCpu->iem.s.offInstrNextByte = 0;
1462 pVCpu->iem.s.offCurInstrStart = 0;
1463 pVCpu->iem.s.cbInstrBuf = 0;
1464 pVCpu->iem.s.cbInstrBufTotal = 0;
1465#ifdef VBOX_STRICT
1466 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1467#endif
1468 }
1469#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1470 pVCpu->iem.s.offOpcode = 0;
1471#endif
1472}
1473
1474
1475DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1476{
1477 switch (cbInstr)
1478 {
1479 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1480 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1481 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1482 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1483 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1484 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1485 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1486 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1487 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1488 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1489 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1490 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1491 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1492 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1493 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1494 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1495 }
1496}
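/*
 * Illustrative usage sketch (not compiled): appending the opcode bytes of the
 * instruction just decoded to the TB's opcode buffer during compilation.
 */
#if 0
{
    uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
    iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[pTb->cbOpcodes], cbInstr);
    pTb->cbOpcodes += cbInstr;
}
#endif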
1497
1498
1499/**
1500 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1501 *
1502 * - CS LIM check required.
1503 * - Must recheck opcode bytes.
1504 * - Previous instruction branched.
1505 * - TLB load detected, probably due to page crossing.
1506 *
1507 * @returns true if everything went well, false if we're out of space in the TB
1508 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
1509 * @param pVCpu The cross context virtual CPU structure of the calling
1510 * thread.
1511 * @param pTb The translation block being compiled.
1512 */
1513bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1514{
1515 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1516#if 0
1517 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1518 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1519#endif
1520
1521 /*
1522 * If we're not in 64-bit mode and not already checking CS.LIM we need to
1523 * see whether we should start doing so.
1524 */
1525 bool fConsiderCsLimChecking;
1526 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
1527 if ( fMode == IEM_F_MODE_X86_64BIT
1528 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
1529 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
1530 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
1531 fConsiderCsLimChecking = false; /* already enabled or not needed */
1532 else
1533 {
1534 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
1535 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
1536 fConsiderCsLimChecking = true; /* likely */
1537 else
1538 {
1539 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
1540 return false;
1541 }
1542 }
1543
1544 /*
1545 * Prepare the call now, even before we know if we can accept the instruction in this TB.
1546 * This allows us to amend parameters w/o making every case suffer.
1547 */
1548 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
1549 uint16_t const offOpcode = pTb->cbOpcodes;
1550 uint8_t idxRange = pTb->cRanges - 1;
1551
1552 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
1553 pCall->idxInstr = pTb->cInstructions;
1554 pCall->offOpcode = offOpcode;
1555 pCall->idxRange = idxRange;
1556 pCall->cbOpcode = cbInstr;
1557 pCall->auParams[0] = cbInstr;
1558 pCall->auParams[1] = idxRange;
1559 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
1560
1561/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
1562 * gotten onto. If we do, stop */
1563
1564 /*
1565 * Case 1: We've branched (RIP changed).
1566 *
1567 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
1568 * Req: 1 extra range, no extra phys.
1569 *
1570 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
1571 * necessary (fTbCrossedPage is true).
1572 * Req: 1 extra range, probably 1 extra phys page entry.
1573 *
1574 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
1575 * but in addition we cross into the following page and require
1576 * another TLB load.
1577 * Req: 2 extra ranges, probably 2 extra phys page entries.
1578 *
1579 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
1580 * the following page (thus fTbCrossedPage is true).
1581 * Req: 2 extra ranges, probably 1 extra phys page entry.
1582 *
1583 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
1584 * it may trigger "spuriously" from the CPU's point of view because of
1585 * physical page changes that'll invalidate the physical TLB and trigger a
1586 * call to the function. In theory this shouldn't be a big deal, just a bit
1587 * of a performance loss as we'll pick the LoadingTlb variants.
1588 *
1589 * Note! We do not currently optimize branching to the next instruction (sorry
1590 * 32-bit PIC code). We could maybe do that in the branching code that
1591 * sets (or not) fTbBranched.
1592 */
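    /* A concrete illustration of sub-case 1d (hypothetical numbers): a branch
       lands at offset 0xffb of the page we're already on and the instruction
       there is 7 bytes long, so 5 opcode bytes live in the current page and 2
       spill into the following one.  That costs two extra range entries but
       typically only one extra physical page entry, as the first page is
       already known to the TB. */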
1593 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
1594 * variant in win 3.1 code and the call variant in 32-bit linux PIC
1595 * code. This'll require filtering out far jmps and calls, as they
1596 * load CS which should technically be considered indirect since the
1597 * GDT/LDT entry's base address can be modified independently from
1598 * the code. */
1599 if (pVCpu->iem.s.fTbBranched != 0)
1600 {
1601 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
1602 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
1603 {
1604 /* 1a + 1b - instruction fully within the branched to page. */
1605 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
1606 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
1607
1608 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
1609 {
1610 /* Check that we've got a free range. */
1611 idxRange += 1;
1612 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1613 { /* likely */ }
1614 else
1615 {
1616 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1617 return false;
1618 }
1619 pCall->idxRange = idxRange;
1620 pCall->auParams[1] = idxRange;
1621 pCall->auParams[2] = 0;
1622
1623 /* Check that we've got a free page slot. */
1624 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1625 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1626 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1627 pTb->aRanges[idxRange].idxPhysPage = 0;
1628 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1629 || pTb->aGCPhysPages[0] == GCPhysNew)
1630 {
1631 pTb->aGCPhysPages[0] = GCPhysNew;
1632 pTb->aRanges[idxRange].idxPhysPage = 1;
1633 }
1634 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1635 || pTb->aGCPhysPages[1] == GCPhysNew)
1636 {
1637 pTb->aGCPhysPages[1] = GCPhysNew;
1638 pTb->aRanges[idxRange].idxPhysPage = 2;
1639 }
1640 else
1641 {
1642 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1643 return false;
1644 }
1645
1646 /* Finish setting up the new range. */
1647 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1648 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1649 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1650 pTb->aRanges[idxRange].u2Unused = 0;
1651 pTb->cRanges++;
1652 }
1653 else
1654 {
1655 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1656 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1657 }
1658
1659 /* Determine which function we need to load & check.
1660 Note! For jumps to a new page, we'll set both fTbBranched and
1661 fTbCrossedPage to avoid unnecessary TLB work for intra-page
1662 branching */
1663 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
1664 || pVCpu->iem.s.fTbCrossedPage)
1665 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1666 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1667 : !fConsiderCsLimChecking
1668 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1669 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
1670 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
1671 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1672 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1673 : !fConsiderCsLimChecking
1674 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
1675 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
1676 else
1677 {
1678 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
1679 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1680 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1681 : !fConsiderCsLimChecking
1682 ? kIemThreadedFunc_BltIn_CheckOpcodes
1683 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
1684 }
1685 }
1686 else
1687 {
1688 /* 1c + 1d - instruction crosses pages. */
1689 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1690 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1691
1692 /* Lazy bird: Check that this isn't case 1c, since we've already
1693 loaded the first physical address. End the TB and
1694 make it a case 2b instead.
1695
1696 Hmm. Too much bother to detect, so just do the same
1697 with case 1d as well. */
1698#if 0 /** @todo get back to this later when we've got the actual branch code in
1699 * place. */
1700 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1701
1702 /* Check that we've got two free ranges. */
1703 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
1704 { /* likely */ }
1705 else
1706 return false;
1707 idxRange += 1;
1708 pCall->idxRange = idxRange;
1709 pCall->auParams[1] = idxRange;
1710 pCall->auParams[2] = 0;
1711
1712 /* ... */
1713
1714#else
1715 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1716 return false;
1717#endif
1718 }
1719 }
1720
1721 /*
1722 * Case 2: Page crossing.
1723 *
1724 * Sub-case 2a: The instruction starts on the first byte in the next page.
1725 *
1726 * Sub-case 2b: The instruction has opcode bytes in both the current and
1727 * following page.
1728 *
1729 * Both cases require a new range table entry and probably a new physical
1730 * page entry. The difference is in which functions to emit and whether to
1731 * add bytes to the current range.
1732 */
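    /* Concrete illustrations (hypothetical numbers): 2a is e.g. the previous
       instruction ending on the last byte of the page so this one starts at
       offset 0 of the next page; 2b is e.g. an instruction starting at offset
       0xffd with length 5, leaving 3 opcode bytes in the current page and 2
       in the following one. */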
1733 else if (pVCpu->iem.s.fTbCrossedPage)
1734 {
1735 /* Check that we've got a free range. */
1736 idxRange += 1;
1737 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1738 { /* likely */ }
1739 else
1740 {
1741 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1742 return false;
1743 }
1744
1745 /* Check that we've got a free page slot. */
1746 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1747 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1748 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1749 pTb->aRanges[idxRange].idxPhysPage = 0;
1750 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1751 || pTb->aGCPhysPages[0] == GCPhysNew)
1752 {
1753 pTb->aGCPhysPages[0] = GCPhysNew;
1754 pTb->aRanges[idxRange].idxPhysPage = 1;
1755 }
1756 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1757 || pTb->aGCPhysPages[1] == GCPhysNew)
1758 {
1759 pTb->aGCPhysPages[1] = GCPhysNew;
1760 pTb->aRanges[idxRange].idxPhysPage = 2;
1761 }
1762 else
1763 {
1764 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1765 return false;
1766 }
1767
1768 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
1769 {
1770 Assert(pVCpu->iem.s.offCurInstrStart == 0);
1771 pCall->idxRange = idxRange;
1772 pCall->auParams[1] = idxRange;
1773 pCall->auParams[2] = 0;
1774
1775 /* Finish setting up the new range. */
1776 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1777 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1778 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1779 pTb->aRanges[idxRange].u2Unused = 0;
1780 pTb->cRanges++;
1781
1782 /* Determine which function we need to load & check. */
1783 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1784 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1785 : !fConsiderCsLimChecking
1786 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1787 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
1788 }
1789 else
1790 {
1791 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1792 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1793 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1794 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
1795
1796 /* We're good. Split the instruction over the old and new range table entries. */
1797 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
1798
1799 pTb->aRanges[idxRange].offPhysPage = 0;
1800 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
1801 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
1802 pTb->aRanges[idxRange].u2Unused = 0;
1803 pTb->cRanges++;
1804
1805 /* Determine which function we need to load & check. */
1806 if (pVCpu->iem.s.fTbCheckOpcodes)
1807 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1808 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1809 : !fConsiderCsLimChecking
1810 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1811 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
1812 else
1813 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1814 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1815 : !fConsiderCsLimChecking
1816 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1817 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
1818 }
1819 }
1820
1821 /*
1822 * Regular case: No new range required.
1823 */
1824 else
1825 {
1826 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
1827 if (pVCpu->iem.s.fTbCheckOpcodes)
1828 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1829 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1830 : kIemThreadedFunc_BltIn_CheckOpcodes;
1831 else
1832 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
1833
1834 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1835 pTb->cbOpcodes = offOpcode + cbInstr;
1836 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1837 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1838 }
1839
1840 /*
1841 * Commit the call.
1842 */
1843 pTb->Thrd.cCalls++;
1844
1845 /*
1846 * Clear state.
1847 */
1848 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1849 pVCpu->iem.s.fTbCrossedPage = false;
1850 pVCpu->iem.s.fTbCheckOpcodes = false;
1851
1852 /*
1853 * Copy opcode bytes.
1854 */
1855 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1856 pTb->cbOpcodes = offOpcode + cbInstr;
1857 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1858
1859 return true;
1860}
1861
1862
1863/**
1864 * Worker for iemThreadedCompileBeginEmitCallsComplications and
1865 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
1866 *
1867 * @returns true if anything is pending, false if not.
1868 * @param pVCpu The cross context virtual CPU structure of the calling
1869 * thread.
1870 */
1871DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
1872{
1873 uint64_t fCpu = pVCpu->fLocalForcedActions;
1874 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
1875#if 1
1876 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
1877 if (RT_LIKELY( !fCpu
1878 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
1879 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
1880 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
1881 return false;
1882 return true;
1883#else
1884 return false;
1885#endif
1886
1887}
1888
1889
1890/**
1891 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
1892 * set.
1893 *
1894 * @returns true if we should continue, false if an IRQ is deliverable or a
1895 * relevant force flag is pending.
1896 * @param pVCpu The cross context virtual CPU structure of the calling
1897 * thread.
1898 * @param pTb The translation block being compiled.
1899 * @sa iemThreadedCompileCheckIrq
1900 */
1901bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
1902{
1903 /*
1904 * Skip this if we've already emitted a call after the previous instruction
1905 * or if it's the first call, as we're always checking FFs between blocks.
1906 */
1907 uint32_t const idxCall = pTb->Thrd.cCalls;
1908 if ( idxCall > 0
1909 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
1910 {
1911 /* Emit the call. */
1912 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1913 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1914 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1915 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
1916 pCall->idxInstr = pTb->cInstructions;
1917 pCall->uUnused0 = 0;
1918 pCall->offOpcode = 0;
1919 pCall->cbOpcode = 0;
1920 pCall->idxRange = 0;
1921 pCall->auParams[0] = 0;
1922 pCall->auParams[1] = 0;
1923 pCall->auParams[2] = 0;
1924 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1925
1926 /* Reset the IRQ check value. */
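    /* Normally we wait another 32 instructions before checking again; when in
       an interrupt shadow we use zero so a check follows the very next
       instruction, same reasoning as in iemThreadedCompileCheckIrqAfter. */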
1927 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
1928
1929 /*
1930 * Check for deliverable IRQs and pending force flags.
1931 */
1932 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
1933 }
1934 return true; /* continue */
1935}
1936
1937
1938/**
1939 * Emits an IRQ check call and checks for pending IRQs.
1940 *
1941 * @returns true if we should continue, false if an IRQ is deliverable or a
1942 * relevant force flag is pending.
1943 * @param pVCpu The cross context virtual CPU structure of the calling
1944 * thread.
1945 * @param pTb The translation block.
1946 * @sa iemThreadedCompileBeginEmitCallsComplications
1947 */
1948static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
1949{
1950 /* Check again in a little bit, unless it is immediately following an STI
1951 in which case we *must* check immediately after the next instruction
1952 as well in case it's executed with interrupt inhibition. We could
1953 otherwise miss the interrupt window. See the irq2 wait2 variant in
1954 bs3-timers-1 which is doing sti + sti + cli. */
1955 if (!pVCpu->iem.s.fTbCurInstrIsSti)
1956 pVCpu->iem.s.cInstrTillIrqCheck = 32;
1957 else
1958 {
1959 pVCpu->iem.s.fTbCurInstrIsSti = false;
1960 pVCpu->iem.s.cInstrTillIrqCheck = 0;
1961 }
1962 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1963
1964 /*
1965 * Emit the call.
1966 */
1967 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
1968 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
1969 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
1970 pCall->idxInstr = pTb->cInstructions;
1971 pCall->uUnused0 = 0;
1972 pCall->offOpcode = 0;
1973 pCall->cbOpcode = 0;
1974 pCall->idxRange = 0;
1975 pCall->auParams[0] = 0;
1976 pCall->auParams[1] = 0;
1977 pCall->auParams[2] = 0;
1978
1979 /*
1980 * Check for deliverable IRQs and pending force flags.
1981 */
1982 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
1983}
1984
1985
1986/**
1987 * Compiles a new TB and executes it.
1988 *
1989 * We combine compilation and execution here as it makes for simpler code flow
1990 * in the main loop and allows interpreting while compiling if we want to
1991 * explore that option.
1992 *
1993 * @returns Strict VBox status code.
1994 * @param pVM The cross context virtual machine structure.
1995 * @param pVCpu The cross context virtual CPU structure of the calling
1996 * thread.
1997 * @param GCPhysPc The physical address corresponding to the current
1998 * RIP+CS.BASE.
1999 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2000 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2001 */
2002static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2003{
2004 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2005 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2006
2007 /*
2008 * Get the TB we use for the recompiling. This is a maxed-out TB that
2009 * we'll make a more efficient copy of when we're done compiling.
2010 */
2011 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2012 if (pTb)
2013 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2014 else
2015 {
2016 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2017 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2018 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2019 }
2020
2021 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2022 functions may get at it. */
2023 pVCpu->iem.s.pCurTbR3 = pTb;
2024
2025#if 0
2026 /* Make sure the CheckIrq condition matches the one in EM. */
2027 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2028 const uint32_t cZeroCalls = 1;
2029#else
2030 const uint32_t cZeroCalls = 0;
2031#endif
2032
2033 /*
2034 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2035 */
2036 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2037 iemThreadedCompileInitOpcodeFetching(pVCpu);
2038 VBOXSTRICTRC rcStrict;
2039 for (;;)
2040 {
2041 /* Process the next instruction. */
2042#ifdef LOG_ENABLED
2043 iemThreadedLogCurInstr(pVCpu, "CC");
2044 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2045 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2046#endif
2047 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2048 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2049
2050 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2051 if ( rcStrict == VINF_SUCCESS
2052 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2053 && !pVCpu->iem.s.fEndTb)
2054 {
2055 Assert(pTb->Thrd.cCalls > cCallsPrev);
2056 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2057
2058 pVCpu->iem.s.cInstructions++;
2059 }
2060 else
2061 {
2062 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2063 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2064 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2065 rcStrict = VINF_SUCCESS;
2066
2067 if (pTb->Thrd.cCalls > cZeroCalls)
2068 {
2069 if (cCallsPrev != pTb->Thrd.cCalls)
2070 pVCpu->iem.s.cInstructions++;
2071 break;
2072 }
2073
2074 pVCpu->iem.s.pCurTbR3 = NULL;
2075 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2076 }
2077
2078 /* Check for IRQs? */
2079 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2080 pVCpu->iem.s.cInstrTillIrqCheck--;
2081 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2082 break;
2083
2084 /* Still space in the TB? */
2085 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2086 && pTb->cbOpcodes + 16 <= pTb->cbOpcodesAllocated)
2087 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2088 else
2089 {
2090 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
2091 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
2092 break;
2093 }
2094 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2095 }
2096
2097 /*
2098 * Duplicate the TB into a completed one and link it.
2099 */
2100 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2101 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2102
2103 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2104
2105#ifdef IEM_COMPILE_ONLY_MODE
2106 /*
2107 * Execute the translation block.
2108 */
2109#endif
2110
2111 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2112}
2113
2114
2115/**
2116 * Executes a translation block.
2117 *
2118 * @returns Strict VBox status code.
2119 * @param pVCpu The cross context virtual CPU structure of the calling
2120 * thread.
2121 * @param pTb The translation block to execute.
2122 */
2123static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2124{
2125 /* Check the opcodes in the first page before starting execution. */
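    /* If the guest has modified the code since the TB was compiled, the
       memcmp below will fail and the TB is retired as obsolete so that a
       fresh one can be compiled on the next lookup. */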
2126 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2127 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
2128 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
2129 { /* likely */ }
2130 else
2131 {
2132 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
2133 iemThreadedTbObsolete(pVCpu, pTb);
2134 return VINF_SUCCESS;
2135 }
2136
2137 /* Set the current TB so CIMPL functions may get at it. */
2138 pVCpu->iem.s.pCurTbR3 = pTb;
2139 pVCpu->iem.s.cTbExec++;
2140
2141 /* The execution loop. */
2142#ifdef LOG_ENABLED
2143 uint64_t uRipPrev = UINT64_MAX;
2144#endif
2145 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2146 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2147 while (cCallsLeft-- > 0)
2148 {
2149#ifdef LOG_ENABLED
2150 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2151 {
2152 uRipPrev = pVCpu->cpum.GstCtx.rip;
2153 iemThreadedLogCurInstr(pVCpu, "EX");
2154 }
2155 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2156 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2157 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2158#endif
2159 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2160 pCallEntry->auParams[0],
2161 pCallEntry->auParams[1],
2162 pCallEntry->auParams[2]);
2163 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2164 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2165 pCallEntry++;
2166 else
2167 {
2168 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2169 pVCpu->iem.s.pCurTbR3 = NULL;
2170 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2171
2172 /* Some status codes are just to get us out of this loop and
2173 continue in a different translation block. */
2174 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2175 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2176 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2177 }
2178 }
2179
2180 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2181 pVCpu->iem.s.pCurTbR3 = NULL;
2182 return VINF_SUCCESS;
2183}
2184
2185
2186/**
2187 * This is called when the PC doesn't match the current pbInstrBuf.
2188 *
2189 * Upon return, we're ready for opcode fetching. But please note that
2190 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2191 * MMIO or unassigned).
2192 */
2193static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2194{
2195 pVCpu->iem.s.pbInstrBuf = NULL;
2196 pVCpu->iem.s.offCurInstrStart = 0;
2197 pVCpu->iem.s.offInstrNextByte = 0;
2198 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2199 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2200}
2201
2202
2203/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
2204DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2205{
2206 /*
2207 * Set uCurTbStartPc to RIP and calc the effective PC.
2208 */
2209 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2210 pVCpu->iem.s.uCurTbStartPc = uPc;
2211 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2212 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2213
2214 /*
2215 * Advance within the current buffer (PAGE) when possible.
2216 */
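    /* For illustration (hypothetical numbers): with uInstrBufPc = 0x401000,
       GCPhysInstrBuf = 0x7f5000 and a flat PC of 0x401234, off becomes 0x234.
       Provided that is within cbInstrBufTotal, the physical PC 0x7f5234 is
       returned and opcode fetching continues at that buffer offset. */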
2217 if (pVCpu->iem.s.pbInstrBuf)
2218 {
2219 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2220 if (off < pVCpu->iem.s.cbInstrBufTotal)
2221 {
2222 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2223 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2224 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2225 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2226 else
2227 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2228
2229 return pVCpu->iem.s.GCPhysInstrBuf + off;
2230 }
2231 }
2232 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2233}
2234
2235
2236/**
2237 * Determines the extra IEMTB_F_XXX flags.
2238 *
2239 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2240 * IEMTB_F_CS_LIM_CHECKS (or zero).
2241 * @param pVCpu The cross context virtual CPU structure of the calling
2242 * thread.
2243 */
2244DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2245{
2246 uint32_t fRet = 0;
2247
2248 /*
2249 * Determine the inhibit bits.
2250 */
2251 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2252 { /* typical */ }
2253 else
2254 {
2255 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2256 fRet |= IEMTB_F_INHIBIT_SHADOW;
2257 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2258 fRet |= IEMTB_F_INHIBIT_NMI;
2259 }
2260
2261 /*
2262 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2263 * likely to go invalid before the end of the translation block.
2264 */
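    /* For illustration (hypothetical numbers): with cs.u32Limit = 0xffff,
       eip = 0xf000 and a page aligned CS base, offFromLim is 0xfff, which is
       below X86_PAGE_SIZE + 16, so the TB gets the CS.LIM checks flag. */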
2265 if (IEM_IS_64BIT_CODE(pVCpu))
2266 return fRet;
2267
2268 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2269 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2270 return fRet;
2271 return fRet | IEMTB_F_CS_LIM_CHECKS;
2272}
2273
2274
2275VMMDECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
2276{
2277 /*
2278 * See if there is an interrupt pending in TRPM, inject it if we can.
2279 */
2280 if (!TRPMHasTrap(pVCpu))
2281 { /* likely */ }
2282 else
2283 {
2284 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
2285 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
2286 { /*likely */ }
2287 else
2288 return rcStrict;
2289 }
2290
2291 /*
2292 * Init the execution environment.
2293 */
2294 iemInitExec(pVCpu, 0 /*fExecOpts*/);
2295 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
2296 { }
2297 else
2298 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
2299
2300 /*
2301 * Run-loop.
2302 *
2303 * If we're using setjmp/longjmp we combine all the catching here to avoid
2304 * having to call setjmp for each block we're executing.
2305 */
2306 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
2307 for (;;)
2308 {
2309 PIEMTB pTb = NULL;
2310 VBOXSTRICTRC rcStrict;
2311 IEM_TRY_SETJMP(pVCpu, rcStrict)
2312 {
2313 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
2314 for (uint32_t iIterations = 0; ; iIterations++)
2315 {
2316 /* Translate PC to physical address; we'll need this for both lookup and compilation. */
2317 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
2318 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
2319
2320 pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
2321 if (pTb)
2322 {
2323 if (pTb->fFlags & IEMTB_F_TYPE_THREADED)
2324 rcStrict = iemThreadedTbExec(pVCpu, pTb);
2325 else
2326 AssertFailedStmt(rcStrict = VERR_INTERNAL_ERROR_4);
2327 }
2328 else
2329 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
2330 if (rcStrict == VINF_SUCCESS)
2331 {
2332 Assert(pVCpu->iem.s.cActiveMappings == 0);
2333
2334 uint64_t fCpu = pVCpu->fLocalForcedActions;
2335 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
2336 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2337 | VMCPU_FF_TLB_FLUSH
2338 | VMCPU_FF_UNHALT );
2339 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2340 if (RT_LIKELY( ( !fCpu
2341 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2342 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2343 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
2344 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
2345 {
2346 if (RT_LIKELY( (iIterations & cPollRate) != 0
2347 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
2348 {
2349
2350 }
2351 else
2352 return VINF_SUCCESS;
2353 }
2354 else
2355 return VINF_SUCCESS;
2356 }
2357 else
2358 return rcStrict;
2359 }
2360 }
2361 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
2362 {
2363 pVCpu->iem.s.cLongJumps++;
2364 if (pVCpu->iem.s.cActiveMappings > 0)
2365 iemMemRollback(pVCpu);
2366
2367#if 0 /** @todo do we need to clean up anything? */
2368 /* If pTb isn't NULL we're in iemThreadedTbExec. */
2369 if (!pTb)
2370 {
2371 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
2372 pTb = pVCpu->iem.s.pCurTbR3;
2373 if (pTb)
2374 {
2375 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
2376 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
2377 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
2378 }
2379 }
2380#endif
2381 return rcStrict;
2382 }
2383 IEM_CATCH_LONGJMP_END(pVCpu);
2384 }
2385}
2386