VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@105655

Last change on this file since 105655 was 105560, checked in by vboxsync, 7 months ago

VMM/IEM: A few more TLB tracing events. bugref:10727

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 131.5 KB
1/* $Id: IEMAllThrdRecompiler.cpp 105560 2024-08-01 10:14:06Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
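
/*
 * Example (assumption, not from the original file): with the conventional
 * mapping of LOG_GROUP_IEM_RE_THREADED to the log group name "iem_re_threaded",
 * a debug build could enable the error, flow and TB-compilation output listed
 * above with something along the lines of:
 *
 *      VBOX_LOG="iem_re_threaded.e.f.l8" VirtualBoxVM --startvm MyVM
 *
 * The group name and flag syntax are assumptions based on the usual VBox
 * logging conventions; see VBox/log.h for the authoritative definitions.
 */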
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
117# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
118#endif
119
120
121/**
122 * Calculates the effective address of a ModR/M memory operand, extended version
123 * for use in the recompilers.
124 *
125 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
126 *
127 * May longjmp on internal error.
128 *
129 * @return The effective address.
130 * @param pVCpu The cross context virtual CPU structure of the calling thread.
131 * @param bRm The ModRM byte.
132 * @param cbImmAndRspOffset - First byte: The size of any immediate
133 * following the effective address opcode bytes
134 * (only for RIP relative addressing).
135 * - Second byte: RSP displacement (for POP [ESP]).
136 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
137 * SIB byte (bits 39:32).
138 *
139 * @note This must be defined in a source file with matching
140 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
141 * or implemented differently...
142 */
143RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
144{
145 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
146# define SET_SS_DEF() \
147 do \
148 { \
149 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
150 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
151 } while (0)
152
153 if (!IEM_IS_64BIT_CODE(pVCpu))
154 {
155/** @todo Check the effective address size crap! */
156 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
157 {
158 uint16_t u16EffAddr;
159
160 /* Handle the disp16 form with no registers first. */
161 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
162 {
163 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
164 *puInfo = u16EffAddr;
165 }
166 else
167 {
168 /* Get the displacement. */
169 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
170 {
171 case 0: u16EffAddr = 0; break;
172 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
173 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
174 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
175 }
176 *puInfo = u16EffAddr;
177
178 /* Add the base and index registers to the disp. */
179 switch (bRm & X86_MODRM_RM_MASK)
180 {
181 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
182 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
183 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
184 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
185 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
186 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
187 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
188 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
189 }
190 }
191
192 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
193 return u16EffAddr;
194 }
195
196 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
197 uint32_t u32EffAddr;
198 uint64_t uInfo;
199
200 /* Handle the disp32 form with no registers first. */
201 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
202 {
203 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
204 uInfo = u32EffAddr;
205 }
206 else
207 {
208 /* Get the register (or SIB) value. */
209 uInfo = 0;
210 switch ((bRm & X86_MODRM_RM_MASK))
211 {
212 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
213 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
214 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
215 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
216 case 4: /* SIB */
217 {
218 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
219 uInfo = (uint64_t)bSib << 32;
220
221 /* Get the index and scale it. */
222 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
223 {
224 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
225 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
226 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
227 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
228 case 4: u32EffAddr = 0; /*none */ break;
229 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
230 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
231 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
232 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
233 }
234 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
235
236 /* add base */
237 switch (bSib & X86_SIB_BASE_MASK)
238 {
239 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
240 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
241 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
242 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
243 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
244 case 5:
245 if ((bRm & X86_MODRM_MOD_MASK) != 0)
246 {
247 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
248 SET_SS_DEF();
249 }
250 else
251 {
252 uint32_t u32Disp;
253 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
254 u32EffAddr += u32Disp;
255 uInfo |= u32Disp;
256 }
257 break;
258 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
259 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
260 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
261 }
262 break;
263 }
264 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
265 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
266 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
267 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
268 }
269
270 /* Get and add the displacement. */
271 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
272 {
273 case 0:
274 break;
275 case 1:
276 {
277 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
278 u32EffAddr += i8Disp;
279 uInfo |= (uint32_t)(int32_t)i8Disp;
280 break;
281 }
282 case 2:
283 {
284 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
285 u32EffAddr += u32Disp;
286 uInfo |= u32Disp;
287 break;
288 }
289 default:
290 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
291 }
292 }
293
294 *puInfo = uInfo;
295 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
296 return u32EffAddr;
297 }
298
299 uint64_t u64EffAddr;
300 uint64_t uInfo;
301
302 /* Handle the rip+disp32 form with no registers first. */
303 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
304 {
305 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
306 uInfo = (uint32_t)u64EffAddr;
307 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
308 }
309 else
310 {
311 /* Get the register (or SIB) value. */
312 uInfo = 0;
313 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
314 {
315 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
316 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
317 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
318 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
319 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
320 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
321 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
322 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
323 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
324 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
325 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
326 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
327 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
328 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
329 /* SIB */
330 case 4:
331 case 12:
332 {
333 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
334 uInfo = (uint64_t)bSib << 32;
335
336 /* Get the index and scale it. */
337 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
338 {
339 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
340 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
341 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
342 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
343 case 4: u64EffAddr = 0; /*none */ break;
344 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
345 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
346 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
347 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
348 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
349 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
350 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
351 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
352 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
353 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
354 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
355 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
356 }
357 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
358
359 /* add base */
360 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
361 {
362 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
363 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
364 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
365 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
366 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
367 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
368 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
369 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
370 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
371 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
372 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
373 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
374 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
375 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
376 /* complicated encodings */
377 case 5:
378 case 13:
379 if ((bRm & X86_MODRM_MOD_MASK) != 0)
380 {
381 if (!pVCpu->iem.s.uRexB)
382 {
383 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
384 SET_SS_DEF();
385 }
386 else
387 u64EffAddr += pVCpu->cpum.GstCtx.r13;
388 }
389 else
390 {
391 uint32_t u32Disp;
392 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
393 u64EffAddr += (int32_t)u32Disp;
394 uInfo |= u32Disp;
395 }
396 break;
397 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
398 }
399 break;
400 }
401 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
402 }
403
404 /* Get and add the displacement. */
405 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
406 {
407 case 0:
408 break;
409 case 1:
410 {
411 int8_t i8Disp;
412 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
413 u64EffAddr += i8Disp;
414 uInfo |= (uint32_t)(int32_t)i8Disp;
415 break;
416 }
417 case 2:
418 {
419 uint32_t u32Disp;
420 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
421 u64EffAddr += (int32_t)u32Disp;
422 uInfo |= u32Disp;
423 break;
424 }
425 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
426 }
427
428 }
429
430 *puInfo = uInfo;
431 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
432 {
433 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
434 return u64EffAddr;
435 }
436 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
437 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
438 return u64EffAddr & UINT32_MAX;
439}
440
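/*
 * Worked example (illustrative sketch, not part of the original source): in
 * 16-bit addressing mode the ModRM byte 0x42 decodes as mod=01, r/m=010, i.e.
 * [BP+SI+disp8].  With disp8=0x10, BP=0x2000 and SI=0x0300 the function above
 * returns 0x2310, stores the zero-extended displacement 0x10 in *puInfo (no SIB
 * byte was fetched, so bits 39:32 stay zero), and selects SS as the default
 * segment unless a segment prefix is in effect (see SET_SS_DEF).
 */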
441
442/*********************************************************************************************************************************
443* Translation Block Cache. *
444*********************************************************************************************************************************/
445
446/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
447static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
448{
449 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
450 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
451 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
452 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
453 if (cMsSinceUse1 != cMsSinceUse2)
454 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
455 if (pTb1->cUsed != pTb2->cUsed)
456 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
457 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
458 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
459 return 0;
460}
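/*
 * Illustration (made-up values): with msRecompilerPollNow=100, a TB with
 * msLastUsed=99 and cUsed=2 sorts before one with msLastUsed=99 and cUsed=1
 * (same age, used more), which in turn sorts before one with msLastUsed=50
 * (older).  Native TBs win the final tie-break, so the most recently and most
 * heavily used (preferably native) TBs end up at the front of the sorted array
 * and survive the pruning in iemTbCacheAddWithPruning below.
 */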
461
462#ifdef VBOX_STRICT
463/**
464 * Assertion helper that checks a collisions list count.
465 */
466static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
467{
468 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
469 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
470 while (pTb)
471 {
472 pTb = pTb->pNext;
473 cLeft--;
474 }
475 AssertMsg(cLeft == 0,
476 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
477 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
478}
479#endif
480
481
482DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
483{
484 STAM_PROFILE_START(&pTbCache->StatPrune, a);
485
486 /*
487 * First convert the collision list to an array.
488 */
489 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
490 uintptr_t cInserted = 0;
491 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
492
493 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
494
495 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
496 {
497 apSortedTbs[cInserted++] = pTbCollision;
498 pTbCollision = pTbCollision->pNext;
499 }
500
501 /* Free any excess (impossible). */
502 if (RT_LIKELY(!pTbCollision))
503 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
504 else
505 do
506 {
507 PIEMTB pTbToFree = pTbCollision;
508 pTbCollision = pTbToFree->pNext;
509 iemTbAllocatorFree(pVCpu, pTbToFree);
510 } while (pTbCollision);
511
512 /*
513 * Sort it by most recently used and usage count.
514 */
515 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
516
517 /* We keep half the list for now. Perhaps a bit aggressive... */
518 uintptr_t const cKeep = cInserted / 2;
519
520 /* First free up the TBs we don't wish to keep (before creating the new
521 list because otherwise the free code will scan the list for each one
522 without ever finding it). */
523 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
524 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
525
526 /* Then chain the new TB together with the existing TBs we want to keep
527 and insert this list into the hash table. */
528 pTbCollision = pTb;
529 for (uintptr_t idx = 0; idx < cKeep; idx++)
530 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
531 pTbCollision->pNext = NULL;
532
533 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
534#ifdef VBOX_STRICT
535 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
536#endif
537
538 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
539}
540
541
542static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
543{
544 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
545 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
546 if (!pTbOldHead)
547 {
548 pTb->pNext = NULL;
549 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
550 }
551 else
552 {
553 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
554 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
555 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
556 {
557 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
558 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
559#ifdef VBOX_STRICT
560 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
561#endif
562 }
563 else
564 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
565 }
566}
567
568
569/**
570 * Unlinks @a pTb from the hash table if found in it.
571 *
572 * @returns true if unlinked, false if not present.
573 * @param pTbCache The hash table.
574 * @param pTb The TB to remove.
575 */
576static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
577{
578 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
579 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
580 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
581
582 /*
583 * At the head of the collision list?
584 */
585 if (pTbHash == pTb)
586 {
587 if (!pTb->pNext)
588 pTbCache->apHash[idxHash] = NULL;
589 else
590 {
591 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
592 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
593#ifdef VBOX_STRICT
594 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
595#endif
596 }
597 return true;
598 }
599
600 /*
601 * Search the collision list.
602 */
603 PIEMTB const pTbHead = pTbHash;
604 while (pTbHash)
605 {
606 PIEMTB const pNextTb = pTbHash->pNext;
607 if (pNextTb == pTb)
608 {
609 pTbHash->pNext = pTb->pNext;
610 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
611#ifdef VBOX_STRICT
612 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
613#endif
614 return true;
615 }
616 pTbHash = pNextTb;
617 }
618 return false;
619}
620
621
622/**
623 * Looks up a TB for the given PC and flags in the cache.
624 *
625 * @returns Pointer to TB on success, NULL if not found.
626 * @param pVCpu The cross context virtual CPU structure of the
627 * calling thread.
628 * @param pTbCache The translation block cache.
629 * @param GCPhysPc The PC to look up a TB for.
630 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
631 * the lookup.
632 * @thread EMT(pVCpu)
633 */
634static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
635 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
636{
637 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
638
639 /*
640 * First consult the lookup table entry.
641 */
642 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
643 PIEMTB pTb = *ppTbLookup;
644 if (pTb)
645 {
646 if (pTb->GCPhysPc == GCPhysPc)
647 {
648 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
649 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
650 {
651 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
652 {
653 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
654 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
655 pTb->cUsed++;
656#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
657 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
658 {
659 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
660 return pTb;
661 }
662 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
663 return iemNativeRecompile(pVCpu, pTb);
664#else
665 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
666 return pTb;
667#endif
668 }
669 }
670 }
671 }
672
673 /*
674 * Then consult the hash table.
675 */
676 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
677#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
678 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
679#endif
680 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
681 while (pTb)
682 {
683 if (pTb->GCPhysPc == GCPhysPc)
684 {
685 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
686 {
687 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
688 {
689 STAM_COUNTER_INC(&pTbCache->cLookupHits);
690 AssertMsg(cLeft > 0, ("%d\n", cLeft));
691
692 *ppTbLookup = pTb;
693 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
694 pTb->cUsed++;
695#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
696 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
697 {
698 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
699 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
700 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
701 return pTb;
702 }
703 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
704 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
705 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
706 return iemNativeRecompile(pVCpu, pTb);
707#else
708 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
709 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
710 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
711 return pTb;
712#endif
713 }
714 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
715 }
716 else
717 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
718 }
719 else
720 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
721
722 pTb = pTb->pNext;
723#ifdef VBOX_STRICT
724 cLeft--;
725#endif
726 }
727 AssertMsg(cLeft == 0, ("%d\n", cLeft));
728 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
729 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
730 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
731 return pTb;
732}
733
734
735/*********************************************************************************************************************************
736* Translation Block Allocator.
737*********************************************************************************************************************************/
738/*
739 * Translation block allocation management.
740 */
741
742#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
743# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
744 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
745# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
746 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
747# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
748 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
749#else
750# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
751 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
752# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
753 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
754# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
755 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
756#endif
757/** Makes a TB index from a chunk index and TB index within that chunk. */
758#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
759 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
760
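/*
 * Illustration (hypothetical numbers): with cTbsPerChunk=4096 the power-of-two
 * variant uses cChunkShift=12 and fChunkMask=0xfff, so TB index 5000 maps to
 * chunk 1 (5000 >> 12) at index 904 (5000 & 0xfff), and
 * IEMTBALLOC_IDX_MAKE(pTbAllocator, 1, 904) yields 5000 again.  The generic
 * variant computes the same mapping with division and multiplication.
 */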
761
762/**
763 * Initializes the TB allocator and cache for an EMT.
764 *
765 * @returns VBox status code.
766 * @param pVM The VM handle.
767 * @param cInitialTbs The initial number of translation blocks to
768 * preallocator.
769 * @param cMaxTbs The max number of translation blocks allowed.
770 * @param cbInitialExec The initial size of the executable memory allocator.
771 * @param cbMaxExec The max size of the executable memory allocator.
772 * @param cbChunkExec The chunk size for executable memory allocator. Zero
773 * or UINT32_MAX for automatically determining this.
774 * @thread EMT
775 */
776DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
777 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
778{
779 PVMCPUCC pVCpu = VMMGetCpu(pVM);
780 Assert(!pVCpu->iem.s.pTbCacheR3);
781 Assert(!pVCpu->iem.s.pTbAllocatorR3);
782
783 /*
784 * Calculate the chunk size of the TB allocator.
785 * The minimum chunk size is 2MiB.
786 */
787 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
788 uint32_t cbPerChunk = _2M;
789 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
790#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
791 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
792 uint8_t cChunkShift = 21 - cTbShift;
793 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
794#endif
795 for (;;)
796 {
797 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
798 break;
799 cbPerChunk *= 2;
800 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
801#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
802 cChunkShift += 1;
803#endif
804 }
805
806 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
807 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
808 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
809
810 cMaxTbs = cMaxChunks * cTbsPerChunk;
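    /*
     * Illustration (hypothetical sizes, not the real IEMTB layout): if
     * sizeof(IEMTB) were 512 bytes, a 2 MiB chunk would hold 4096 TBs; a
     * request for cMaxTbs=10000 would then give cMaxChunks=3 and an effective
     * cMaxTbs=12288, assuming aChunks has at least three entries; otherwise
     * the loop above doubles cbPerChunk until everything fits.
     */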
811
812 /*
813 * Allocate and initialize it.
814 */
815 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
816 if (!pTbAllocator)
817 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
818 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
819 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
820 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
821 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
822 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
823 pTbAllocator->cbPerChunk = cbPerChunk;
824 pTbAllocator->cMaxTbs = cMaxTbs;
825 pTbAllocator->pTbsFreeHead = NULL;
826#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
827 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
828 pTbAllocator->cChunkShift = cChunkShift;
829 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
830#endif
831
832 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
833
834 /*
835 * Allocate the initial chunks.
836 */
837 for (uint32_t idxChunk = 0; ; idxChunk++)
838 {
839 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
840 if (!paTbs)
841 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
842 "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
843 cbPerChunk, idxChunk, pVCpu->idCpu);
844
845 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
846 {
847 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
848 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
849 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
850 }
851 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
852 pTbAllocator->cTotalTbs += cTbsPerChunk;
853
854 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
855 break;
856 }
857
858 /*
859 * Calculate the size of the hash table. We double the max TB count and
860 * round it up to the nearest power of two.
861 */
862 uint32_t cCacheEntries = cMaxTbs * 2;
863 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
864 {
865 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
866 cCacheEntries = RT_BIT_32(iBitTop);
867 Assert(cCacheEntries >= cMaxTbs * 2);
868 }
869
870 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
871 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
872 if (!pTbCache)
873 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
874 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
875 cbTbCache, cCacheEntries, pVCpu->idCpu);
876
877 /*
878 * Initialize it (assumes zeroed by the allocator).
879 */
880 pTbCache->uMagic = IEMTBCACHE_MAGIC;
881 pTbCache->cHash = cCacheEntries;
882 pTbCache->uHashMask = cCacheEntries - 1;
883 Assert(pTbCache->cHash > pTbCache->uHashMask);
884 pVCpu->iem.s.pTbCacheR3 = pTbCache;
885
886 /*
887 * Initialize the native executable memory allocator.
888 */
889#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
890 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
891 AssertLogRelRCReturn(rc, rc);
892#else
893 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
894#endif
895
896 return VINF_SUCCESS;
897}
898
899
900/**
901 * Inner free worker.
902 */
903static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
904 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
905{
906 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
907 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
908 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
909#ifdef VBOX_STRICT
910 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
911 Assert(pTbOther != pTb);
912#endif
913
914 /*
915 * Unlink the TB from the hash table.
916 */
917 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
918
919 /*
920 * Free the TB itself.
921 */
922 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
923 {
924 case IEMTB_F_TYPE_THREADED:
925 pTbAllocator->cThreadedTbs -= 1;
926 RTMemFree(pTb->Thrd.paCalls);
927 break;
928#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
929 case IEMTB_F_TYPE_NATIVE:
930 pTbAllocator->cNativeTbs -= 1;
931 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
932 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
933 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
934 break;
935#endif
936 default:
937 AssertFailed();
938 }
939
940 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
941
942 pTb->pNext = pTbAllocator->pTbsFreeHead;
943 pTbAllocator->pTbsFreeHead = pTb;
944 pTb->fFlags = 0;
945 pTb->GCPhysPc = UINT64_MAX;
946 pTb->Gen.uPtr = 0;
947 pTb->Gen.uData = 0;
948 pTb->cTbLookupEntries = 0;
949 pTb->cbOpcodes = 0;
950 pTb->pabOpcodes = NULL;
951
952 Assert(pTbAllocator->cInUseTbs > 0);
953
954 pTbAllocator->cInUseTbs -= 1;
955 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
956}
957
958
959/**
960 * Frees the given TB.
961 *
962 * @param pVCpu The cross context virtual CPU structure of the calling
963 * thread.
964 * @param pTb The translation block to free.
965 * @thread EMT(pVCpu)
966 */
967DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
968{
969 /*
970 * Validate state.
971 */
972 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
973 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
974 uint8_t const idxChunk = pTb->idxAllocChunk;
975 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
976 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
977 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
978
979 /*
980 * Invalidate the TB lookup pointer and call the inner worker.
981 */
982 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
983 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
984}
985
986
987/**
988 * Schedules a TB for freeing when it's no longer being executed and/or part of
989 * the caller's call stack.
990 *
991 * The TB will be removed from the translation block cache, though, so it isn't
992 * possible to execute it again, and the IEMTB::pNext member can be used to link
993 * it together with other TBs awaiting freeing.
994 *
995 * @param pVCpu The cross context virtual CPU structure of the calling
996 * thread.
997 * @param pTb The translation block to schedule for freeing.
998 */
999static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1000{
1001 /*
1002 * Validate state.
1003 */
1004 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1005 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1006 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1007 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1008 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1009 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1010#ifdef VBOX_STRICT
1011 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1012 Assert(pTbOther != pTb);
1013#endif
1014
1015 /*
1016 * Remove it from the cache and prepend it to the allocator's todo list.
1017 *
1018 * Note! It could still be in various lookup tables, so we trash the GCPhys
1019 * and CS attribs to ensure it won't be reused.
1020 */
1021 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1022 pTb->GCPhysPc = NIL_RTGCPHYS;
1023 pTb->x86.fAttr = UINT16_MAX;
1024
1025 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1026 pTbAllocator->pDelayedFreeHead = pTb;
1027}
1028
1029
1030/**
1031 * Processes the delayed frees.
1032 *
1033 * This is called by the allocator function as well as the native recompile
1034 * function before making any TB or executable memory allocations respectively.
1035 */
1036void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1037{
1038 /** @todo r=bird: these have already been removed from the cache,
1039 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1040 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1041 pTbAllocator->pDelayedFreeHead = NULL;
1042 while (pTb)
1043 {
1044 PIEMTB const pTbNext = pTb->pNext;
1045 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1046 iemTbAllocatorFree(pVCpu, pTb);
1047 pTb = pTbNext;
1048 }
1049}
1050
1051
1052#if 0
1053/**
1054 * Frees all TBs.
1055 */
1056static int iemTbAllocatorFreeAll(PVMCPUCC pVCpu)
1057{
1058 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1059 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1060 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1061
1062 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1063
1064 uint32_t idxChunk = pTbAllocator->cAllocatedChunks;
1065 while (idxChunk-- > 0)
1066 {
1067 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1068 uint32_t idxTb = pTbAllocator->cTbsPerChunk;
1069 while (idxTb-- > 0)
1070 {
1071 PIEMTB const pTb = &paTbs[idxTb];
1072 if (pTb->fFlags)
1073 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxTb);
1074 }
1075 }
1076
1077 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1078
1079# if 1
1080 /* Reset the free list. */
1081 pTbAllocator->pTbsFreeHead = NULL;
1082 idxChunk = pTbAllocator->cAllocatedChunks;
1083 while (idxChunk-- > 0)
1084 {
1085 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1086 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1087 RT_BZERO(paTbs, sizeof(paTbs[0]) * cTbsPerChunk);
1088 for (uint32_t idxTb = 0; idxTb < cTbsPerChunk; idxTb++)
1089 {
1090 paTbs[idxTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1091 paTbs[idxTb].pNext = pTbAllocator->pTbsFreeHead;
1092 pTbAllocator->pTbsFreeHead = &paTbs[idxTb];
1093 }
1094 }
1095# endif
1096
1097# if 1
1098 /* Completely reset the TB cache. */
1099 RT_BZERO(pVCpu->iem.s.pTbCacheR3->apHash, sizeof(pVCpu->iem.s.pTbCacheR3->apHash[0]) * pVCpu->iem.s.pTbCacheR3->cHash);
1100# endif
1101
1102 return VINF_SUCCESS;
1103}
1104#endif
1105
1106
1107/**
1108 * Grow the translation block allocator with another chunk.
1109 */
1110static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1111{
1112 /*
1113 * Validate state.
1114 */
1115 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1116 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1117 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1118 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1119 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1120
1121 /*
1122 * Allocate a new chunk and add it to the allocator.
1123 */
1124 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1125 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1126 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1127
1128 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1129 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1130 {
1131 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1132 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1133 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1134 }
1135 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1136 pTbAllocator->cTotalTbs += cTbsPerChunk;
1137
1138 return VINF_SUCCESS;
1139}
1140
1141
1142/**
1143 * Allocates a TB from an allocator with a free block.
1144 *
1145 * This is common code to both the fast and slow allocator code paths.
1146 */
1147DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1148{
1149 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1150 Assert(pTbAllocator->pTbsFreeHead);
1151
1152 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1153 pTbAllocator->pTbsFreeHead = pTb->pNext;
1154 pTbAllocator->cInUseTbs += 1;
1155 if (fThreaded)
1156 pTbAllocator->cThreadedTbs += 1;
1157 else
1158 pTbAllocator->cNativeTbs += 1;
1159 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1160 return pTb;
1161}
1162
1163
1164/**
1165 * Slow path for iemTbAllocatorAlloc.
1166 */
1167static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1168{
1169 /*
1170 * With some luck we can add another chunk.
1171 */
1172 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1173 {
1174 int rc = iemTbAllocatorGrow(pVCpu);
1175 if (RT_SUCCESS(rc))
1176 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1177 }
1178
1179 /*
1180 * We have to prune stuff. Sigh.
1181 *
1182 * This requires scanning for older TBs and kicking them out. Not sure how to
1183 * best do this as we don't want to maintain any list of TBs ordered by last
1184 * usage time. But one reasonably simple approach would be that each time we
1185 * get here we continue a sequential scan of the allocation chunks,
1186 * considering just a smallish number of TBs and freeing a fixed portion of
1187 * them. Say, we consider the next 128 TBs, freeing the least recently used
1188 * one in each group of 4 TBs, resulting in 32 free TBs.
1189 */
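    /*
     * Illustration (made-up values): for a group of 4 TBs whose (age in ms,
     * cUsed) pairs are (5,10), (9,3), (9,7) and (2,50), the scan below selects
     * the TB with age 9 and cUsed=3: oldest first, ties broken by the lower
     * use count.  Only that one TB is freed per group, so a full pass over
     * 128 TBs frees 32 of them.
     */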
1190 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1191 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1192 uint32_t const cTbsToPrune = 128;
1193 uint32_t const cTbsPerGroup = 4;
1194 uint32_t cFreedTbs = 0;
1195#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1196 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1197#else
1198 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1199#endif
1200 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1201 idxTbPruneFrom = 0;
1202 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1203 {
1204 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1205 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1206 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1207 uint32_t cMsAge = msNow - pTb->msLastUsed;
1208 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1209
1210 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1211 {
1212#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1213 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1214 { /* likely */ }
1215 else
1216 {
1217 idxInChunk2 = 0;
1218 idxChunk2 += 1;
1219 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1220 idxChunk2 = 0;
1221 }
1222#endif
1223 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1224 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1225 if ( cMsAge2 > cMsAge
1226 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1227 {
1228 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1229 pTb = pTb2;
1230 idxChunk = idxChunk2;
1231 idxInChunk = idxInChunk2;
1232 cMsAge = cMsAge2;
1233 }
1234 }
1235
1236 /* Free the TB. */
1237 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1238 cFreedTbs++; /* paranoia */
1239 }
1240 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1241 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1242
1243 /* Flush the TB lookup entry pointer. */
1244 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1245
1246 /*
1247 * Allocate a TB from the ones we've pruned.
1248 */
1249 if (cFreedTbs)
1250 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1251 return NULL;
1252}
1253
1254
1255/**
1256 * Allocate a translation block.
1257 *
1258 * @returns Pointer to block on success, NULL if we're out and are unable to
1259 * free up an existing one (very unlikely once implemented).
1260 * @param pVCpu The cross context virtual CPU structure of the calling
1261 * thread.
1262 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1263 * For statistics.
1264 */
1265DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1266{
1267 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1268 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1269
1270 /* Free any pending TBs before we proceed. */
1271 if (!pTbAllocator->pDelayedFreeHead)
1272 { /* probably likely */ }
1273 else
1274 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1275
1276 /* If the allocator is full, take the slow code path. */
1277 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1278 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1279 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1280}
1281
1282
1283/**
1284 * This is called when we're out of space for native TBs.
1285 *
1286 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1287 * The difference is that we only prune native TBs and will only free any if
1288 * there are at least two in a group. The conditions under which we're called are
1289 * different - there will probably be free TBs in the table when we're called.
1290 * Therefore we increase the group size and max scan length, though we'll stop
1291 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1292 * up at least 8 TBs.
1293 */
1294void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1295{
1296 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1297 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1298
1299 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1300
1301 /*
1302 * Flush the delayed free list before we start freeing TBs indiscriminately.
1303 */
1304 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1305
1306 /*
1307 * Scan and free TBs.
1308 */
1309 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1310 uint32_t const cTbsToPrune = 128 * 8;
1311 uint32_t const cTbsPerGroup = 4 * 4;
1312 uint32_t cFreedTbs = 0;
1313 uint32_t cMaxInstrs = 0;
1314 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1315 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1316 {
1317 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1318 idxTbPruneFrom = 0;
1319 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1320 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1321 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1322 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1323 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1324
1325 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1326 {
1327 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1328 { /* likely */ }
1329 else
1330 {
1331 idxInChunk2 = 0;
1332 idxChunk2 += 1;
1333 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1334 idxChunk2 = 0;
1335 }
1336 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1337 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1338 {
1339 cNativeTbs += 1;
1340 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1341 if ( cMsAge2 > cMsAge
1342 || ( cMsAge2 == cMsAge
1343 && ( pTb2->cUsed < pTb->cUsed
1344 || ( pTb2->cUsed == pTb->cUsed
1345 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1346 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1347 {
1348 pTb = pTb2;
1349 idxChunk = idxChunk2;
1350 idxInChunk = idxInChunk2;
1351 cMsAge = cMsAge2;
1352 }
1353 }
1354 }
1355
1356 /* Free the TB if we found at least two native ones in this group. */
1357 if (cNativeTbs >= 2)
1358 {
1359 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1360 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1361 cFreedTbs++;
1362 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1363 break;
1364 }
1365 }
1366 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1367
1368 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1369}
1370
1371
1372/*********************************************************************************************************************************
1373* Threaded Recompiler Core *
1374*********************************************************************************************************************************/
1375/**
1376 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1377 * @returns pszBuf.
1378 * @param fFlags The flags.
1379 * @param pszBuf The output buffer.
1380 * @param cbBuf The output buffer size. At least 32 bytes.
1381 */
1382DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1383{
1384 Assert(cbBuf >= 32);
1385 static RTSTRTUPLE const s_aModes[] =
1386 {
1387 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1388 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1389 /* [02] = */ { RT_STR_TUPLE("!2!") },
1390 /* [03] = */ { RT_STR_TUPLE("!3!") },
1391 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1392 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1393 /* [06] = */ { RT_STR_TUPLE("!6!") },
1394 /* [07] = */ { RT_STR_TUPLE("!7!") },
1395 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1396 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1397 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1398 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1399 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1400 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1401 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1402 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1403 /* [10] = */ { RT_STR_TUPLE("!10!") },
1404 /* [11] = */ { RT_STR_TUPLE("!11!") },
1405 /* [12] = */ { RT_STR_TUPLE("!12!") },
1406 /* [13] = */ { RT_STR_TUPLE("!13!") },
1407 /* [14] = */ { RT_STR_TUPLE("!14!") },
1408 /* [15] = */ { RT_STR_TUPLE("!15!") },
1409 /* [16] = */ { RT_STR_TUPLE("!16!") },
1410 /* [17] = */ { RT_STR_TUPLE("!17!") },
1411 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1412 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1413 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1414 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1415 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1416 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1417 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1418 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1419 };
1420 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1421 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1422 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1423
1424 pszBuf[off++] = ' ';
1425 pszBuf[off++] = 'C';
1426 pszBuf[off++] = 'P';
1427 pszBuf[off++] = 'L';
1428 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1429 Assert(off < 32);
1430
1431 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1432
1433 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1434 {
1435 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1436 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1437 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1438 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1439 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1440 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1441 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1442 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1443 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1444 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1445 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1446 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1447 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1448 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1449 };
1450 if (fFlags)
1451 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1452 if (s_aFlags[i].fFlag & fFlags)
1453 {
1454 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1455 pszBuf[off++] = ' ';
1456 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1457 off += s_aFlags[i].cchName;
1458 fFlags &= ~s_aFlags[i].fFlag;
1459 if (!fFlags)
1460 break;
1461 }
1462 pszBuf[off] = '\0';
1463
1464 return pszBuf;
1465}
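/*
 * Example output (illustrative): for a threaded TB executing flat 32-bit ring-3
 * code with CS limit checks enabled, the function would produce something like
 * "32BIT_FLAT CPL3 CS_LIM_CHECKS TYPE_THREADED": the mode name, the CPL digit,
 * then one token per remaining set flag in s_aFlags order.
 */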
1466
1467
1468/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1469static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1470{
1471 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1472 pDis->cbCachedInstr += cbMaxRead;
1473 RT_NOREF(cbMinRead);
1474 return VERR_NO_DATA;
1475}
1476
1477
1478/**
1479 * Worker for iemThreadedDisassembleTb.
1480 */
1481static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1482 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1483{
1484 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1485 {
1486 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1487 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1488 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1489 {
1490 PIEMTB pLookupTb = papTbLookup[iLookup];
1491 if (pLookupTb)
1492 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1493 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1494 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1495 : "invalid");
1496 else
1497 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1498 }
1499 pHlp->pfnPrintf(pHlp, "\n");
1500 }
1501 else
1502 {
1503 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1504 idxFirst, cEntries, pTb->cTbLookupEntries);
1505 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1506 }
1507}
1508
1509
1510DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1511{
1512 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1513
1514 char szDisBuf[512];
1515
1516 /*
1517 * Print TB info.
1518 */
1519 pHlp->pfnPrintf(pHlp,
1520 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1521 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1522 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1523 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1524
1525 /*
1526 * This disassembly is driven by the debug info which follows the native
1527 * code and indicates when it starts with the next guest instructions,
1528 * where labels are and such things.
1529 */
1530 DISSTATE Dis;
1531 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1532 uint32_t const cCalls = pTb->Thrd.cCalls;
1533 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1534 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1535 : DISCPUMODE_64BIT;
1536 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1537 uint8_t idxRange = UINT8_MAX;
1538 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1539 uint32_t offRange = 0;
1540 uint32_t offOpcodes = 0;
1541 uint32_t const cbOpcodes = pTb->cbOpcodes;
1542 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1543 bool fTbLookupSeen0 = false;
1544
1545 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1546 {
1547 /*
1548 * New opcode range?
1549 */
1550 if ( idxRange == UINT8_MAX
1551 || idxRange >= cRanges
1552 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1553 {
1554 idxRange += 1;
1555 if (idxRange < cRanges)
1556 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1557 else
1558 continue;
1559 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1560 + (pTb->aRanges[idxRange].idxPhysPage == 0
1561 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1562 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1563 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1564 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1565 pTb->aRanges[idxRange].idxPhysPage);
1566 GCPhysPc += offRange;
1567 }
1568
1569 /*
1570 * Disassemble another guest instruction?
1571 */
1572 if ( paCalls[iCall].offOpcode != offOpcodes
1573 && paCalls[iCall].cbOpcode > 0
1574 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1575 {
1576 offOpcodes = paCalls[iCall].offOpcode;
1577 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1578 uint32_t cbInstr = 1;
1579 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1580 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1581 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1582 if (RT_SUCCESS(rc))
1583 {
1584 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1585 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1586 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1587 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1588 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1589 }
1590 else
1591 {
1592 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1593 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1594 cbInstr = paCalls[iCall].cbOpcode;
1595 }
1596 GCPhysPc += cbInstr;
1597 offRange += cbInstr;
1598 }
1599
1600 /*
1601 * Dump call details.
1602 */
1603 pHlp->pfnPrintf(pHlp,
1604 " Call #%u to %s (%u args)\n",
1605 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1606 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1607 if (paCalls[iCall].uTbLookup != 0)
1608 {
1609 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1610 fTbLookupSeen0 = idxFirst == 0;
1611 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1612 }
1613
1614 /*
1615 * Snoop fExec.
1616 */
1617 switch (paCalls[iCall].enmFunction)
1618 {
1619 default:
1620 break;
1621 case kIemThreadedFunc_BltIn_CheckMode:
1622 fExec = paCalls[iCall].auParams[0];
1623 break;
1624 }
1625 }
1626
1627 if (!fTbLookupSeen0)
1628 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1629}
1630
1631
1632
1633/**
1634 * Allocate a translation block for threaded recompilation.
1635 *
1636 * This is allocated with maxed out call table and storage for opcode bytes,
1637 * because it's only supposed to be called once per EMT to allocate the TB
1638 * pointed to by IEMCPU::pThrdCompileTbR3.
1639 *
1640 * @returns Pointer to the translation block on success, NULL on failure.
1641 * @param pVM The cross context virtual machine structure.
1642 * @param pVCpu The cross context virtual CPU structure of the calling
1643 * thread.
1644 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1645 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1646 */
1647static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1648{
1649 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1650 if (pTb)
1651 {
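/* Maxed-out sizing: 256 call entries and 16 opcode bytes per call (an x86
   instruction is at most 15 bytes), since this TB only serves as the
   per-EMT compilation scratch buffer. */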
1652 unsigned const cCalls = 256;
1653 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1654 if (pTb->Thrd.paCalls)
1655 {
1656 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1657 if (pTb->pabOpcodes)
1658 {
1659 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1660 pTb->Thrd.cAllocated = cCalls;
1661 pTb->Thrd.cCalls = 0;
1662 pTb->cbOpcodes = 0;
1663 pTb->pNext = NULL;
1664 pTb->cUsed = 0;
1665 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1666 pTb->idxAllocChunk = UINT8_MAX;
1667 pTb->GCPhysPc = GCPhysPc;
1668 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1669 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1670 pTb->cInstructions = 0;
1671 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1672
1673 /* Init the first opcode range. */
1674 pTb->cRanges = 1;
1675 pTb->aRanges[0].cbOpcodes = 0;
1676 pTb->aRanges[0].offOpcodes = 0;
1677 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1678 pTb->aRanges[0].u2Unused = 0;
1679 pTb->aRanges[0].idxPhysPage = 0;
1680 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1681 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1682
1683 return pTb;
1684 }
1685 RTMemFree(pTb->Thrd.paCalls);
1686 }
1687 RTMemFree(pTb);
1688 }
1689 RT_NOREF(pVM);
1690 return NULL;
1691}
1692
1693
1694/**
1695 * Called on the TB that is dedicated to recompilation before it's reused.
1696 *
1697 * @param pVCpu The cross context virtual CPU structure of the calling
1698 * thread.
1699 * @param pTb The translation block to reuse.
1700 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1701 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1702 */
1703static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1704{
1705 pTb->GCPhysPc = GCPhysPc;
1706 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1707 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1708 pTb->Thrd.cCalls = 0;
1709 pTb->cbOpcodes = 0;
1710 pTb->cInstructions = 0;
1711 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1712
1713 /* Init the first opcode range. */
1714 pTb->cRanges = 1;
1715 pTb->aRanges[0].cbOpcodes = 0;
1716 pTb->aRanges[0].offOpcodes = 0;
1717 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1718 pTb->aRanges[0].u2Unused = 0;
1719 pTb->aRanges[0].idxPhysPage = 0;
1720 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1721 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1722}
1723
1724
1725/**
1726 * Used to duplicate a threaded translation block after recompilation is done.
1727 *
1728 * @returns Pointer to the translation block on success, NULL on failure.
1729 * @param pVM The cross context virtual machine structure.
1730 * @param pVCpu The cross context virtual CPU structure of the calling
1731 * thread.
1732 * @param pTbSrc The TB to duplicate.
1733 */
1734static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1735{
1736 /*
1737 * Just using the heap for now. Will make this more efficient and
1738 * complicated later, don't worry. :-)
1739 */
1740 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1741 if (pTb)
1742 {
1743 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1744 memcpy(pTb, pTbSrc, sizeof(*pTb));
1745 pTb->idxAllocChunk = idxAllocChunk;
1746
1747 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1748 Assert(cCalls > 0);
1749 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1750 if (pTb->Thrd.paCalls)
1751 {
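/* The TB lookup table and the opcode bytes are carved out of one shared
   allocation: the (zeroed) lookup table comes first, followed by the
   opcode bytes aligned on pointer size, with pabOpcodes pointing at the
   latter. */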
1752 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1753 Assert(cbTbLookup > 0);
1754 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1755 Assert(cbOpcodes > 0);
1756 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1757 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1758 if (pbBoth)
1759 {
1760 RT_BZERO(pbBoth, cbTbLookup);
1761 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1762 pTb->Thrd.cAllocated = cCalls;
1763 pTb->pNext = NULL;
1764 pTb->cUsed = 0;
1765 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1766 pTb->fFlags = pTbSrc->fFlags;
1767
1768 return pTb;
1769 }
1770 RTMemFree(pTb->Thrd.paCalls);
1771 }
1772 iemTbAllocatorFree(pVCpu, pTb);
1773 }
1774 RT_NOREF(pVM);
1775 return NULL;
1776
1777}
1778
1779
1780/**
1781 * Adds the given TB to the hash table.
1782 *
1783 * @param pVCpu The cross context virtual CPU structure of the calling
1784 * thread.
1785 * @param pTbCache The cache to add it to.
1786 * @param pTb The translation block to add.
1787 */
1788static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1789{
1790 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1791
1792 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1793 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1794 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1795 if (LogIs12Enabled())
1796 {
1797 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1798 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1799 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1800 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1801 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1802 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1803 pTb->aRanges[idxRange].idxPhysPage == 0
1804 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1805 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1806 }
1807}
1808
1809
1810/**
1811 * Called by opcode verifier functions when they detect a problem.
1812 */
1813void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1814{
1815 /* We cannot free the current TB (indicated by fSafeToFree) because:
1816 - A threaded TB will have its current call entry accessed
1817 to update pVCpu->iem.s.cInstructions.
1818 - A native TB will have code left to execute. */
1819 if (fSafeToFree)
1820 iemTbAllocatorFree(pVCpu, pTb);
1821 else
1822 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1823}
1824
1825
1826/*
1827 * Real code.
1828 */
1829
1830#ifdef LOG_ENABLED
1831/**
1832 * Logs the current instruction.
1833 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1834 * @param pszFunction The IEM function doing the execution.
1835 * @param idxInstr The instruction number in the block.
1836 */
1837static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1838{
1839# ifdef IN_RING3
1840 if (LogIs2Enabled())
1841 {
1842 char szInstr[256];
1843 uint32_t cbInstr = 0;
1844 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1845 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1846 szInstr, sizeof(szInstr), &cbInstr);
1847
1848 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1849 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1850 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1851 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1852 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1853 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1854 " %s\n"
1855 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1856 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1857 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1858 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1859 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1860 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1861 szInstr));
1862
1863 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1864 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1865 }
1866 else
1867# endif
1868 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1869 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1870}
1871#endif /* LOG_ENABLED */
1872
1873
1874#if 0
1875static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1876{
1877 RT_NOREF(pVM, pVCpu);
1878 return rcStrict;
1879}
1880#endif
1881
1882
1883/**
1884 * Initializes the decoder state when compiling TBs.
1885 *
1886 * This presumes that fExec has already been initialized.
1887 *
1888 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1889 * to apply fixes to them as well.
1890 *
1891 * @param pVCpu The cross context virtual CPU structure of the calling
1892 * thread.
1893 * @param fReInit Clear for the first call for a TB, set for subsequent
1894 * calls from inside the compile loop where we can skip a
1895 * couple of things.
1896 * @param fExtraFlags The extra translation block flags when @a fReInit is
1897 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1898 * checked.
1899 */
1900DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1901{
1902 /* ASSUMES: That iemInitExec was already called and that anyone changing
1903 CPU state affecting the fExec bits since then will have updated fExec! */
1904 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1905 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1906
1907 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1908
1909 /* Decoder state: */
1910 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1911 pVCpu->iem.s.enmEffAddrMode = enmMode;
1912 if (enmMode != IEMMODE_64BIT)
1913 {
1914 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1915 pVCpu->iem.s.enmEffOpSize = enmMode;
1916 }
1917 else
1918 {
1919 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1920 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1921 }
1922 pVCpu->iem.s.fPrefixes = 0;
1923 pVCpu->iem.s.uRexReg = 0;
1924 pVCpu->iem.s.uRexB = 0;
1925 pVCpu->iem.s.uRexIndex = 0;
1926 pVCpu->iem.s.idxPrefix = 0;
1927 pVCpu->iem.s.uVex3rdReg = 0;
1928 pVCpu->iem.s.uVexLength = 0;
1929 pVCpu->iem.s.fEvexStuff = 0;
1930 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1931 pVCpu->iem.s.offModRm = 0;
1932 pVCpu->iem.s.iNextMapping = 0;
1933
1934 if (!fReInit)
1935 {
1936 pVCpu->iem.s.cActiveMappings = 0;
1937 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1938 pVCpu->iem.s.fEndTb = false;
1939 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes before executing the first instruction) */
1940 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1941 pVCpu->iem.s.fTbCrossedPage = false;
1942 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1943 pVCpu->iem.s.fTbCurInstrIsSti = false;
1944 /* Force RF clearing and TF checking on first instruction in the block
1945 as we don't really know what came before and should assume the worst: */
1946 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1947 }
1948 else
1949 {
1950 Assert(pVCpu->iem.s.cActiveMappings == 0);
1951 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1952 Assert(pVCpu->iem.s.fEndTb == false);
1953 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1954 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1955 }
1956 pVCpu->iem.s.fTbCurInstr = 0;
1957
1958#ifdef DBGFTRACE_ENABLED
1959 switch (IEM_GET_CPU_MODE(pVCpu))
1960 {
1961 case IEMMODE_64BIT:
1962 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1963 break;
1964 case IEMMODE_32BIT:
1965 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1966 break;
1967 case IEMMODE_16BIT:
1968 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1969 break;
1970 }
1971#endif
1972}
1973
1974
1975/**
1976 * Initializes the opcode fetcher when starting the compilation.
1977 *
1978 * @param pVCpu The cross context virtual CPU structure of the calling
1979 * thread.
1980 */
1981DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1982{
1983 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1984#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1985 pVCpu->iem.s.offOpcode = 0;
1986#else
1987 RT_NOREF(pVCpu);
1988#endif
1989}
1990
1991
1992/**
1993 * Re-initializes the opcode fetcher between instructions while compiling.
1994 *
1995 * @param pVCpu The cross context virtual CPU structure of the calling
1996 * thread.
1997 */
1998DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1999{
2000 if (pVCpu->iem.s.pbInstrBuf)
2001 {
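/* Translate the flat PC into an offset relative to the instruction buffer's
   linear base and check whether it still falls within the buffer; if so we
   only need to update the fetch offsets, limiting lookahead to 15 bytes or
   the end of the buffer. */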
2002 uint64_t off = pVCpu->cpum.GstCtx.rip;
2003 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2004 off += pVCpu->cpum.GstCtx.cs.u64Base;
2005 off -= pVCpu->iem.s.uInstrBufPc;
2006 if (off < pVCpu->iem.s.cbInstrBufTotal)
2007 {
2008 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2009 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2010 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2011 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2012 else
2013 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2014 }
2015 else
2016 {
2017 pVCpu->iem.s.pbInstrBuf = NULL;
2018 pVCpu->iem.s.offInstrNextByte = 0;
2019 pVCpu->iem.s.offCurInstrStart = 0;
2020 pVCpu->iem.s.cbInstrBuf = 0;
2021 pVCpu->iem.s.cbInstrBufTotal = 0;
2022 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2023 }
2024 }
2025 else
2026 {
2027 pVCpu->iem.s.offInstrNextByte = 0;
2028 pVCpu->iem.s.offCurInstrStart = 0;
2029 pVCpu->iem.s.cbInstrBuf = 0;
2030 pVCpu->iem.s.cbInstrBufTotal = 0;
2031#ifdef VBOX_STRICT
2032 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2033#endif
2034 }
2035#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2036 pVCpu->iem.s.offOpcode = 0;
2037#endif
2038}
2039
2040#ifdef LOG_ENABLED
2041
2042/**
2043 * Inserts a NOP call.
2044 *
2045 * This is for debugging.
2046 *
2047 * @returns true on success, false if we're out of call entries.
2048 * @param pTb The translation block being compiled.
2049 */
2050bool iemThreadedCompileEmitNop(PIEMTB pTb)
2051{
2052 /* Emit the call. */
2053 uint32_t const idxCall = pTb->Thrd.cCalls;
2054 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2055 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2056 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2057 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2058 pCall->idxInstr = pTb->cInstructions - 1;
2059 pCall->cbOpcode = 0;
2060 pCall->offOpcode = 0;
2061 pCall->uTbLookup = 0;
2062 pCall->uUnused0 = 0;
2063 pCall->auParams[0] = 0;
2064 pCall->auParams[1] = 0;
2065 pCall->auParams[2] = 0;
2066 return true;
2067}
2068
2069
2070/**
2071 * Called by iemThreadedCompile if cpu state logging is desired.
2072 *
2073 * @returns true on success, false if we're out of call entries.
2074 * @param pTb The translation block being compiled.
2075 */
2076bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2077{
2078 /* Emit the call. */
2079 uint32_t const idxCall = pTb->Thrd.cCalls;
2080 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2081 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2082 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2083 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2084 pCall->idxInstr = pTb->cInstructions - 1;
2085 pCall->cbOpcode = 0;
2086 pCall->offOpcode = 0;
2087 pCall->uTbLookup = 0;
2088 pCall->uUnused0 = 0;
2089 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2090 pCall->auParams[1] = 0;
2091 pCall->auParams[2] = 0;
2092 return true;
2093}
2094
2095#endif /* LOG_ENABLED */
2096
2097DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2098{
2099 switch (cbInstr)
2100 {
2101 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2102 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2103 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2104 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2105 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2106 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2107 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2108 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2109 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2110 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2111 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2112 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2113 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2114 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2115 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2116 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2117 }
2118}
2119
2120
2121/**
2122 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2123 *
2124 * - CS LIM check required.
2125 * - Must recheck opcode bytes.
2126 * - Previous instruction branched.
2127 * - TLB load detected, probably due to page crossing.
2128 *
2129 * @returns true if everything went well, false if we're out of space in the TB
2130 * (e.g. opcode ranges) or need to start doing CS.LIM checks.
2131 * @param pVCpu The cross context virtual CPU structure of the calling
2132 * thread.
2133 * @param pTb The translation block being compiled.
2134 */
2135bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2136{
2137 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2138 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2139#if 0
2140 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2141 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2142#endif
2143
2144 /*
2145 * If we're not in 64-bit mode and not already checking CS.LIM, we need to
2146 * see whether we should start checking.
2147 */
2148 bool fConsiderCsLimChecking;
2149 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2150 if ( fMode == IEM_F_MODE_X86_64BIT
2151 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2152 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2153 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2154 fConsiderCsLimChecking = false; /* already enabled or not needed */
2155 else
2156 {
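/* Heuristic: if at least a whole page plus the maximum instruction length
   lies between EIP and the limit (adjusted for the page offset of CS.BASE),
   the regular page checks should catch us before we can run past the limit
   and we don't need to start CS.LIM checking yet. */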
2157 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2158 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2159 fConsiderCsLimChecking = true; /* likely */
2160 else
2161 {
2162 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2163 return false;
2164 }
2165 }
2166
2167 /*
2168 * Prepare the call now, even before we know whether we can accept the instruction in this TB.
2169 * This allows us to amend parameters w/o making every case suffer.
2170 */
2171 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2172 uint16_t const offOpcode = pTb->cbOpcodes;
2173 uint8_t idxRange = pTb->cRanges - 1;
2174
2175 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2176 pCall->idxInstr = pTb->cInstructions;
2177 pCall->cbOpcode = cbInstr;
2178 pCall->offOpcode = offOpcode;
2179 pCall->uTbLookup = 0;
2180 pCall->uUnused0 = 0;
2181 pCall->auParams[0] = (uint32_t)cbInstr
2182 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2183 /* The upper dword is sometimes used for cbStartPage. */;
2184 pCall->auParams[1] = idxRange;
2185 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2186
2187/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2188 * gotten onto. If we do, stop */
2189
2190 /*
2191 * Case 1: We've branched (RIP changed).
2192 *
2193 * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2194 * TB, end the TB here as it is most likely a loop and if it
2195 * made sense to unroll it, the guest code compiler should've
2196 * done it already.
2197 *
2198 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2199 * Req: 1 extra range, no extra phys.
2200 *
2201 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2202 * necessary (fTbCrossedPage is true).
2203 * Req: 1 extra range, probably 1 extra phys page entry.
2204 *
2205 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2206 * but in addition we cross into the following page and require
2207 * another TLB load.
2208 * Req: 2 extra ranges, probably 2 extra phys page entries.
2209 *
2210 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2211 * the following page (thus fTbCrossedPage is true).
2212 * Req: 2 extra ranges, probably 1 extra phys page entry.
2213 *
2214 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
2215 * it may trigger "spuriously" from the CPU point of view because of
2216 * physical page changes that'll invalidate the physical TLB and trigger a
2217 * call to the function. In theory this shouldn't be a big deal, just a bit
2218 * of performance loss as we'll pick the LoadingTlb variants.
2219 *
2220 * Note! We do not currently optimize branching to the next instruction (sorry
2221 * 32-bit PIC code). We could maybe do that in the branching code that
2222 * sets (or not) fTbBranched.
2223 */
2224 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2225 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2226 * code. This'll require filtering out far jmps and calls, as they
2227 * load CS which should technically be considered indirect since the
2228 * GDT/LDT entry's base address can be modified independently from
2229 * the code. */
2230 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2231 {
2232 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2233 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2234 {
2235 /* 1a + 1b - instruction fully within the branched to page. */
2236 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2237 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2238
2239 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2240 {
2241 /* Check that we've got a free range. */
2242 idxRange += 1;
2243 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2244 { /* likely */ }
2245 else
2246 {
2247 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2248 return false;
2249 }
2250 pCall->auParams[1] = idxRange;
2251 pCall->auParams[2] = 0;
2252
2253 /* Check that we've got a free page slot. */
2254 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
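/* A TB can reference at most three physical pages: the one GCPhysPc is on
   (idxPhysPage 0) plus the two aGCPhysPages entries (idxPhysPage 1 and 2). */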
2255 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2256 uint8_t idxPhysPage;
2257 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2258 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2259 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2260 {
2261 pTb->aGCPhysPages[0] = GCPhysNew;
2262 pTb->aRanges[idxRange].idxPhysPage = 1;
2263 idxPhysPage = UINT8_MAX;
2264 }
2265 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2266 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2267 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2268 {
2269 pTb->aGCPhysPages[1] = GCPhysNew;
2270 pTb->aRanges[idxRange].idxPhysPage = 2;
2271 idxPhysPage = UINT8_MAX;
2272 }
2273 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2274 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2275 else
2276 {
2277 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2278 return false;
2279 }
2280
2281 /* Loop check: We weave the loop check in here to optimize the lookup. */
2282 if (idxPhysPage != UINT8_MAX)
2283 {
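/* idxPhysPage is UINT8_MAX when the branch target is on a page we've only
   just added to the TB; no earlier range can reference such a page, so a
   loop is only possible for pages the TB already knew about. */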
2284 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2285 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2286 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2287 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2288 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2289 {
2290 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2291 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2292 return false;
2293 }
2294 }
2295
2296 /* Finish setting up the new range. */
2297 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2298 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2299 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2300 pTb->aRanges[idxRange].u2Unused = 0;
2301 pTb->cRanges++;
2302 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2303 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2304 pTb->aRanges[idxRange].offOpcodes));
2305 }
2306 else
2307 {
2308 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2309 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2310 }
2311
2312 /* Determine which function we need to load & check.
2313 Note! For jumps to a new page, we'll set both fTbBranched and
2314 fTbCrossedPage to avoid unnecessary TLB work for intra-page
2315 branching. */
2316 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2317 || pVCpu->iem.s.fTbCrossedPage)
2318 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2319 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2320 : !fConsiderCsLimChecking
2321 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2322 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2323 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2324 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2325 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2326 : !fConsiderCsLimChecking
2327 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2328 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2329 else
2330 {
2331 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2332 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2333 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2334 : !fConsiderCsLimChecking
2335 ? kIemThreadedFunc_BltIn_CheckOpcodes
2336 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2337 }
2338 }
2339 else
2340 {
2341 /* 1c + 1d - instruction crosses pages. */
2342 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2343 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2344
2345 /* Lazy bird: Check that this isn't case 1c, since we've already
2346 loaded the first physical address. End the TB and
2347 make it a case 2b instead.
2348
2349 Hmm. Too much bother to detect, so just do the same
2350 with case 1d as well. */
2351#if 0 /** @todo get back to this later when we've got the actual branch code in
2352 * place. */
2353 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2354
2355 /* Check that we've got two free ranges. */
2356 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2357 { /* likely */ }
2358 else
2359 return false;
2360 idxRange += 1;
2361 pCall->auParams[1] = idxRange;
2362 pCall->auParams[2] = 0;
2363
2364 /* ... */
2365
2366#else
2367 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2368 return false;
2369#endif
2370 }
2371 }
2372
2373 /*
2374 * Case 2: Page crossing.
2375 *
2376 * Sub-case 2a: The instruction starts on the first byte in the next page.
2377 *
2378 * Sub-case 2b: The instruction has opcode bytes in both the current and
2379 * following page.
2380 *
2381 * Both cases require a new range table entry and probably a new physical
2382 * page entry. The difference is in which functions to emit and whether to
2383 * add bytes to the current range.
2384 */
2385 else if (pVCpu->iem.s.fTbCrossedPage)
2386 {
2387 /* Check that we've got a free range. */
2388 idxRange += 1;
2389 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2390 { /* likely */ }
2391 else
2392 {
2393 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2394 return false;
2395 }
2396
2397 /* Check that we've got a free page slot. */
2398 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2399 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2400 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2401 pTb->aRanges[idxRange].idxPhysPage = 0;
2402 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2403 || pTb->aGCPhysPages[0] == GCPhysNew)
2404 {
2405 pTb->aGCPhysPages[0] = GCPhysNew;
2406 pTb->aRanges[idxRange].idxPhysPage = 1;
2407 }
2408 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2409 || pTb->aGCPhysPages[1] == GCPhysNew)
2410 {
2411 pTb->aGCPhysPages[1] = GCPhysNew;
2412 pTb->aRanges[idxRange].idxPhysPage = 2;
2413 }
2414 else
2415 {
2416 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2417 return false;
2418 }
2419
2420 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2421 {
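/* Sub-case 2a: the previous range ends exactly on the page boundary, so
   this instruction starts on the first byte of the new page and goes into
   a fresh range of its own. */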
2422 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2423 pCall->auParams[1] = idxRange;
2424 pCall->auParams[2] = 0;
2425
2426 /* Finish setting up the new range. */
2427 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2428 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2429 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2430 pTb->aRanges[idxRange].u2Unused = 0;
2431 pTb->cRanges++;
2432 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2433 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2434 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2435
2436 /* Determine which function we need to load & check. */
2437 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2438 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2439 : !fConsiderCsLimChecking
2440 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2441 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2442 }
2443 else
2444 {
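/* Sub-case 2b: the instruction straddles the page boundary; the leading
   bytes stay with the previous range while the remainder opens the new
   range at offset zero on the new page. */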
2445 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2446 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2447 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2448 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2449
2450 /* We're good. Split the instruction over the old and new range table entries. */
2451 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2452
2453 pTb->aRanges[idxRange].offPhysPage = 0;
2454 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2455 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2456 pTb->aRanges[idxRange].u2Unused = 0;
2457 pTb->cRanges++;
2458 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2459 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2460 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2461
2462 /* Determine which function we need to load & check. */
2463 if (pVCpu->iem.s.fTbCheckOpcodes)
2464 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2465 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2466 : !fConsiderCsLimChecking
2467 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2468 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2469 else
2470 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2471 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2472 : !fConsiderCsLimChecking
2473 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2474 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2475 }
2476 }
2477
2478 /*
2479 * Regular case: No new range required.
2480 */
2481 else
2482 {
2483 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2484 if (pVCpu->iem.s.fTbCheckOpcodes)
2485 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2486 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2487 : kIemThreadedFunc_BltIn_CheckOpcodes;
2488 else
2489 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2490
2491 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2492 pTb->cbOpcodes = offOpcode + cbInstr;
2493 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2494 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2495 }
2496
2497 /*
2498 * Commit the call.
2499 */
2500 pTb->Thrd.cCalls++;
2501
2502 /*
2503 * Clear state.
2504 */
2505 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2506 pVCpu->iem.s.fTbCrossedPage = false;
2507 pVCpu->iem.s.fTbCheckOpcodes = false;
2508
2509 /*
2510 * Copy opcode bytes.
2511 */
2512 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2513 pTb->cbOpcodes = offOpcode + cbInstr;
2514 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2515
2516 return true;
2517}
2518
2519
2520/**
2521 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2522 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2523 *
2524 * @returns true if anything is pending, false if not.
2525 * @param pVCpu The cross context virtual CPU structure of the calling
2526 * thread.
2527 */
2528DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2529{
2530 uint64_t fCpu = pVCpu->fLocalForcedActions;
2531 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2532#if 1
2533 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
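/* Only treat APIC/PIC interrupts as deliverable when IF is set and we're
   not in an interrupt shadow; the NMI and SMI force flags always count as
   pending here. */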
2534 if (RT_LIKELY( !fCpu
2535 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2536 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2537 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2538 return false;
2539 return true;
2540#else
2541 return false;
2542#endif
2543
2544}
2545
2546
2547/**
2548 * Called by iemThreadedCompile when a block requires a mode check.
2549 *
2550 * @returns true if we should continue, false if we're out of call entries.
2551 * @param pVCpu The cross context virtual CPU structure of the calling
2552 * thread.
2553 * @param pTb The translation block being compiled.
2554 */
2555static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2556{
2557 /* Emit the call. */
2558 uint32_t const idxCall = pTb->Thrd.cCalls;
2559 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2560 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2561 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2562 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2563 pCall->idxInstr = pTb->cInstructions - 1;
2564 pCall->cbOpcode = 0;
2565 pCall->offOpcode = 0;
2566 pCall->uTbLookup = 0;
2567 pCall->uUnused0 = 0;
2568 pCall->auParams[0] = pVCpu->iem.s.fExec;
2569 pCall->auParams[1] = 0;
2570 pCall->auParams[2] = 0;
2571 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2572 return true;
2573}
2574
2575
2576/**
2577 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2578 * set.
2579 *
2580 * @returns true if we should continue, false if an IRQ is deliverable or a
2581 * relevant force flag is pending.
2582 * @param pVCpu The cross context virtual CPU structure of the calling
2583 * thread.
2584 * @param pTb The translation block being compiled.
2585 * @sa iemThreadedCompileCheckIrq
2586 */
2587bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2588{
2589 /*
2590 * Skip this if we've already emitted a call after the previous instruction
2591 * or if it's the first call, as we're always checking FFs between blocks.
2592 */
2593 uint32_t const idxCall = pTb->Thrd.cCalls;
2594 if ( idxCall > 0
2595 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2596 {
2597 /* Emit the call. */
2598 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2599 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2600 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2601 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2602 pCall->idxInstr = pTb->cInstructions;
2603 pCall->offOpcode = 0;
2604 pCall->cbOpcode = 0;
2605 pCall->uTbLookup = 0;
2606 pCall->uUnused0 = 0;
2607 pCall->auParams[0] = 0;
2608 pCall->auParams[1] = 0;
2609 pCall->auParams[2] = 0;
2610 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2611
2612 /* Reset the IRQ check value. */
2613 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2614
2615 /*
2616 * Check for deliverable IRQs and pending force flags.
2617 */
2618 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2619 }
2620 return true; /* continue */
2621}
2622
2623
2624/**
2625 * Emits an IRQ check call and checks for pending IRQs.
2626 *
2627 * @returns true if we should continue, false if an IRQ is deliverable or a
2628 * relevant force flag is pending.
2629 * @param pVCpu The cross context virtual CPU structure of the calling
2630 * thread.
2631 * @param pTb The translation block.
2632 * @sa iemThreadedCompileBeginEmitCallsComplications
2633 */
2634static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2635{
2636 /* Check again in a little bit, unless it is immediately following an STI
2637 in which case we *must* check immediately after the next instruction
2638 as well in case it's executed with interrupt inhibition. We could
2639 otherwise miss the interrupt window. See the irq2 wait2 variant in
2640 bs3-timers-1 which is doing sti + sti + cli. */
2641 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2642 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2643 else
2644 {
2645 pVCpu->iem.s.fTbCurInstrIsSti = false;
2646 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2647 }
2648 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2649
2650 /*
2651 * Emit the call.
2652 */
2653 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2654 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2655 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2656 pCall->idxInstr = pTb->cInstructions;
2657 pCall->offOpcode = 0;
2658 pCall->cbOpcode = 0;
2659 pCall->uTbLookup = 0;
2660 pCall->uUnused0 = 0;
2661 pCall->auParams[0] = 0;
2662 pCall->auParams[1] = 0;
2663 pCall->auParams[2] = 0;
2664
2665 /*
2666 * Check for deliverable IRQs and pending force flags.
2667 */
2668 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2669}
2670
2671
2672/**
2673 * Compiles a new TB and executes it.
2674 *
2675 * We combine compilation and execution here as it makes for simpler code flow
2676 * in the main loop and it allows interpreting while compiling if we want to
2677 * explore that option.
2678 *
2679 * @returns Strict VBox status code.
2680 * @param pVM The cross context virtual machine structure.
2681 * @param pVCpu The cross context virtual CPU structure of the calling
2682 * thread.
2683 * @param GCPhysPc The physical address corresponding to the current
2684 * RIP+CS.BASE.
2685 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2686 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2687 */
2688static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2689{
2690 IEMTLBTRACE_TB_COMPILE(pVCpu, GCPhysPc);
2691 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2692 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2693
2694 /*
2695 * Get the TB we use for the recompiling. This is a maxed-out TB that
2696 * we'll make a more efficient copy of when we're done compiling.
2697 */
2698 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2699 if (pTb)
2700 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2701 else
2702 {
2703 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2704 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2705 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2706 }
2707
2708 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2709 functions may get at it. */
2710 pVCpu->iem.s.pCurTbR3 = pTb;
2711
2712#if 0
2713 /* Make sure the CheckIrq condition matches the one in EM. */
2714 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2715 const uint32_t cZeroCalls = 1;
2716#else
2717 const uint32_t cZeroCalls = 0;
2718#endif
2719
2720 /*
2721 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2722 */
2723 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2724 iemThreadedCompileInitOpcodeFetching(pVCpu);
2725 VBOXSTRICTRC rcStrict;
2726 for (;;)
2727 {
2728 /* Process the next instruction. */
2729#ifdef LOG_ENABLED
2730 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2731 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2732 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2733 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2734#endif
2735 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2736 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2737
2738 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2739#if 0
2740 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2741 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2742 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2743#endif
2744 if ( rcStrict == VINF_SUCCESS
2745 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2746 && !pVCpu->iem.s.fEndTb)
2747 {
2748 Assert(pTb->Thrd.cCalls > cCallsPrev);
2749 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
2750
2751 pVCpu->iem.s.cInstructions++;
2752
2753 /* Check for mode change _after_ certain CIMPL calls, so check that
2754 we continue executing with the same mode value. */
2755 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2756 { /* probable */ }
2757 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2758 { /* extremely likely */ }
2759 else
2760 break;
2761
2762#if defined(LOG_ENABLED) && 0 /* for debugging */
2763 //iemThreadedCompileEmitNop(pTb);
2764 iemThreadedCompileEmitLogCpuState(pTb);
2765#endif
2766 }
2767 else
2768 {
2769 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2770 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2771 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2772 rcStrict = VINF_SUCCESS;
2773
2774 if (pTb->Thrd.cCalls > cZeroCalls)
2775 {
2776 if (cCallsPrev != pTb->Thrd.cCalls)
2777 pVCpu->iem.s.cInstructions++;
2778 break;
2779 }
2780
2781 pVCpu->iem.s.pCurTbR3 = NULL;
2782 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2783 }
2784
2785 /* Check for IRQs? */
2786 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2787 pVCpu->iem.s.cInstrTillIrqCheck--;
2788 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2789 break;
2790
2791 /* Still space in the TB? */
2792 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2793 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2794 && pTb->cTbLookupEntries < 127)
2795 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2796 else
2797 {
2798 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2799 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2800 break;
2801 }
2802 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2803 }
2804
2805 /*
2806 * Reserve lookup space for the final call entry if necessary.
2807 */
2808 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2809 if (pTb->Thrd.cCalls > 1)
2810 {
2811 if (pFinalCall->uTbLookup == 0)
2812 {
2813 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2814 pTb->cTbLookupEntries += 1;
2815 }
2816 }
2817 else if (pFinalCall->uTbLookup != 0)
2818 {
2819 Assert(pTb->cTbLookupEntries > 1);
2820 pFinalCall->uTbLookup -= 1;
2821 pTb->cTbLookupEntries -= 1;
2822 }
2823
2824 /*
2825 * Duplicate the TB into a completed one and link it.
2826 */
2827 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2828 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2829
2830 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2831
2832#ifdef IEM_COMPILE_ONLY_MODE
2833 /*
2834 * Execute the translation block.
2835 */
2836#endif
2837
2838 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2839}
2840
2841
2842
2843/*********************************************************************************************************************************
2844* Recompiled Execution Core *
2845*********************************************************************************************************************************/
2846
2847/** Helper for iemTbExec. */
2848DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
2849{
2850 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
2851 Assert(idx < pTb->cTbLookupEntries);
2852 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
2853}
2854
2855
2856/**
2857 * Executes a translation block.
2858 *
2859 * @returns Strict VBox status code.
2860 * @param pVCpu The cross context virtual CPU structure of the calling
2861 * thread.
2862 * @param pTb The translation block to execute.
2863 */
2864static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2865{
2866 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2867
2868 /*
2869 * Set the current TB so CIMPL functions may get at it.
2870 */
2871 pVCpu->iem.s.pCurTbR3 = pTb;
2872 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
2873
2874 /*
2875 * Execute the block.
2876 */
2877#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2878 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2879 {
2880 pVCpu->iem.s.cTbExecNative++;
2881 IEMTLBTRACE_TB_EXEC_N8VE(pVCpu, pTb);
2882# ifdef LOG_ENABLED
2883 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2884# endif
2885
2886# ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
2887# ifdef RT_ARCH_AMD64
2888 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2889# else
2890 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2891# endif
2892# else
2893# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2894 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
2895# endif
2896# ifdef RT_ARCH_AMD64
2897 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
2898# else
2899 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
2900# endif
2901# endif
2902
2903# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2904 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2905# endif
2906# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
2907 /* Restore FPCR/MXCSR if the TB modified it. */
2908 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
2909 {
2910 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
2911 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
2912 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
2913 }
2914# endif
2915# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
2916 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
2917# endif
2918 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2919 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2920 { /* likely */ }
2921 else
2922 {
2923 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2924 pVCpu->iem.s.pCurTbR3 = NULL;
2925
2926 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2927 only to break out of TB execution early. */
2928 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2929 {
2930 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
2931 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2932 }
2933
2934 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
2935 only to break out of TB execution early due to pending FFs. */
2936 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
2937 {
2938 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
2939 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2940 }
2941
2942 /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
2943 and be converted to VINF_SUCCESS or whatever is appropriate. */
2944 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2945 {
2946 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
2947 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
2948 }
2949
2950 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
2951 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2952 }
2953 }
2954 else
2955#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2956 {
2957 /*
2958 * The threaded execution loop.
2959 */
2960 pVCpu->iem.s.cTbExecThreaded++;
2961 IEMTLBTRACE_TB_EXEC_THRD(pVCpu, pTb);
2962#ifdef LOG_ENABLED
2963 uint64_t uRipPrev = UINT64_MAX;
2964#endif
2965 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2966 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2967 while (cCallsLeft-- > 0)
2968 {
2969#ifdef LOG_ENABLED
2970 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2971 {
2972 uRipPrev = pVCpu->cpum.GstCtx.rip;
2973 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2974 }
2975 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2976 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2977 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2978#endif
2979#ifdef VBOX_WITH_STATISTICS
2980 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
2981 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
2982#endif
2983 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2984 pCallEntry->auParams[0],
2985 pCallEntry->auParams[1],
2986 pCallEntry->auParams[2]);
2987 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2988 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2989 pCallEntry++;
2990 else
2991 {
2992 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2993 pVCpu->iem.s.pCurTbR3 = NULL;
2994 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
2995 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
2996
2997 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2998 only to break out of TB execution early. */
2999 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3000 {
3001#ifdef VBOX_WITH_STATISTICS
3002 if (pCallEntry->uTbLookup)
3003 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
3004 else
3005 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
3006#endif
3007 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3008 }
3009 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3010 }
3011 }
3012
3013 /* Update the lookup entry. */
3014 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
3015 }
3016
3017 pVCpu->iem.s.cInstructions += pTb->cInstructions;
3018 pVCpu->iem.s.pCurTbR3 = NULL;
3019 return VINF_SUCCESS;
3020}
3021
3022
3023/**
3024 * This is called when the PC doesn't match the current pbInstrBuf.
3025 *
3026 * Upon return, we're ready for opcode fetching. But please note that
3027 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
3028 * MMIO or unassigned).
3029 */
3030static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
3031{
3032 pVCpu->iem.s.pbInstrBuf = NULL;
3033 pVCpu->iem.s.offCurInstrStart = 0;
3034 pVCpu->iem.s.offInstrNextByte = 0;
3035 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
3036 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
3037}
3038
3039
3040/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
3041DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
3042{
3043 /*
3044 * Set uCurTbStartPc to RIP and calc the effective PC.
3045 */
3046 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
3047 pVCpu->iem.s.uCurTbStartPc = uPc;
3048 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
3049 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
3050
3051 /*
3052 * Advance within the current buffer (PAGE) when possible.
3053 */
3054 if (pVCpu->iem.s.pbInstrBuf)
3055 {
3056 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
3057 if (off < pVCpu->iem.s.cbInstrBufTotal)
3058 {
3059 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3060 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3061 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3062 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3063 else
3064 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3065
3066 return pVCpu->iem.s.GCPhysInstrBuf + off;
3067 }
3068 }
3069 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3070}
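
/*
 * Worked example with illustrative numbers only (not taken from the source):
 * say uInstrBufPc = 0x7000, cbInstrBufTotal = 0x1000 (one guest page mapped)
 * and the new effective PC is 0x7ff8.  Then off = 0xff8 < 0x1000, so we stay
 * in the current buffer; since 0xff8 + 15 = 0x1007 exceeds 0x1000, cbInstrBuf
 * is clamped to cbInstrBufTotal, leaving any cross-page tail to the slower
 * opcode fetch path.  A PC of 0x8000, or anything below 0x7000 (unsigned
 * wrap-around), yields off >= cbInstrBufTotal and takes the
 * iemGetPcWithPhysAndCodeMissed() path instead.
 */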
3071
3072
3073/**
3074 * Determines the extra IEMTB_F_XXX flags.
3075 *
3076 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3077 * IEMTB_F_CS_LIM_CHECKS (or zero).
3078 * @param pVCpu The cross context virtual CPU structure of the calling
3079 * thread.
3080 */
3081DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3082{
3083 uint32_t fRet = 0;
3084
3085 /*
3086 * Determine the inhibit bits.
3087 */
3088 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3089 { /* typical */ }
3090 else
3091 {
3092 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3093 fRet |= IEMTB_F_INHIBIT_SHADOW;
3094 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3095 fRet |= IEMTB_F_INHIBIT_NMI;
3096 }
3097
3098 /*
3099 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3100 * likely to go invalid before the end of the translation block.
3101 */
3102 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3103 return fRet;
3104
3105 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3106 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3107 return fRet;
3108 return fRet | IEMTB_F_CS_LIM_CHECKS;
3109}
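
/*
 * Worked example with illustrative numbers only: take a non-flat 32-bit code
 * segment with cs.u32Limit = 0xffff, a page-aligned cs.u64Base and
 * eip = 0xf200.  Then offFromLim = 0xffff - 0xf200 = 0xdff, which is less
 * than X86_PAGE_SIZE + 16 = 0x1010, so the block gets IEMTB_F_CS_LIM_CHECKS.
 * With eip = 0x1000 instead, offFromLim = 0xefff >= 0x1010 and the limit
 * cannot be reached within this block, so no extra checks are added.
 */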
3110
3111
3112VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
3113{
3114 /*
3115 * See if there is an interrupt pending in TRPM, inject it if we can.
3116 */
3117 if (!TRPMHasTrap(pVCpu))
3118 { /* likely */ }
3119 else
3120 {
3121 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3122 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3123            { /* likely */ }
3124 else
3125 return rcStrict;
3126 }
3127
3128 /*
3129 * Init the execution environment.
3130 */
3131#if 1 /** @todo this seems like a good idea; however, if we ever share memory
3132 * directly with other threads on the host, it isn't necessarily... */
3133 if (pVM->cCpus == 1)
3134 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3135 else
3136#endif
3137 iemInitExec(pVCpu, 0 /*fExecOpts*/);
3138 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
3139 { }
3140 else
3141 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
3142 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3143
3144 /*
3145 * Run-loop.
3146 *
3147     * If we're using setjmp/longjmp, we combine all the catching here to avoid
3148 * having to call setjmp for each block we're executing.
3149 */
3150 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3151 for (;;)
3152 {
3153 VBOXSTRICTRC rcStrict;
3154 IEM_TRY_SETJMP(pVCpu, rcStrict)
3155 {
3156            uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
3157 for (uint32_t iIterations = 0; ; iIterations++)
3158 {
3159 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3160 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3161 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3162 {
3163 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3164 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3165 if (pTb)
3166 rcStrict = iemTbExec(pVCpu, pTb);
3167 else
3168 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3169 }
3170 else
3171 {
3172 /* This can only happen if the current PC cannot be translated into a
3173 host pointer, which means we're in MMIO or unmapped memory... */
3174#if defined(VBOX_STRICT) && defined(IN_RING3)
3175 rcStrict = DBGFSTOP(pVM);
3176 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3177 return rcStrict;
3178#endif
3179 rcStrict = IEMExecLots(pVCpu, 2048, cPollRate, NULL);
3180 }
3181 if (rcStrict == VINF_SUCCESS)
3182 {
3183 Assert(pVCpu->iem.s.cActiveMappings == 0);
3184
3185 uint64_t fCpu = pVCpu->fLocalForcedActions;
3186 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3187 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3188 | VMCPU_FF_TLB_FLUSH
3189 | VMCPU_FF_UNHALT );
3190 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3191 if (RT_LIKELY( ( !fCpu
3192 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3193 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3194 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3195 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3196 {
3197 if (RT_LIKELY( (iIterations & cPollRate) != 0
3198 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
3199 { /* likely */ }
3200 else
3201 return VINF_SUCCESS;
3202 }
3203 else
3204 return VINF_SUCCESS;
3205 }
3206 else
3207 return rcStrict;
3208 }
3209 }
3210 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3211 {
3212 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3213 pVCpu->iem.s.cLongJumps++;
3214#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3215 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3216#endif
3217 if (pVCpu->iem.s.cActiveMappings > 0)
3218 iemMemRollback(pVCpu);
3219
3220#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3221 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3222 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3223 {
3224 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3225# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3226 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3227 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3228# endif
3229
3230# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3231 /* Restore FPCR/MXCSR if the TB modified it. */
3232 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3233 {
3234 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3235                    /* Reset for the next round, saving us an unconditional instruction on the next TB entry. */
3236 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3237 }
3238# endif
3239 }
3240#endif
3241
3242#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3243 /* If pTb isn't NULL we're in iemTbExec. */
3244 if (!pTb)
3245 {
3246 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3247 pTb = pVCpu->iem.s.pCurTbR3;
3248 if (pTb)
3249 {
3250 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3251 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3252 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3253 }
3254 }
3255#endif
3256 pVCpu->iem.s.pCurTbR3 = NULL;
3257 return rcStrict;
3258 }
3259 IEM_CATCH_LONGJMP_END(pVCpu);
3260 }
3261}
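
/*
 * Worked example of the force-flag filtering in IEMExecRecompiler above
 * (illustrative scenarios only): a pending VMCPU_FF_TLB_FLUSH or
 * VMCPU_FF_UNHALT is masked out and does not stop the loop.  A pending
 * VMCPU_FF_INTERRUPT_APIC or VMCPU_FF_INTERRUPT_PIC with RFLAGS.IF clear, or
 * with an interrupt shadow active, also keeps us looping, since the guest
 * cannot take the interrupt yet.  Any other surviving VMCPU_FF_* flag, any
 * pending VM_FF_* action, or a timer poll that comes up due makes us return
 * VINF_SUCCESS so EM can service it.
 */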
3262