VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp

Last change on this file was r106453, checked in by vboxsync on 2024-10-17.

VMM/IEM: Eliminated the IEMNATIVE_WITH_SIMD_REG_ALLOCATOR define. Fixed bug in iemNativeEmitMemFetchStoreDataCommon where a SIMD register was masked in calls to iemNativeVarSaveVolatileRegsPreHlpCall and friends. Fixed theoretical loop-forever bugs in iemNativeSimdRegAllocFindFree & iemNativeRegAllocFindFree. bugref:10720

1/* $Id: IEMAllThrdRecompiler.cpp 106453 2024-10-17 13:54:35Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but its all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116
117
118/*********************************************************************************************************************************
119* Internal Functions *
120*********************************************************************************************************************************/
121#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
122static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb);
123#endif
124
125
126/**
127 * Calculates the effective address of a ModR/M memory operand, extended version
128 * for use in the recompilers.
129 *
130 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
131 *
132 * May longjmp on internal error.
133 *
134 * @return The effective address.
135 * @param pVCpu The cross context virtual CPU structure of the calling thread.
136 * @param bRm The ModRM byte.
137 * @param cbImmAndRspOffset - First byte: The size of any immediate
138 * following the effective address opcode bytes
139 * (only for RIP relative addressing).
140 * - Second byte: RSP displacement (for POP [ESP]).
141 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
142 * SIB byte (bits 39:32).
143 *
144 * @note This must be defined in a source file with matching
145 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
146 * or implemented differently...
147 */
148RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
149{
150 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
151# define SET_SS_DEF() \
152 do \
153 { \
154 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
155 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
156 } while (0)
157
158 if (!IEM_IS_64BIT_CODE(pVCpu))
159 {
160/** @todo Check the effective address size crap! */
161 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
162 {
163 uint16_t u16EffAddr;
164
165 /* Handle the disp16 form with no registers first. */
166 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
167 {
168 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
169 *puInfo = u16EffAddr;
170 }
171 else
172 {
173 /* Get the displacement. */
174 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
175 {
176 case 0: u16EffAddr = 0; break;
177 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
178 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
179 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
180 }
181 *puInfo = u16EffAddr;
182
183 /* Add the base and index registers to the disp. */
184 switch (bRm & X86_MODRM_RM_MASK)
185 {
186 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
187 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
188 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
189 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
190 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
191 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
192 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
193 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
194 }
195 }
196
197 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
198 return u16EffAddr;
199 }
200
201 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
202 uint32_t u32EffAddr;
203 uint64_t uInfo;
204
205 /* Handle the disp32 form with no registers first. */
206 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
207 {
208 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
209 uInfo = u32EffAddr;
210 }
211 else
212 {
213 /* Get the register (or SIB) value. */
214 uInfo = 0;
215 switch ((bRm & X86_MODRM_RM_MASK))
216 {
217 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
218 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
219 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
220 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
221 case 4: /* SIB */
222 {
223 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
224 uInfo = (uint64_t)bSib << 32;
225
226 /* Get the index and scale it. */
227 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
228 {
229 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
230 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
231 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
232 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
233 case 4: u32EffAddr = 0; /*none */ break;
234 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
235 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
236 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
237 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
238 }
239 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
240
241 /* add base */
242 switch (bSib & X86_SIB_BASE_MASK)
243 {
244 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
245 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
246 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
247 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
248 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
249 case 5:
250 if ((bRm & X86_MODRM_MOD_MASK) != 0)
251 {
252 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
253 SET_SS_DEF();
254 }
255 else
256 {
257 uint32_t u32Disp;
258 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
259 u32EffAddr += u32Disp;
260 uInfo |= u32Disp;
261 }
262 break;
263 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
264 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
265 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
266 }
267 break;
268 }
269 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
270 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
271 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
272 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
273 }
274
275 /* Get and add the displacement. */
276 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
277 {
278 case 0:
279 break;
280 case 1:
281 {
282 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
283 u32EffAddr += i8Disp;
284 uInfo |= (uint32_t)(int32_t)i8Disp;
285 break;
286 }
287 case 2:
288 {
289 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
290 u32EffAddr += u32Disp;
291 uInfo |= u32Disp;
292 break;
293 }
294 default:
295 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
296 }
297 }
298
299 *puInfo = uInfo;
300 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
301 return u32EffAddr;
302 }
303
304 uint64_t u64EffAddr;
305 uint64_t uInfo;
306
307 /* Handle the rip+disp32 form with no registers first. */
308 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
309 {
310 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
311 uInfo = (uint32_t)u64EffAddr;
312 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
313 }
314 else
315 {
316 /* Get the register (or SIB) value. */
317 uInfo = 0;
318 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
319 {
320 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
321 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
322 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
323 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
324 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
325 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
326 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
327 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
328 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
329 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
330 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
331 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
332 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
333 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
334 /* SIB */
335 case 4:
336 case 12:
337 {
338 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
339 uInfo = (uint64_t)bSib << 32;
340
341 /* Get the index and scale it. */
342 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
343 {
344 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
345 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
346 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
347 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
348 case 4: u64EffAddr = 0; /*none */ break;
349 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
350 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
351 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
352 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
353 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
354 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
355 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
356 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
357 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
358 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
359 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
360 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
361 }
362 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
363
364 /* add base */
365 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
366 {
367 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
368 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
369 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
370 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
371 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
372 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
373 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
374 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
375 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
376 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
377 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
378 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
379 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
380 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
381 /* complicated encodings */
382 case 5:
383 case 13:
384 if ((bRm & X86_MODRM_MOD_MASK) != 0)
385 {
386 if (!pVCpu->iem.s.uRexB)
387 {
388 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
389 SET_SS_DEF();
390 }
391 else
392 u64EffAddr += pVCpu->cpum.GstCtx.r13;
393 }
394 else
395 {
396 uint32_t u32Disp;
397 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
398 u64EffAddr += (int32_t)u32Disp;
399 uInfo |= u32Disp;
400 }
401 break;
402 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
403 }
404 break;
405 }
406 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
407 }
408
409 /* Get and add the displacement. */
410 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
411 {
412 case 0:
413 break;
414 case 1:
415 {
416 int8_t i8Disp;
417 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
418 u64EffAddr += i8Disp;
419 uInfo |= (uint32_t)(int32_t)i8Disp;
420 break;
421 }
422 case 2:
423 {
424 uint32_t u32Disp;
425 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
426 u64EffAddr += (int32_t)u32Disp;
427 uInfo |= u32Disp;
428 break;
429 }
430 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
431 }
432
433 }
434
435 *puInfo = uInfo;
436 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
437 {
438 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
439 return u64EffAddr;
440 }
441 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
442 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
443 return u64EffAddr & UINT32_MAX;
444}
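
/*
 * A small worked example of the decoding above; the byte values are chosen
 * purely for illustration:
 *
 *   In 32-bit mode, with bRm = 0x44, a SIB byte of 0x88 and a disp8 of 0x10:
 *     - bRm:  mod = 01b (disp8 follows the SIB byte), rm = 100b (SIB present).
 *     - SIB:  scale = 10b (x4), index = 001b (ECX), base = 000b (EAX).
 *   The function would return EAX + ECX*4 + 0x10, the default segment stays
 *   DS (SET_SS_DEF only fires for the EBP/ESP based forms), and *puInfo would
 *   hold the sign-extended displacement 0x10 in bits 31:0 and the SIB byte
 *   0x88 in bits 39:32.
 */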
445
446
447
448/*********************************************************************************************************************************
449* Translation Block Cache. *
450*********************************************************************************************************************************/
451
452/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
453static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
454{
455 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
456 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
457 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
458 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
459 if (cMsSinceUse1 != cMsSinceUse2)
460 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
461 if (pTb1->cUsed != pTb2->cUsed)
462 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
463 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
464 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
465 return 0;
466}
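
/*
 * Illustration of the ordering the comparator above produces (numbers chosen
 * purely as an example): with msRecompilerPollNow = 1000, a TB last used at
 * 998 (2 ms ago) sorts before one last used at 990 (10 ms ago); if both are
 * equally old, the one with the higher cUsed count sorts first; and if the
 * usage counts match too, a native TB sorts before a threaded one.  Lower
 * array indexes are thus the TBs we most want to keep.
 */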
467
468#ifdef VBOX_STRICT
469/**
470 * Assertion helper that checks a collisions list count.
471 */
472static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
473{
474 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
475 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
476 while (pTb)
477 {
478 pTb = pTb->pNext;
479 cLeft--;
480 }
481 AssertMsg(cLeft == 0,
482 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
483 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
484}
485#endif
486
487
488DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
489{
490 STAM_PROFILE_START(&pTbCache->StatPrune, a);
491
492 /*
493 * First convert the collision list to an array.
494 */
495 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
496 uintptr_t cInserted = 0;
497 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
498
499 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
500
501 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
502 {
503 apSortedTbs[cInserted++] = pTbCollision;
504 pTbCollision = pTbCollision->pNext;
505 }
506
507 /* Free any excess (impossible). */
508 if (RT_LIKELY(!pTbCollision))
509 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
510 else
511 do
512 {
513 PIEMTB pTbToFree = pTbCollision;
514 pTbCollision = pTbToFree->pNext;
515 iemTbAllocatorFree(pVCpu, pTbToFree);
516 } while (pTbCollision);
517
518 /*
519 * Sort it by most recently used and usage count.
520 */
521 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
522
523 /* We keep half the list for now. Perhaps a bit aggressive... */
524 uintptr_t const cKeep = cInserted / 2;
525
526 /* First free up the TBs we don't wish to keep (before creating the new
527 list because otherwise the free code will scan the list for each one
528 without ever finding it). */
529 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
530 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
531
532 /* Then chain the new TB together with the existing ones we wish to keep
533 and insert this list into the hash table. */
534 pTbCollision = pTb;
535 for (uintptr_t idx = 0; idx < cKeep; idx++)
536 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
537 pTbCollision->pNext = NULL;
538
539 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
540#ifdef VBOX_STRICT
541 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
542#endif
543
544 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
545}
546
547
548static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
549{
550 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
551 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
552 if (!pTbOldHead)
553 {
554 pTb->pNext = NULL;
555 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
556 }
557 else
558 {
559 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
560 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
561 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
562 {
563 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
564 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
565#ifdef VBOX_STRICT
566 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
567#endif
568 }
569 else
570 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
571 }
572}
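
/*
 * The IEMTBCACHE_PTR_MAKE / IEMTBCACHE_PTR_GET_TB / IEMTBCACHE_PTR_GET_COUNT
 * macros used above are defined in IEMInternal.h and not shown here.  The
 * sketch below only illustrates the general idea they appear to rely on -
 * packing a small collision count into the low bits of a suitably aligned TB
 * pointer, as suggested by the AssertCompile on IEMTBCACHE_PTR_COUNT_MASK in
 * iemTbInit - and is an assumption, not the actual definition:
 *
 * @code
 *   #define EXAMPLE_COUNT_MASK             ((uintptr_t)0x0f)
 *   #define EXAMPLE_PTR_MAKE(pTb, cnt)     ((PIEMTB)((uintptr_t)(pTb) | ((cnt) & EXAMPLE_COUNT_MASK)))
 *   #define EXAMPLE_PTR_GET_TB(pPacked)    ((PIEMTB)((uintptr_t)(pPacked) & ~EXAMPLE_COUNT_MASK))
 *   #define EXAMPLE_PTR_GET_COUNT(pPacked) ((uint32_t)((uintptr_t)(pPacked) & EXAMPLE_COUNT_MASK))
 * @endcode
 */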
573
574
575/**
576 * Unlinks @a pTb from the hash table if found in it.
577 *
578 * @returns true if unlinked, false if not present.
579 * @param pTbCache The hash table.
580 * @param pTb The TB to remove.
581 */
582static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
583{
584 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
585 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
586 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
587
588 /*
589 * At the head of the collision list?
590 */
591 if (pTbHash == pTb)
592 {
593 if (!pTb->pNext)
594 pTbCache->apHash[idxHash] = NULL;
595 else
596 {
597 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
598 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
599#ifdef VBOX_STRICT
600 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
601#endif
602 }
603 return true;
604 }
605
606 /*
607 * Search the collision list.
608 */
609 PIEMTB const pTbHead = pTbHash;
610 while (pTbHash)
611 {
612 PIEMTB const pNextTb = pTbHash->pNext;
613 if (pNextTb == pTb)
614 {
615 pTbHash->pNext = pTb->pNext;
616 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
617#ifdef VBOX_STRICT
618 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
619#endif
620 return true;
621 }
622 pTbHash = pNextTb;
623 }
624 return false;
625}
626
627
628/**
629 * Looks up a TB for the given PC and flags in the cache.
630 *
631 * @returns Pointer to TB on success, NULL if not found.
632 * @param pVCpu The cross context virtual CPU structure of the
633 * calling thread.
634 * @param pTbCache The translation block cache.
635 * @param GCPhysPc The PC to look up a TB for.
636 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
637 * the lookup.
638 * @thread EMT(pVCpu)
639 */
640static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
641 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
642{
643 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
644
645 /*
646 * First consult the lookup table entry.
647 */
648 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
649 PIEMTB pTb = *ppTbLookup;
650 if (pTb)
651 {
652 if (pTb->GCPhysPc == GCPhysPc)
653 {
654 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
655 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
656 {
657 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
658 {
659 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
660 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
661 pTb->cUsed++;
662#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
663 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
664 {
665 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
666 return pTb;
667 }
668 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
669# ifdef VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING
670 iemThreadedSaveTbForProfiling(pVCpu, pTb);
671# endif
672 return iemNativeRecompile(pVCpu, pTb);
673#else
674 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
675 return pTb;
676#endif
677 }
678 }
679 }
680 }
681
682 /*
683 * Then consult the hash table.
684 */
685 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
686#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
687 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
688#endif
689 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
690 while (pTb)
691 {
692 if (pTb->GCPhysPc == GCPhysPc)
693 {
694 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
695 {
696 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
697 {
698 STAM_COUNTER_INC(&pTbCache->cLookupHits);
699 AssertMsg(cLeft > 0, ("%d\n", cLeft));
700
701 *ppTbLookup = pTb;
702 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
703 pTb->cUsed++;
704#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
705 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
706 {
707 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
708 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
709 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
710 return pTb;
711 }
712 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
713 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
714 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
715 return iemNativeRecompile(pVCpu, pTb);
716#else
717 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
718 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
719 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
720 return pTb;
721#endif
722 }
723 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
724 }
725 else
726 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
727 }
728 else
729 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
730
731 pTb = pTb->pNext;
732#ifdef VBOX_STRICT
733 cLeft--;
734#endif
735 }
736 AssertMsg(cLeft == 0, ("%d\n", cLeft));
737 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
738 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
739 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
740 return pTb;
741}
742
743
744/*********************************************************************************************************************************
745* Translation Block Allocator.
746*********************************************************************************************************************************/
747/*
748 * Translation block allocation management.
749 */
750
751#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
752# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
753 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
754# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
755 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
756# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
757 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
758#else
759# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
760 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
761# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
762 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
763# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
764 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
765#endif
766/** Makes a TB index from a chunk index and TB index within that chunk. */
767#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
768 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
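
/*
 * Round-trip sketch for the index macros above (illustrative only): a TB index
 * decomposes into a chunk number and an index within that chunk, via shifts
 * and masks or division depending on IEMTB_SIZE_IS_POWER_OF_TWO, and the two
 * parts recombine losslessly:
 *
 * @code
 *   uint32_t const idxChunk   = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
 *   uint32_t const idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
 *   Assert(IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk) == idxTb);
 * @endcode
 */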
769
770
771/**
772 * Initializes the TB allocator and cache for an EMT.
773 *
774 * @returns VBox status code.
775 * @param pVM The VM handle.
776 * @param cInitialTbs The initial number of translation blocks to
777 * preallocate.
778 * @param cMaxTbs The max number of translation blocks allowed.
779 * @param cbInitialExec The initial size of the executable memory allocator.
780 * @param cbMaxExec The max size of the executable memory allocator.
781 * @param cbChunkExec The chunk size for executable memory allocator. Zero
782 * or UINT32_MAX for automatically determining this.
783 * @thread EMT
784 */
785DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
786 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
787{
788 PVMCPUCC pVCpu = VMMGetCpu(pVM);
789 Assert(!pVCpu->iem.s.pTbCacheR3);
790 Assert(!pVCpu->iem.s.pTbAllocatorR3);
791
792 /*
793 * Calculate the chunk size of the TB allocator.
794 * The minimum chunk size is 2MiB.
795 */
796 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
797 uint32_t cbPerChunk = _2M;
798 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
799#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
800 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
801 uint8_t cChunkShift = 21 - cTbShift;
802 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
803#endif
804 for (;;)
805 {
806 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
807 break;
808 cbPerChunk *= 2;
809 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
810#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
811 cChunkShift += 1;
812#endif
813 }
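
    /* Worked example (purely illustrative, the real sizeof(IEMTB) differs):
       if sizeof(IEMTB) were 256 bytes, the starting point would be
       cbPerChunk = 2 MiB and cTbsPerChunk = 8192 (cChunkShift = 13); the loop
       above only doubles cbPerChunk when cMaxTbs exceeds what the fixed
       aChunks array can cover at the current chunk size. */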
814
815 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
816 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
817 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
818
819 cMaxTbs = cMaxChunks * cTbsPerChunk;
820
821 /*
822 * Allocate and initialize it.
823 */
824 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
825 if (!pTbAllocator)
826 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
827 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
828 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
829 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
830 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
831 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
832 pTbAllocator->cbPerChunk = cbPerChunk;
833 pTbAllocator->cMaxTbs = cMaxTbs;
834 pTbAllocator->pTbsFreeHead = NULL;
835#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
836 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
837 pTbAllocator->cChunkShift = cChunkShift;
838 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
839#endif
840
841 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
842
843 /*
844 * Allocate the initial chunks.
845 */
846 for (uint32_t idxChunk = 0; ; idxChunk++)
847 {
848 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
849 if (!paTbs)
850 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
851 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
852 cbPerChunk, idxChunk, pVCpu->idCpu);
853
854 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
855 {
856 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
857 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
858 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
859 }
860 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
861 pTbAllocator->cTotalTbs += cTbsPerChunk;
862
863 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
864 break;
865 }
866
867 /*
868 * Calculate the size of the hash table. We double the max TB count and
869 * round it up to the nearest power of two.
870 */
871 uint32_t cCacheEntries = cMaxTbs * 2;
872 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
873 {
874 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
875 cCacheEntries = RT_BIT_32(iBitTop);
876 Assert(cCacheEntries >= cMaxTbs * 2);
877 }
878
879 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
880 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
881 if (!pTbCache)
882 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
883 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
884 cbTbCache, cCacheEntries, pVCpu->idCpu);
885
886 /*
887 * Initialize it (assumes zeroed by the allocator).
888 */
889 pTbCache->uMagic = IEMTBCACHE_MAGIC;
890 pTbCache->cHash = cCacheEntries;
891 pTbCache->uHashMask = cCacheEntries - 1;
892 Assert(pTbCache->cHash > pTbCache->uHashMask);
893 pVCpu->iem.s.pTbCacheR3 = pTbCache;
894
895 /*
896 * Initialize the native executable memory allocator.
897 */
898#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
899 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
900 AssertLogRelRCReturn(rc, rc);
901#else
902 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
903#endif
904
905 return VINF_SUCCESS;
906}
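
/*
 * Hypothetical call sketch; the numbers below are made up for illustration and
 * are not VirtualBox's actual defaults (the real values are supplied by the
 * caller):
 *
 * @code
 *   // cInitialTbs=_32K, cMaxTbs=_256K, cbInitialExec=_16M, cbMaxExec=_128M,
 *   // cbChunkExec=0 (determine automatically):
 *   int rc = iemTbInit(pVM, _32K, _256K, _16M, _128M, 0);
 *   AssertLogRelRCReturn(rc, rc);
 * @endcode
 */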
907
908
909/**
910 * Inner free worker.
911 *
912 * The @a a_fType parameter allows us to eliminate the type check when we know
913 * which type of TB is being freed.
914 */
915template<uint32_t a_fType>
916DECL_FORCE_INLINE(void)
917iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
918{
919#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
920 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED || a_fType == IEMTB_F_TYPE_NATIVE);
921#else
922 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED);
923#endif
924 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
925 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
926 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
927#ifdef VBOX_STRICT
928 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
929 Assert(pTbOther != pTb);
930#endif
931
932 /*
933 * Unlink the TB from the hash table.
934 */
935 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
936
937 /*
938 * Free the TB itself.
939 */
940 if RT_CONSTEXPR_IF(a_fType == 0)
941 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
942 {
943 case IEMTB_F_TYPE_THREADED:
944 pTbAllocator->cThreadedTbs -= 1;
945 RTMemFree(pTb->Thrd.paCalls);
946 break;
947#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
948 case IEMTB_F_TYPE_NATIVE:
949 pTbAllocator->cNativeTbs -= 1;
950 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
951 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
952 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
953 break;
954#endif
955 default:
956 AssertFailed();
957 }
958#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
959 else if RT_CONSTEXPR_IF(a_fType == IEMTB_F_TYPE_NATIVE)
960 {
961 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
962 pTbAllocator->cNativeTbs -= 1;
963 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
964 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
965 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
966 }
967#endif
968 else
969 {
970 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
971 pTbAllocator->cThreadedTbs -= 1;
972 RTMemFree(pTb->Thrd.paCalls);
973 }
974
975 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
976
977 pTb->pNext = pTbAllocator->pTbsFreeHead;
978 pTbAllocator->pTbsFreeHead = pTb;
979 pTb->fFlags = 0;
980 pTb->GCPhysPc = UINT64_MAX;
981 pTb->Gen.uPtr = 0;
982 pTb->Gen.uData = 0;
983 pTb->cTbLookupEntries = 0;
984 pTb->cbOpcodes = 0;
985 pTb->pabOpcodes = NULL;
986
987 Assert(pTbAllocator->cInUseTbs > 0);
988
989 pTbAllocator->cInUseTbs -= 1;
990 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
991}
992
993
994/**
995 * Frees the given TB.
996 *
997 * @param pVCpu The cross context virtual CPU structure of the calling
998 * thread.
999 * @param pTb The translation block to free.
1000 * @thread EMT(pVCpu)
1001 */
1002DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
1003{
1004 /*
1005 * Validate state.
1006 */
1007 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1008 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1009 uint8_t const idxChunk = pTb->idxAllocChunk;
1010 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
1011 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
1012 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
1013
1014 /*
1015 * Invalidate the TB lookup pointer and call the inner worker.
1016 */
1017 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1018 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1019}
1020
1021#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
1022
1023/**
1024 * Interface used by iemExecMemAllocatorPrune.
1025 */
1026DECLHIDDEN(void) iemTbAllocatorFreeBulk(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb)
1027{
1028 Assert(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1029
1030 uint8_t const idxChunk = pTb->idxAllocChunk;
1031 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
1032 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
1033 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
1034
1035 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1036}
1037
1038
1039/**
1040 * Interface used by iemExecMemAllocatorPrune.
1041 */
1042DECLHIDDEN(PIEMTBALLOCATOR) iemTbAllocatorFreeBulkStart(PVMCPUCC pVCpu)
1043{
1044 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1045 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1046
1047 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1048
1049 /* It should be sufficient to do this once. */
1050 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1051
1052 return pTbAllocator;
1053}
1054
1055#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
1056
1057/**
1058 * Schedules a TB for freeing when it's no longer being executed and/or part of
1059 * the caller's call stack.
1060 *
1061 * The TB will be removed from the translation block cache, though, so it isn't
1062 * possible to execute it again and the IEMTB::pNext member can be used to link
1063 * it together with other TBs awaiting freeing.
1064 *
1065 * @param pVCpu The cross context virtual CPU structure of the calling
1066 * thread.
1067 * @param pTb The translation block to schedule for freeing.
1068 */
1069static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1070{
1071 /*
1072 * Validate state.
1073 */
1074 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1075 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1076 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1077 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1078 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1079 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1080#ifdef VBOX_STRICT
1081 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1082 Assert(pTbOther != pTb);
1083#endif
1084
1085 /*
1086 * Remove it from the cache and prepend it to the allocator's todo list.
1087 *
1088 * Note! It could still be in various lookup tables, so we trash the GCPhys
1089 * and CS attribs to ensure it won't be reused.
1090 */
1091 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1092 pTb->GCPhysPc = NIL_RTGCPHYS;
1093 pTb->x86.fAttr = UINT16_MAX;
1094
1095 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1096 pTbAllocator->pDelayedFreeHead = pTb;
1097}
1098
1099
1100/**
1101 * Processes the delayed frees.
1102 *
1103 * This is called by the allocator function as well as the native recompile
1104 * function before making any TB or executable memory allocations respectively.
1105 */
1106void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1107{
1108 /** @todo r=bird: these have already been removed from the cache,
1109 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1110 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1111 pTbAllocator->pDelayedFreeHead = NULL;
1112 while (pTb)
1113 {
1114 PIEMTB const pTbNext = pTb->pNext;
1115 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1116 iemTbAllocatorFree(pVCpu, pTb);
1117 pTb = pTbNext;
1118 }
1119}
1120
1121
1122#if 0
1123/**
1124 * Frees all TBs.
1125 */
1126static int iemTbAllocatorFreeAll(PVMCPUCC pVCpu)
1127{
1128 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1129 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1130 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1131
1132 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1133
1134 uint32_t idxChunk = pTbAllocator->cAllocatedChunks;
1135 while (idxChunk-- > 0)
1136 {
1137 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1138 uint32_t idxTb = pTbAllocator->cTbsPerChunk;
1139 while (idxTb-- > 0)
1140 {
1141 PIEMTB const pTb = &paTbs[idxTb];
1142 if (pTb->fFlags)
1143 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxTb);
1144 }
1145 }
1146
1147 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1148
1149# if 1
1150 /* Reset the free list. */
1151 pTbAllocator->pTbsFreeHead = NULL;
1152 idxChunk = pTbAllocator->cAllocatedChunks;
1153 while (idxChunk-- > 0)
1154 {
1155 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1156 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1157 RT_BZERO(paTbs, sizeof(paTbs[0]) * cTbsPerChunk);
1158 for (uint32_t idxTb = 0; idxTb < cTbsPerChunk; idxTb++)
1159 {
1160 paTbs[idxTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1161 paTbs[idxTb].pNext = pTbAllocator->pTbsFreeHead;
1162 pTbAllocator->pTbsFreeHead = &paTbs[idxTb];
1163 }
1164 }
1165# endif
1166
1167# if 1
1168 /* Completely reset the TB cache. */
1169 RT_BZERO(pVCpu->iem.s.pTbCacheR3->apHash, sizeof(pVCpu->iem.s.pTbCacheR3->apHash[0]) * pVCpu->iem.s.pTbCacheR3->cHash);
1170# endif
1171
1172 return VINF_SUCCESS;
1173}
1174#endif
1175
1176
1177/**
1178 * Grow the translation block allocator with another chunk.
1179 */
1180static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1181{
1182 /*
1183 * Validate state.
1184 */
1185 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1186 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1187 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1188 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1189 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1190
1191 /*
1192 * Allocate a new chunk and add it to the allocator.
1193 */
1194 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1195 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1196 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1197
1198 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1199 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1200 {
1201 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1202 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1203 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1204 }
1205 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1206 pTbAllocator->cTotalTbs += cTbsPerChunk;
1207
1208 return VINF_SUCCESS;
1209}
1210
1211
1212/**
1213 * Allocates a TB from allocator with free block.
1214 *
1215 * This is common code to both the fast and slow allocator code paths.
1216 */
1217DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1218{
1219 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1220 Assert(pTbAllocator->pTbsFreeHead);
1221
1222 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1223 pTbAllocator->pTbsFreeHead = pTb->pNext;
1224 pTbAllocator->cInUseTbs += 1;
1225 if (fThreaded)
1226 pTbAllocator->cThreadedTbs += 1;
1227 else
1228 pTbAllocator->cNativeTbs += 1;
1229 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1230 return pTb;
1231}
1232
1233
1234/**
1235 * Slow path for iemTbAllocatorAlloc.
1236 */
1237static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1238{
1239 /*
1240 * With some luck we can add another chunk.
1241 */
1242 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1243 {
1244 int rc = iemTbAllocatorGrow(pVCpu);
1245 if (RT_SUCCESS(rc))
1246 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1247 }
1248
1249 /*
1250 * We have to prune stuff. Sigh.
1251 *
1252 * This requires scanning for older TBs and kicking them out. Not sure how to
1253 * best do this as we don't want to maintain any list of TBs ordered by last
1254 * usage time. But one reasonably simple approach would be that each time we
1255 * get here we continue a sequential scan of the allocation chunks,
1256 * considering just a smallish number of TBs and freeing a fixed portion of
1257 * them. Say, we consider the next 128 TBs, freeing the least recently used
1258 * out of each group of 4 TBs, resulting in 32 free TBs.
1259 */
1260 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1261 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1262 uint32_t const cTbsToPrune = 128;
1263 uint32_t const cTbsPerGroup = 4;
1264 uint32_t cFreedTbs = 0;
1265#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1266 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1267#else
1268 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1269#endif
1270 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1271 idxTbPruneFrom = 0;
1272 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1273 {
1274 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1275 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1276 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1277 uint32_t cMsAge = msNow - pTb->msLastUsed;
1278 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1279
1280 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1281 {
1282#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1283 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1284 { /* likely */ }
1285 else
1286 {
1287 idxInChunk2 = 0;
1288 idxChunk2 += 1;
1289 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1290 idxChunk2 = 0;
1291 }
1292#endif
1293 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1294 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1295 if ( cMsAge2 > cMsAge
1296 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1297 {
1298 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1299 pTb = pTb2;
1300 idxChunk = idxChunk2;
1301 idxInChunk = idxInChunk2;
1302 cMsAge = cMsAge2;
1303 }
1304 }
1305
1306 /* Free the TB. */
1307 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1308 cFreedTbs++; /* paranoia */
1309 }
1310 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1311 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1312
1313 /* Flush the TB lookup entry pointer. */
1314 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1315
1316 /*
1317 * Allocate a TB from the ones we've pruned.
1318 */
1319 if (cFreedTbs)
1320 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1321 return NULL;
1322}
1323
1324
1325/**
1326 * Allocate a translation block.
1327 *
1328 * @returns Pointer to block on success, NULL if we're out and unable to
1329 * free up an existing one (very unlikely once implemented).
1330 * @param pVCpu The cross context virtual CPU structure of the calling
1331 * thread.
1332 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1333 * For statistics.
1334 */
1335DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1336{
1337 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1338 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1339
1340 /* Free any pending TBs before we proceed. */
1341 if (!pTbAllocator->pDelayedFreeHead)
1342 { /* probably likely */ }
1343 else
1344 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1345
1346 /* If the allocator is full, take the slow code path. */
1347 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1348 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1349 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1350}
1351
1352
1353#if 0 /*def VBOX_WITH_IEM_NATIVE_RECOMPILER*/
1354/**
1355 * This is called when we're out of space for native TBs.
1356 *
1357 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1358 * The difference is that we only prune native TBs and will only free any if
1359 * there are at least two in a group. The conditions under which we're called are
1360 * different - there will probably be free TBs in the table when we're called.
1361 * Therefore we increase the group size and max scan length, though we'll stop
1362 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1363 * up at least 8 TBs.
1364 */
1365void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1366{
1367 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1368 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1369
1370 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1371
1372 /*
1373 * Flush the delayed free list before we start freeing TBs indiscriminately.
1374 */
1375 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1376
1377 /*
1378 * Scan and free TBs.
1379 */
1380 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1381 uint32_t const cTbsToPrune = 128 * 8;
1382 uint32_t const cTbsPerGroup = 4 * 4;
1383 uint32_t cFreedTbs = 0;
1384 uint32_t cMaxInstrs = 0;
1385 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1386 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1387 {
1388 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1389 idxTbPruneFrom = 0;
1390 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1391 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1392 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1393 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1394 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1395
1396 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1397 {
1398 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1399 { /* likely */ }
1400 else
1401 {
1402 idxInChunk2 = 0;
1403 idxChunk2 += 1;
1404 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1405 idxChunk2 = 0;
1406 }
1407 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1408 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1409 {
1410 cNativeTbs += 1;
1411 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1412 if ( cMsAge2 > cMsAge
1413 || ( cMsAge2 == cMsAge
1414 && ( pTb2->cUsed < pTb->cUsed
1415 || ( pTb2->cUsed == pTb->cUsed
1416 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1417 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1418 {
1419 pTb = pTb2;
1420 idxChunk = idxChunk2;
1421 idxInChunk = idxInChunk2;
1422 cMsAge = cMsAge2;
1423 }
1424 }
1425 }
1426
1427 /* Free the TB if we found at least two native ones in this group. */
1428 if (cNativeTbs >= 2)
1429 {
1430 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1431 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1432 cFreedTbs++;
1433 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1434 break;
1435 }
1436 }
1437 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1438
1439 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1440}
1441#endif /* unused / VBOX_WITH_IEM_NATIVE_RECOMPILER */
1442
1443
1444/*********************************************************************************************************************************
1445* Threaded Recompiler Core *
1446*********************************************************************************************************************************/
1447/**
1448 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1449 * @returns pszBuf.
1450 * @param fFlags The flags.
1451 * @param pszBuf The output buffer.
1452 * @param cbBuf The output buffer size. At least 32 bytes.
1453 */
1454DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1455{
1456 Assert(cbBuf >= 32);
1457 static RTSTRTUPLE const s_aModes[] =
1458 {
1459 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1460 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1461 /* [02] = */ { RT_STR_TUPLE("!2!") },
1462 /* [03] = */ { RT_STR_TUPLE("!3!") },
1463 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1464 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1465 /* [06] = */ { RT_STR_TUPLE("!6!") },
1466 /* [07] = */ { RT_STR_TUPLE("!7!") },
1467 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1468 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1469 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1470 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1471 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1472 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1473 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1474 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1475 /* [10] = */ { RT_STR_TUPLE("!10!") },
1476 /* [11] = */ { RT_STR_TUPLE("!11!") },
1477 /* [12] = */ { RT_STR_TUPLE("!12!") },
1478 /* [13] = */ { RT_STR_TUPLE("!13!") },
1479 /* [14] = */ { RT_STR_TUPLE("!14!") },
1480 /* [15] = */ { RT_STR_TUPLE("!15!") },
1481 /* [16] = */ { RT_STR_TUPLE("!16!") },
1482 /* [17] = */ { RT_STR_TUPLE("!17!") },
1483 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1484 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1485 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1486 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1487 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1488 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1489 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1490 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1491 };
1492 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1493 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1494 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1495
1496 pszBuf[off++] = ' ';
1497 pszBuf[off++] = 'C';
1498 pszBuf[off++] = 'P';
1499 pszBuf[off++] = 'L';
1500 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1501 Assert(off < 32);
1502
1503 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1504
1505 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1506 {
1507 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1508 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1509 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1510 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1511 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1512 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1513 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1514 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1515 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1516 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1517 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1518 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1519 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1520 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1521 };
1522 if (fFlags)
1523 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1524 if (s_aFlags[i].fFlag & fFlags)
1525 {
1526 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1527 pszBuf[off++] = ' ';
1528 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1529 off += s_aFlags[i].cchName;
1530 fFlags &= ~s_aFlags[i].fFlag;
1531 if (!fFlags)
1532 break;
1533 }
1534 pszBuf[off] = '\0';
1535
1536 return pszBuf;
1537}
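
/*
 * Example of the kind of string the formatter above produces (illustrative,
 * the exact output depends on the IEM_F_XXX / IEMTB_F_XXX values set): for a
 * TB compiled for 32-bit flat ring-3 threaded execution the buffer would read
 * something like "32BIT_FLAT CPL3 TYPE_THREADED".
 */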
1538
1539
1540/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1541static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1542{
1543 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1544 pDis->cbCachedInstr += cbMaxRead;
1545 RT_NOREF(cbMinRead);
1546 return VERR_NO_DATA;
1547}
1548
1549
1550/**
1551 * Worker for iemThreadedDisassembleTb.
1552 */
1553static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1554 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1555{
1556 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1557 {
1558 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1559 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1560 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1561 {
1562 PIEMTB pLookupTb = papTbLookup[iLookup];
1563 if (pLookupTb)
1564 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1565 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1566 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1567 : "invalid");
1568 else
1569 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1570 }
1571 pHlp->pfnPrintf(pHlp, "\n");
1572 }
1573 else
1574 {
1575 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1576 idxFirst, cEntries, pTb->cTbLookupEntries);
1577 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1578 }
1579}
1580
1581
1582DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1583{
1584 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1585
1586 char szDisBuf[512];
1587
1588 /*
1589 * Print TB info.
1590 */
1591 pHlp->pfnPrintf(pHlp,
1592 "pTb=%p: GCPhysPc=%RGp (%RGv) cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1593 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1594 pTb, pTb->GCPhysPc, pTb->FlatPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1595 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1596
1597 /*
1598 * This disassembly is driven by the threaded call table and the opcode
1599 * ranges recorded in the TB: a guest instruction is disassembled whenever
1600 * the next call entry starts at a new opcode offset.
1601 */
1602 DISSTATE Dis;
1603 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1604 uint32_t const cCalls = pTb->Thrd.cCalls;
1605 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1606 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1607 : DISCPUMODE_64BIT;
1608 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1609 uint8_t idxRange = UINT8_MAX;
1610 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1611 uint32_t offRange = 0;
1612 uint32_t offOpcodes = 0;
1613 uint32_t const cbOpcodes = pTb->cbOpcodes;
1614 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1615 bool fTbLookupSeen0 = false;
1616
1617 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1618 {
1619 /*
1620 * New opcode range?
1621 */
1622 if ( idxRange == UINT8_MAX
1623 || idxRange >= cRanges
1624 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1625 {
1626 idxRange += 1;
1627 if (idxRange < cRanges)
1628 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1629 else
1630 continue;
1631 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1632 + (pTb->aRanges[idxRange].idxPhysPage == 0
1633 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1634 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1635 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1636 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1637 pTb->aRanges[idxRange].idxPhysPage);
1638 GCPhysPc += offRange;
1639 }
1640
1641 /*
1642 * Disassemble another guest instruction?
1643 */
1644 if ( paCalls[iCall].offOpcode != offOpcodes
1645 && paCalls[iCall].cbOpcode > 0
1646 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1647 {
1648 offOpcodes = paCalls[iCall].offOpcode;
1649 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1650 uint32_t cbInstr = 1;
1651 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1652 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1653 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1654 if (RT_SUCCESS(rc))
1655 {
1656 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1657 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1658 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1659 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1660 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1661 }
1662 else
1663 {
1664 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1665 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1666 cbInstr = paCalls[iCall].cbOpcode;
1667 }
1668 GCPhysPc += cbInstr;
1669 offRange += cbInstr;
1670 }
1671
1672 /*
1673 * Dump call details.
1674 */
1675 pHlp->pfnPrintf(pHlp,
1676 " Call #%u to %s (%u args)\n",
1677 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1678 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1679 if (paCalls[iCall].uTbLookup != 0)
1680 {
1681 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1682 fTbLookupSeen0 = idxFirst == 0;
1683 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1684 }
1685
1686 /*
1687 * Snoop fExec.
1688 */
1689 switch (paCalls[iCall].enmFunction)
1690 {
1691 default:
1692 break;
1693 case kIemThreadedFunc_BltIn_CheckMode:
1694 fExec = paCalls[iCall].auParams[0];
1695 break;
1696 }
1697 }
1698
1699 if (!fTbLookupSeen0)
1700 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1701}
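
/*
 * Illustrative sketch, not part of the build: the rough shape of the dump
 * produced above.  All values, names and mnemonics below are invented for the
 * example; the line layout follows the format strings in the function:
 *
 *   pTb=xxxx: GCPhysPc=... (...) cInstructions=2 LB 0x5 cRanges=1 cTbLookupEntries=2
 *   pTb=xxxx: cUsed=7 msLastUsed=1234 fFlags=0x... 32BIT_PROT CPL0 TYPE_THREADED
 *    Range #0: GCPhysPc=... LB 0x5 [idxPg=0]
 *    %%...: 89 d8                    mov eax, ebx
 *    Call #0 to <threaded function name> (2 args)
 *     TB Lookup: NULL
 *    ...
 *    Fallback TB Lookup: NULL
 */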
1702
1703
1704
1705/**
1706 * Allocate a translation block for threaded recompilation.
1707 *
1708 * This is allocated with a maxed-out call table and storage for opcode bytes,
1709 * because it's only supposed to be called once per EMT to allocate the TB
1710 * pointed to by IEMCPU::pThrdCompileTbR3.
1711 *
1712 * @returns Pointer to the translation block on success, NULL on failure.
1713 * @param pVM The cross context virtual machine structure.
1714 * @param pVCpu The cross context virtual CPU structure of the calling
1715 * thread.
1716 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1717 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1718 */
1719static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1720{
1721 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1722 if (pTb)
1723 {
1724 unsigned const cCalls = 256;
1725 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1726 if (pTb->Thrd.paCalls)
1727 {
1728 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1729 if (pTb->pabOpcodes)
1730 {
1731 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1732 pTb->Thrd.cAllocated = cCalls;
1733 pTb->Thrd.cCalls = 0;
1734 pTb->cbOpcodes = 0;
1735 pTb->pNext = NULL;
1736 pTb->cUsed = 0;
1737 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1738 pTb->idxAllocChunk = UINT8_MAX;
1739 pTb->GCPhysPc = GCPhysPc;
1740 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1741 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1742 pTb->cInstructions = 0;
1743 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1744
1745 /* Init the first opcode range. */
1746 pTb->cRanges = 1;
1747 pTb->aRanges[0].cbOpcodes = 0;
1748 pTb->aRanges[0].offOpcodes = 0;
1749 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1750 pTb->aRanges[0].u2Unused = 0;
1751 pTb->aRanges[0].idxPhysPage = 0;
1752 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1753 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1754
1755 return pTb;
1756 }
1757 RTMemFree(pTb->Thrd.paCalls);
1758 }
1759 RTMemFree(pTb);
1760 }
1761 RT_NOREF(pVM);
1762 return NULL;
1763}
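
/*
 * Sizing note (illustrative): the compile-time TB allocated above is
 * deliberately maxed out - 256 call entries plus 256 * 16 = 4096 bytes of
 * opcode storage, i.e. 16 bytes per call which comfortably covers the 15 byte
 * x86 instruction length limit.  It is allocated once per EMT and then
 * recycled via iemThreadedTbReuse() below.
 */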
1764
1765
1766/**
1767 * Called on the TB that is dedicated for recompilation before it's reused.
1768 *
1769 * @param pVCpu The cross context virtual CPU structure of the calling
1770 * thread.
1771 * @param pTb The translation block to reuse.
1772 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1773 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1774 */
1775static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1776{
1777 pTb->GCPhysPc = GCPhysPc;
1778 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1779 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1780 pTb->Thrd.cCalls = 0;
1781 pTb->cbOpcodes = 0;
1782 pTb->cInstructions = 0;
1783 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1784
1785 /* Init the first opcode range. */
1786 pTb->cRanges = 1;
1787 pTb->aRanges[0].cbOpcodes = 0;
1788 pTb->aRanges[0].offOpcodes = 0;
1789 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1790 pTb->aRanges[0].u2Unused = 0;
1791 pTb->aRanges[0].idxPhysPage = 0;
1792 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1793 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1794}
1795
1796
1797/**
1798 * Used to duplicate a threaded translation block after recompilation is done.
1799 *
1800 * @returns Pointer to the translation block on success, NULL on failure.
1801 * @param pVM The cross context virtual machine structure.
1802 * @param pVCpu The cross context virtual CPU structure of the calling
1803 * thread.
1804 * @param pTbSrc The TB to duplicate.
1805 */
1806static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1807{
1808 /*
1809 * Just using the heap for now. Will make this more efficient and
1810 * complicated later, don't worry. :-)
1811 */
1812 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1813 if (pTb)
1814 {
1815 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1816 memcpy(pTb, pTbSrc, sizeof(*pTb));
1817 pTb->idxAllocChunk = idxAllocChunk;
1818
1819 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1820 Assert(cCalls > 0);
1821 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1822 if (pTb->Thrd.paCalls)
1823 {
1824 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1825 Assert(cbTbLookup > 0);
1826 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1827 Assert(cbOpcodes > 0);
1828 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1829 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1830 if (pbBoth)
1831 {
1832 RT_BZERO(pbBoth, cbTbLookup);
1833 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1834 pTb->Thrd.cAllocated = cCalls;
1835 pTb->pNext = NULL;
1836 pTb->cUsed = 0;
1837 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1838 pTb->fFlags = pTbSrc->fFlags;
1839
1840 return pTb;
1841 }
1842 RTMemFree(pTb->Thrd.paCalls);
1843 }
1844 iemTbAllocatorFree(pVCpu, pTb);
1845 }
1846 RT_NOREF(pVM);
1847 return NULL;
1848
1849}
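
/*
 * Layout note (illustrative): the single RTMemAlloc in the duplication above
 * packs the TB lookup table and the opcode bytes into one block:
 *
 *      pbBoth + 0:          cTbLookupEntries * sizeof(PIEMTB), zero initialized
 *      pbBoth + cbTbLookup: cbOpcodes opcode bytes - pTb->pabOpcodes points here
 *
 * Presumably IEMTB_GET_TB_LOOKUP_TAB_ENTRY() locates the table relative to
 * pabOpcodes again; treat that as an assumption of this note.
 */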
1850
1851
1852/**
1853 * Adds the given TB to the hash table.
1854 *
1855 * @param pVCpu The cross context virtual CPU structure of the calling
1856 * thread.
1857 * @param pTbCache The cache to add it to.
1858 * @param pTb The translation block to add.
1859 */
1860static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1861{
1862 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1863
1864 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1865 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1866 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1867 if (LogIs12Enabled())
1868 {
1869 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1870 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1871 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1872 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1873 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1874 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1875 pTb->aRanges[idxRange].idxPhysPage == 0
1876 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1877 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1878 }
1879}
1880
1881
1882/**
1883 * Called by opcode verifier functions when they detect a problem.
1884 */
1885void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1886{
1887 /* We cannot always free the current TB (fSafeToFree indicates whether we can) because:
1888 - A threaded TB will have its current call entry accessed
1889 to update pVCpu->iem.s.cInstructions.
1890 - A native TB will have code left to execute. */
1891 if (fSafeToFree)
1892 iemTbAllocatorFree(pVCpu, pTb);
1893 else
1894 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1895}
1896
1897
1898/*
1899 * Real code.
1900 */
1901
1902#ifdef LOG_ENABLED
1903/**
1904 * Logs the current instruction.
1905 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1906 * @param pszFunction The IEM function doing the execution.
1907 * @param idxInstr The instruction number in the block.
1908 */
1909static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1910{
1911# ifdef IN_RING3
1912 if (LogIs2Enabled())
1913 {
1914 char szInstr[256];
1915 uint32_t cbInstr = 0;
1916 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1917 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1918 szInstr, sizeof(szInstr), &cbInstr);
1919
1920 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1921 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1922 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1923 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1924 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1925 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1926 " %s\n"
1927 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1928 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1929 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1930 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1931 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1932 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1933 szInstr));
1934
1935 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1936 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1937 }
1938 else
1939# endif
1940 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1941 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1942}
1943#endif /* LOG_ENABLED */
1944
1945
1946#if 0
1947static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1948{
1949 RT_NOREF(pVM, pVCpu);
1950 return rcStrict;
1951}
1952#endif
1953
1954
1955/**
1956 * Initializes the decoder state when compiling TBs.
1957 *
1958 * This presumes that fExec has already been initialized.
1959 *
1960 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1961 * to apply fixes to them as well.
1962 *
1963 * @param pVCpu The cross context virtual CPU structure of the calling
1964 * thread.
1965 * @param fReInit Clear for the first call for a TB, set for subsequent
1966 * calls from inside the compile loop where we can skip a
1967 * couple of things.
1968 * @param fExtraFlags The extra translation block flags when @a fReInit is
1969 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1970 * checked.
1971 */
1972DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1973{
1974 /* ASSUMES: That iemInitExec was already called and that anyone changing
1975 CPU state affecting the fExec bits since then will have updated fExec! */
1976 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1977 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1978
1979 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1980
1981 /* Decoder state: */
1982 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1983 pVCpu->iem.s.enmEffAddrMode = enmMode;
1984 if (enmMode != IEMMODE_64BIT)
1985 {
1986 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1987 pVCpu->iem.s.enmEffOpSize = enmMode;
1988 }
1989 else
1990 {
1991 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1992 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1993 }
1994 pVCpu->iem.s.fPrefixes = 0;
1995 pVCpu->iem.s.uRexReg = 0;
1996 pVCpu->iem.s.uRexB = 0;
1997 pVCpu->iem.s.uRexIndex = 0;
1998 pVCpu->iem.s.idxPrefix = 0;
1999 pVCpu->iem.s.uVex3rdReg = 0;
2000 pVCpu->iem.s.uVexLength = 0;
2001 pVCpu->iem.s.fEvexStuff = 0;
2002 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
2003 pVCpu->iem.s.offModRm = 0;
2004 pVCpu->iem.s.iNextMapping = 0;
2005
2006 if (!fReInit)
2007 {
2008 pVCpu->iem.s.cActiveMappings = 0;
2009 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
2010 pVCpu->iem.s.fEndTb = false;
2011 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes before executing the first instruction) */
2012 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2013 pVCpu->iem.s.fTbCrossedPage = false;
2014 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
2015 pVCpu->iem.s.idxLastCheckIrqCallNo = UINT16_MAX;
2016 pVCpu->iem.s.fTbCurInstrIsSti = false;
2017 /* Force RF clearing and TF checking on first instruction in the block
2018 as we don't really know what came before and should assume the worst: */
2019 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
2020 }
2021 else
2022 {
2023 Assert(pVCpu->iem.s.cActiveMappings == 0);
2024 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
2025 Assert(pVCpu->iem.s.fEndTb == false);
2026 Assert(pVCpu->iem.s.fTbCrossedPage == false);
2027 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
2028 }
2029 pVCpu->iem.s.fTbCurInstr = 0;
2030
2031#ifdef DBGFTRACE_ENABLED
2032 switch (IEM_GET_CPU_MODE(pVCpu))
2033 {
2034 case IEMMODE_64BIT:
2035 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
2036 break;
2037 case IEMMODE_32BIT:
2038 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2039 break;
2040 case IEMMODE_16BIT:
2041 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2042 break;
2043 }
2044#endif
2045}
2046
2047
2048/**
2049 * Initializes the opcode fetcher when starting the compilation.
2050 *
2051 * @param pVCpu The cross context virtual CPU structure of the calling
2052 * thread.
2053 */
2054DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
2055{
2056 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
2057#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2058 pVCpu->iem.s.offOpcode = 0;
2059#else
2060 RT_NOREF(pVCpu);
2061#endif
2062}
2063
2064
2065/**
2066 * Re-initializes the opcode fetcher between instructions while compiling.
2067 *
2068 * @param pVCpu The cross context virtual CPU structure of the calling
2069 * thread.
2070 */
2071DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
2072{
2073 if (pVCpu->iem.s.pbInstrBuf)
2074 {
2075 uint64_t off = pVCpu->cpum.GstCtx.rip;
2076 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2077 off += pVCpu->cpum.GstCtx.cs.u64Base;
2078 off -= pVCpu->iem.s.uInstrBufPc;
2079 if (off < pVCpu->iem.s.cbInstrBufTotal)
2080 {
2081 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2082 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2083 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2084 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2085 else
2086 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2087 }
2088 else
2089 {
2090 pVCpu->iem.s.pbInstrBuf = NULL;
2091 pVCpu->iem.s.offInstrNextByte = 0;
2092 pVCpu->iem.s.offCurInstrStart = 0;
2093 pVCpu->iem.s.cbInstrBuf = 0;
2094 pVCpu->iem.s.cbInstrBufTotal = 0;
2095 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2096 }
2097 }
2098 else
2099 {
2100 pVCpu->iem.s.offInstrNextByte = 0;
2101 pVCpu->iem.s.offCurInstrStart = 0;
2102 pVCpu->iem.s.cbInstrBuf = 0;
2103 pVCpu->iem.s.cbInstrBufTotal = 0;
2104#ifdef VBOX_STRICT
2105 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2106#endif
2107 }
2108#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2109 pVCpu->iem.s.offOpcode = 0;
2110#endif
2111}
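
/*
 * Illustrative sketch, not part of the build: the essence of the window
 * clamping above in plain C.  Types and names are simplified assumptions (no
 * IEM state involved); the real code additionally resets the whole buffer
 * state when the PC falls outside the cached range.
 */
#if 0
static uint32_t iemExampleClampInstrBufSize(uint64_t uFlatPc, uint64_t uBufPc, uint32_t cbBufTotal)
{
    uint64_t const off = uFlatPc - uBufPc;      /* rebase the PC onto the cached buffer */
    if (off >= cbBufTotal)
        return 0;                               /* outside the buffer: force a refetch */
    if (off + 15 <= cbBufTotal)
        return (uint32_t)off + 15;              /* full 15 byte lookahead fits */
    return cbBufTotal;                          /* clamp to the end of the buffer */
}
#endif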
2112
2113#ifdef LOG_ENABLED
2114
2115/**
2116 * Inserts a NOP call.
2117 *
2118 * This is for debugging.
2119 *
2120 * @returns true on success, false if we're out of call entries.
2121 * @param pTb The translation block being compiled.
2122 */
2123bool iemThreadedCompileEmitNop(PIEMTB pTb)
2124{
2125 /* Emit the call. */
2126 uint32_t const idxCall = pTb->Thrd.cCalls;
2127 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2128 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2129 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2130 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2131 pCall->idxInstr = pTb->cInstructions - 1;
2132 pCall->cbOpcode = 0;
2133 pCall->offOpcode = 0;
2134 pCall->uTbLookup = 0;
2135 pCall->fFlags = 0;
2136 pCall->auParams[0] = 0;
2137 pCall->auParams[1] = 0;
2138 pCall->auParams[2] = 0;
2139 return true;
2140}
2141
2142
2143/**
2144 * Called by iemThreadedCompile if cpu state logging is desired.
2145 *
2146 * @returns true on success, false if we're out of call entries.
2147 * @param pTb The translation block being compiled.
2148 */
2149bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2150{
2151 /* Emit the call. */
2152 uint32_t const idxCall = pTb->Thrd.cCalls;
2153 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2154 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2155 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2156 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2157 pCall->idxInstr = pTb->cInstructions - 1;
2158 pCall->cbOpcode = 0;
2159 pCall->offOpcode = 0;
2160 pCall->uTbLookup = 0;
2161 pCall->fFlags = 0;
2162 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2163 pCall->auParams[1] = 0;
2164 pCall->auParams[2] = 0;
2165 return true;
2166}
2167
2168#endif /* LOG_ENABLED */
2169
2170DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2171{
2172 switch (cbInstr)
2173 {
2174 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2175 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2176 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2177 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2178 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2179 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2180 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2181 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2182 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2183 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2184 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2185 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2186 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2187 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2188 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2189 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2190 }
2191}
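
/*
 * Equivalence note (illustrative): for cbInstr in the architectural 1..15
 * range the unrolled switch above does the same as
 *      memcpy(pbDst, pVCpu->iem.s.abOpcode, cbInstr);
 * the fall-through ladder merely avoids a memcpy call for these tiny copies.
 */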
2192
2193#ifdef IEM_WITH_INTRA_TB_JUMPS
2194
2195/**
2196 * Emits the necessary tail calls for a full TB loop-jump.
2197 */
2198static bool iemThreadedCompileFullTbJump(PVMCPUCC pVCpu, PIEMTB pTb)
2199{
2200 /*
2201 * We need a timer and maybe IRQ check before jumping, so make sure
2202 * we've got sufficient call entries left before emitting anything.
2203 */
2204 uint32_t idxCall = pTb->Thrd.cCalls;
2205 if (idxCall + 1U <= pTb->Thrd.cAllocated)
2206 {
2207 /*
2208 * We're good, emit the calls.
2209 */
2210 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2211 pTb->Thrd.cCalls = (uint16_t)(idxCall + 2);
2212
2213 /* Always check timers as we risk getting stuck in a loop otherwise. We
2214 combine it with an IRQ check if that's not performed in the TB already. */
2215 pCall->enmFunction = pVCpu->iem.s.idxLastCheckIrqCallNo < idxCall
2216 ? kIemThreadedFunc_BltIn_CheckTimers
2217 : kIemThreadedFunc_BltIn_CheckTimersAndIrq;
2218 pCall->idxInstr = 0;
2219 pCall->offOpcode = 0;
2220 pCall->cbOpcode = 0;
2221 pCall->uTbLookup = 0;
2222 pCall->fFlags = 0;
2223 pCall->auParams[0] = 0;
2224 pCall->auParams[1] = 0;
2225 pCall->auParams[2] = 0;
2226 pCall++;
2227
2228 /* The jump callentry[0]. */
2229 pCall->enmFunction = kIemThreadedFunc_BltIn_Jump;
2230 pCall->idxInstr = 0;
2231 pCall->offOpcode = 0;
2232 pCall->cbOpcode = 0;
2233 pCall->uTbLookup = 0;
2234 pCall->fFlags = 0;
2235 pCall->auParams[0] = 0; /* jump target is call zero */
2236 pCall->auParams[1] = 0;
2237 pCall->auParams[2] = 0;
2238
2239 /* Mark callentry #0 as a jump target. */
2240 pTb->Thrd.paCalls[0].fFlags |= IEMTHREADEDCALLENTRY_F_JUMP_TARGET;
2241 }
2242
2243 return false;
2244}
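
/*
 * Shape of the emitted loop tail (illustrative recap of the code above):
 *
 *      callentry[0]    ...                          <- flagged IEMTHREADEDCALLENTRY_F_JUMP_TARGET
 *      ...
 *      callentry[n]    BltIn_CheckTimers or BltIn_CheckTimersAndIrq
 *      callentry[n+1]  BltIn_Jump, auParams[0] = 0  <- loop back to callentry[0]
 *
 * The 'false' return deliberately ends the compilation loop so the jump stays
 * the final call in the TB.
 */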
2245
2246/**
2247 * Called by IEM_MC2_BEGIN_EMIT_CALLS when it detects that we're back at the
2248 * first instruction and we didn't just branch to it (that's handled below).
2249 *
2250 * This will emit a loop iff everything is compatible with that.
2251 */
2252DECLHIDDEN(int) iemThreadedCompileBackAtFirstInstruction(PVMCPU pVCpu, PIEMTB pTb) RT_NOEXCEPT
2253{
2254 /* Check if the mode matches. */
2255 if ( (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2256 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS))
2257 {
2258 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected2);
2259 iemThreadedCompileFullTbJump(pVCpu, pTb);
2260 }
2261 return VINF_IEM_RECOMPILE_END_TB;
2262}
2263
2264#endif /* IEM_WITH_INTRA_TB_JUMPS */
2265
2266
2267/**
2268 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2269 *
2270 * - CS LIM check required.
2271 * - Must recheck opcode bytes.
2272 * - Previous instruction branched.
2273 * - TLB load detected, probably due to page crossing.
2274 *
2275 * @returns true if everything went well, false if we're out of space in the TB
2276 * (e.g. opcode ranges) or we need to start doing CS.LIM checks.
2277 * @param pVCpu The cross context virtual CPU structure of the calling
2278 * thread.
2279 * @param pTb The translation block being compiled.
2280 */
2281bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2282{
2283 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2284 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2285#if 0
2286 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2287 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2288#endif
2289
2290 /*
2291 * If we're not in 64-bit mode and not already checking CS.LIM we need to
2292 * see whether we need to start checking.
2293 */
2294 bool fConsiderCsLimChecking;
2295 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2296 if ( fMode == IEM_F_MODE_X86_64BIT
2297 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2298 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2299 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2300 fConsiderCsLimChecking = false; /* already enabled or not needed */
2301 else
2302 {
2303 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2304 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2305 fConsiderCsLimChecking = true; /* likely */
2306 else
2307 {
2308 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2309 return false;
2310 }
2311 }
2312
2313 /*
2314 * Prepare the call now, even before we know whether we can accept the instruction in this TB.
2315 * This allows us to amend parameters w/o making every case suffer.
2316 */
2317 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2318 uint16_t const offOpcode = pTb->cbOpcodes;
2319 uint8_t idxRange = pTb->cRanges - 1;
2320
2321 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2322 pCall->idxInstr = pTb->cInstructions;
2323 pCall->cbOpcode = cbInstr;
2324 pCall->offOpcode = offOpcode;
2325 pCall->uTbLookup = 0;
2326 pCall->fFlags = 0;
2327 pCall->auParams[0] = (uint32_t)cbInstr
2328 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2329 /* The upper dword is sometimes used for cbStartPage. */;
2330 pCall->auParams[1] = idxRange;
2331 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2332
2333/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2334 * gotten onto. If we do, stop */
2335
2336 /*
2337 * Case 1: We've branched (RIP changed).
2338 *
2339 * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2340 * TB, end the TB here as it is most likely a loop and if it
2341 * made sense to unroll it, the guest code compiler should've
2342 * done it already.
2343 *
2344 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2345 * Req: 1 extra range, no extra phys.
2346 *
2347 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2348 * necessary (fTbCrossedPage is true).
2349 * Req: 1 extra range, probably 1 extra phys page entry.
2350 *
2351 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2352 * but in addition we cross into the following page and require
2353 * another TLB load.
2354 * Req: 2 extra ranges, probably 2 extra phys page entries.
2355 *
2356 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2357 * the following page (thus fTbCrossedPage is true).
2358 * Req: 2 extra ranges, probably 1 extra phys page entry.
2359 *
2360 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
2361 * it may trigger "spuriously" from the CPU's point of view because of
2362 * physical page changes that invalidate the physical TLB and trigger a
2363 * call to the function. In theory this shouldn't be a big deal, just a bit of
2364 * performance loss as we'll pick the LoadingTlb variants.
2365 *
2366 * Note! We do not currently optimize branching to the next instruction (sorry
2367 * 32-bit PIC code). We could maybe do that in the branching code that
2368 * sets (or not) fTbBranched.
2369 */
2370 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2371 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2372 * code. This'll require filtering out far jmps and calls, as they
2373 * load CS which should technically be considered indirect since the
2374 * GDT/LDT entry's base address can be modified independently from
2375 * the code. */
2376 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2377 {
2378 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2379 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2380 {
2381 /* 1a + 1b - instruction fully within the branched to page. */
2382 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2383 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2384
2385 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2386 {
2387 /* Check that we've got a free range. */
2388 idxRange += 1;
2389 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2390 { /* likely */ }
2391 else
2392 {
2393 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2394 return false;
2395 }
2396 pCall->auParams[1] = idxRange;
2397 pCall->auParams[2] = 0;
2398
2399 /* Check that we've got a free page slot. */
2400 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2401 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2402 uint8_t idxPhysPage;
2403 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2404 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2405 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2406 {
2407 pTb->aGCPhysPages[0] = GCPhysNew;
2408 pTb->aRanges[idxRange].idxPhysPage = 1;
2409 idxPhysPage = UINT8_MAX;
2410 }
2411 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2412 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2413 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2414 {
2415 pTb->aGCPhysPages[1] = GCPhysNew;
2416 pTb->aRanges[idxRange].idxPhysPage = 2;
2417 idxPhysPage = UINT8_MAX;
2418 }
2419 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2420 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2421 else
2422 {
2423 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2424 return false;
2425 }
2426
2427 /* Loop check: We weave the loop check in here to optimize the lookup. */
2428 if (idxPhysPage != UINT8_MAX)
2429 {
2430 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2431 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2432 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2433 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2434 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2435 {
2436 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2437#ifdef IEM_WITH_INTRA_TB_JUMPS
2438 /* If we're looping back to the start of the TB and the mode is still the same,
2439 we could emit a jump optimization. For now we don't do page transitions
2440 as that implies TLB loading and such. */
2441 if ( idxLoopRange == 0
2442 && offPhysPc == pTb->aRanges[0].offPhysPage
2443 && (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2444 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS)
2445 && (pVCpu->iem.s.fTbBranched & ( IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR
2446 | IEMBRANCHED_F_STACK | IEMBRANCHED_F_RELATIVE))
2447 == IEMBRANCHED_F_RELATIVE)
2448 {
2449 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected);
2450 return iemThreadedCompileFullTbJump(pVCpu, pTb);
2451 }
2452#endif
2453 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2454 return false;
2455 }
2456 }
2457
2458 /* Finish setting up the new range. */
2459 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2460 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2461 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2462 pTb->aRanges[idxRange].u2Unused = 0;
2463 pTb->cRanges++;
2464 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2465 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2466 pTb->aRanges[idxRange].offOpcodes));
2467 }
2468 else
2469 {
2470 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2471 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2472 }
2473
2474 /* Determine which function we need to load & check.
2475 Note! For jumps to a new page, we'll set both fTbBranched and
2476 fTbCrossedPage to avoid unnecessary TLB work for intra
2477 page branching */
2478 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2479 || pVCpu->iem.s.fTbCrossedPage)
2480 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2481 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2482 : !fConsiderCsLimChecking
2483 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2484 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2485 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2486 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2487 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2488 : !fConsiderCsLimChecking
2489 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2490 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2491 else
2492 {
2493 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2494 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2495 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2496 : !fConsiderCsLimChecking
2497 ? kIemThreadedFunc_BltIn_CheckOpcodes
2498 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2499 }
2500 }
2501 else
2502 {
2503 /* 1c + 1d - instruction crosses pages. */
2504 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2505 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2506
2507 /* Lazy bird: Check that this isn't case 1c, since we've already
2508 loaded the first physical address. End the TB and
2509 make it a case 2b instead.
2510
2511 Hmm. Too much bother to detect, so just do the same
2512 with case 1d as well. */
2513#if 0 /** @todo get back to this later when we've got the actual branch code in
2514 * place. */
2515 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2516
2517 /* Check that we've got two free ranges. */
2518 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2519 { /* likely */ }
2520 else
2521 return false;
2522 idxRange += 1;
2523 pCall->auParams[1] = idxRange;
2524 pCall->auParams[2] = 0;
2525
2526 /* ... */
2527
2528#else
2529 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2530 return false;
2531#endif
2532 }
2533 }
2534
2535 /*
2536 * Case 2: Page crossing.
2537 *
2538 * Sub-case 2a: The instruction starts on the first byte in the next page.
2539 *
2540 * Sub-case 2b: The instruction has opcode bytes in both the current and
2541 * following page.
2542 *
2543 * Both cases require a new range table entry and probably a new physical
2544 * page entry. The difference is in which functions to emit and whether to
2545 * add bytes to the current range.
2546 */
2547 else if (pVCpu->iem.s.fTbCrossedPage)
2548 {
2549 /* Check that we've got a free range. */
2550 idxRange += 1;
2551 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2552 { /* likely */ }
2553 else
2554 {
2555 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2556 return false;
2557 }
2558
2559 /* Check that we've got a free page slot. */
2560 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2561 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2562 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2563 pTb->aRanges[idxRange].idxPhysPage = 0;
2564 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2565 || pTb->aGCPhysPages[0] == GCPhysNew)
2566 {
2567 pTb->aGCPhysPages[0] = GCPhysNew;
2568 pTb->aRanges[idxRange].idxPhysPage = 1;
2569 }
2570 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2571 || pTb->aGCPhysPages[1] == GCPhysNew)
2572 {
2573 pTb->aGCPhysPages[1] = GCPhysNew;
2574 pTb->aRanges[idxRange].idxPhysPage = 2;
2575 }
2576 else
2577 {
2578 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2579 return false;
2580 }
2581
2582 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2583 {
2584 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2585 pCall->auParams[1] = idxRange;
2586 pCall->auParams[2] = 0;
2587
2588 /* Finish setting up the new range. */
2589 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2590 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2591 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2592 pTb->aRanges[idxRange].u2Unused = 0;
2593 pTb->cRanges++;
2594 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2595 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2596 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2597
2598 /* Determine which function we need to load & check. */
2599 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2600 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2601 : !fConsiderCsLimChecking
2602 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2603 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2604 }
2605 else
2606 {
2607 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2608 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2609 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2610 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2611
2612 /* We're good. Split the instruction over the old and new range table entries. */
2613 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2614
2615 pTb->aRanges[idxRange].offPhysPage = 0;
2616 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2617 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2618 pTb->aRanges[idxRange].u2Unused = 0;
2619 pTb->cRanges++;
2620 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2621 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2622 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2623
2624 /* Determine which function we need to load & check. */
2625 if (pVCpu->iem.s.fTbCheckOpcodes)
2626 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2627 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2628 : !fConsiderCsLimChecking
2629 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2630 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2631 else
2632 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2633 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2634 : !fConsiderCsLimChecking
2635 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2636 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2637 }
2638 }
2639
2640 /*
2641 * Regular case: No new range required.
2642 */
2643 else
2644 {
2645 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2646 if (pVCpu->iem.s.fTbCheckOpcodes)
2647 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2648 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2649 : kIemThreadedFunc_BltIn_CheckOpcodes;
2650 else
2651 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2652
2653 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2654 pTb->cbOpcodes = offOpcode + cbInstr;
2655 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2656 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2657 }
2658
2659 /*
2660 * Commit the call.
2661 */
2662 pTb->Thrd.cCalls++;
2663
2664 /*
2665 * Clear state.
2666 */
2667 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2668 pVCpu->iem.s.fTbCrossedPage = false;
2669 pVCpu->iem.s.fTbCheckOpcodes = false;
2670
2671 /*
2672 * Copy opcode bytes.
2673 */
2674 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2675 pTb->cbOpcodes = offOpcode + cbInstr;
2676 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2677
2678 return true;
2679}
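
/*
 * Illustrative recap of the check-function selection above (the code is the
 * authoritative version):
 *
 *      - Branched indirectly/far, or branch + TLB load:  CheckOpcodesLoadingTlb
 *      - Branched conditionally/directly, same page:     CheckPcAndOpcodes
 *      - Branched relatively, same page:                 CheckOpcodes
 *      - Branch where the instruction crosses a page:    the TB is simply ended
 *      - Page crossing, instruction starts on new page:  CheckOpcodesOnNewPageLoadingTlb
 *      - Page crossing, instruction split over pages:    CheckOpcodesAcrossPageLoadingTlb,
 *                                                        or ...OnNextPageLoadingTlb when no
 *                                                        opcode re-check is needed
 *      - No branch, no crossing:                         CheckOpcodes or just CheckCsLim
 *
 * Each branch/crossing case has a CheckCsLimAnd... sibling used when
 * IEMTB_F_CS_LIM_CHECKS is active, and a ...ConsiderCsLim sibling used when
 * CS.LIM checking isn't active but couldn't be ruled out up front
 * (fConsiderCsLimChecking).
 */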
2680
2681
2682/**
2683 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2684 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2685 *
2686 * @returns true if anything is pending, false if not.
2687 * @param pVCpu The cross context virtual CPU structure of the calling
2688 * thread.
2689 */
2690DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2691{
2692 uint64_t fCpu = pVCpu->fLocalForcedActions;
2693 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2694#if 1
2695 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2696 if (RT_LIKELY( !fCpu
2697 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2698 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2699 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2700 return false;
2701 return true;
2702#else
2703 return false;
2704#endif
2705
2706}
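
/*
 * Reading aid (illustrative): after untangling the negations, the RT_LIKELY
 * condition above reports "pending" exactly when one of the force flags is set
 * and it is either an NMI/SMI, or a PIC/APIC interrupt that is actually
 * deliverable right now (EFLAGS.IF set and no interrupt shadow).
 */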
2707
2708
2709/**
2710 * Called by iemThreadedCompile when a block requires a mode check.
2711 *
2712 * @returns true if we should continue, false if we're out of call entries.
2713 * @param pVCpu The cross context virtual CPU structure of the calling
2714 * thread.
2715 * @param pTb The translation block being compiled.
2716 */
2717static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2718{
2719 /* Emit the call. */
2720 uint32_t const idxCall = pTb->Thrd.cCalls;
2721 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2722 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2723 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2724 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2725 pCall->idxInstr = pTb->cInstructions - 1;
2726 pCall->cbOpcode = 0;
2727 pCall->offOpcode = 0;
2728 pCall->uTbLookup = 0;
2729 pCall->fFlags = 0;
2730 pCall->auParams[0] = pVCpu->iem.s.fExec;
2731 pCall->auParams[1] = 0;
2732 pCall->auParams[2] = 0;
2733 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2734 return true;
2735}
2736
2737
2738/**
2739 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2740 * set.
2741 *
2742 * @returns true if we should continue, false if an IRQ is deliverable or a
2743 * relevant force flag is pending.
2744 * @param pVCpu The cross context virtual CPU structure of the calling
2745 * thread.
2746 * @param pTb The translation block being compiled.
2747 * @sa iemThreadedCompileCheckIrq
2748 */
2749bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2750{
2751 /*
2752 * Skip this if we've already emitted a call after the previous instruction
2753 * or if it's the first call, as we're always checking FFs between blocks.
2754 */
2755 uint32_t const idxCall = pTb->Thrd.cCalls;
2756 if ( idxCall > 0
2757 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2758 {
2759 /* Emit the call. */
2760 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2761 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2762 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2763 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2764 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2765 pCall->idxInstr = pTb->cInstructions;
2766 pCall->offOpcode = 0;
2767 pCall->cbOpcode = 0;
2768 pCall->uTbLookup = 0;
2769 pCall->fFlags = 0;
2770 pCall->auParams[0] = 0;
2771 pCall->auParams[1] = 0;
2772 pCall->auParams[2] = 0;
2773 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2774
2775 /* Reset the IRQ check value. */
2776 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2777
2778 /*
2779 * Check for deliverable IRQs and pending force flags.
2780 */
2781 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2782 }
2783 return true; /* continue */
2784}
2785
2786
2787/**
2788 * Emits an IRQ check call and checks for pending IRQs.
2789 *
2790 * @returns true if we should continue, false if an IRQ is deliverable or a
2791 * relevant force flag is pending.
2792 * @param pVCpu The cross context virtual CPU structure of the calling
2793 * thread.
2794 * @param pTb The translation block.
2795 * @sa iemThreadedCompileBeginEmitCallsComplications
2796 */
2797static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2798{
2799 /* Check again in a little bit, unless it is immediately following an STI
2800 in which case we *must* check immediately after the next instruction
2801 as well in case it's executed with interrupt inhibition. We could
2802 otherwise miss the interrupt window. See the irq2 wait2 variant in
2803 bs3-timers-1 which is doing sti + sti + cli. */
2804 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2805 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2806 else
2807 {
2808 pVCpu->iem.s.fTbCurInstrIsSti = false;
2809 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2810 }
2811 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2812
2813 /*
2814 * Emit the call.
2815 */
2816 uint32_t const idxCall = pTb->Thrd.cCalls;
2817 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2818 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2819 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2820 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2821 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2822 pCall->idxInstr = pTb->cInstructions;
2823 pCall->offOpcode = 0;
2824 pCall->cbOpcode = 0;
2825 pCall->uTbLookup = 0;
2826 pCall->fFlags = 0;
2827 pCall->auParams[0] = 0;
2828 pCall->auParams[1] = 0;
2829 pCall->auParams[2] = 0;
2830
2831 /*
2832 * Check for deliverable IRQs and pending force flags.
2833 */
2834 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2835}
2836
2837
2838/**
2839 * Compiles a new TB and executes it.
2840 *
2841 * We combine compilation and execution here as it makes for simpler code flow
2842 * in the main loop and it allows interpreting while compiling if we want to
2843 * explore that option.
2844 *
2845 * @returns Strict VBox status code.
2846 * @param pVM The cross context virtual machine structure.
2847 * @param pVCpu The cross context virtual CPU structure of the calling
2848 * thread.
2849 * @param GCPhysPc The physical address corresponding to the current
2850 * RIP+CS.BASE.
2851 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2852 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2853 */
2854static IEM_DECL_MSC_GUARD_IGNORE VBOXSTRICTRC
2855iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2856{
2857 IEMTLBTRACE_TB_COMPILE(pVCpu, GCPhysPc);
2858 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2859 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2860
2861 /*
2862 * Get the TB we use for the recompiling. This is a maxed-out TB, of
2863 * which we'll make a more efficient copy when we're done compiling.
2864 */
2865 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2866 if (pTb)
2867 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2868 else
2869 {
2870 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2871 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2872 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2873 }
2874 pTb->FlatPc = pVCpu->iem.s.uInstrBufPc | (GCPhysPc & GUEST_PAGE_OFFSET_MASK);
2875
2876 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2877 functions may get at it. */
2878 pVCpu->iem.s.pCurTbR3 = pTb;
2879
2880#if 0
2881 /* Make sure the CheckIrq condition matches the one in EM. */
2882 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2883 const uint32_t cZeroCalls = 1;
2884#else
2885 const uint32_t cZeroCalls = 0;
2886#endif
2887
2888 /*
2889 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2890 */
2891 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2892 iemThreadedCompileInitOpcodeFetching(pVCpu);
2893 VBOXSTRICTRC rcStrict;
2894 for (;;)
2895 {
2896 /* Process the next instruction. */
2897#ifdef LOG_ENABLED
2898 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2899 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2900 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2901 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2902#endif
2903 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2904 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2905
2906 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2907#if 0
2908 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2909 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2910 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2911#endif
2912 if ( rcStrict == VINF_SUCCESS
2913 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2914 && !pVCpu->iem.s.fEndTb)
2915 {
2916 Assert(pTb->Thrd.cCalls > cCallsPrev);
2917 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2918
2919 pVCpu->iem.s.cInstructions++;
2920
2921 /* Check for mode change _after_ certain CIMPL calls, so check that
2922 we continue executing with the same mode value. */
2923 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2924 { /* probable */ }
2925 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2926 { /* extremely likely */ }
2927 else
2928 break;
2929
2930#if defined(LOG_ENABLED) && 0 /* for debugging */
2931 //iemThreadedCompileEmitNop(pTb);
2932 iemThreadedCompileEmitLogCpuState(pTb);
2933#endif
2934 }
2935 else
2936 {
2937 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2938 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2939 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2940 rcStrict = VINF_SUCCESS;
2941
2942 if (pTb->Thrd.cCalls > cZeroCalls)
2943 {
2944 if (cCallsPrev != pTb->Thrd.cCalls)
2945 pVCpu->iem.s.cInstructions++;
2946 break;
2947 }
2948
2949 pVCpu->iem.s.pCurTbR3 = NULL;
2950 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2951 }
2952
2953 /* Check for IRQs? */
2954 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2955 pVCpu->iem.s.cInstrTillIrqCheck--;
2956 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2957 break;
2958
2959 /* Still space in the TB? */
2960 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2961 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2962 && pTb->cTbLookupEntries < 127)
2963 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2964 else
2965 {
2966 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2967 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2968 break;
2969 }
2970 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2971 }
2972
2973 /*
2974 * Reserve lookup space for the final call entry if necessary.
2975 */
2976 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2977 if (pTb->Thrd.cCalls > 1)
2978 {
2979 if (pFinalCall->uTbLookup == 0)
2980 {
2981 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2982 pTb->cTbLookupEntries += 1;
2983 }
2984 }
2985 else if (pFinalCall->uTbLookup != 0)
2986 {
2987 Assert(pTb->cTbLookupEntries > 1);
2988 pFinalCall->uTbLookup -= 1;
2989 pTb->cTbLookupEntries -= 1;
2990 }
2991
2992 /*
2993 * Duplicate the TB into a completed one and link it.
2994 */
2995 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2996 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2997
2998 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2999
3000#ifdef IEM_COMPILE_ONLY_MODE
3001 /*
3002 * Execute the translation block.
3003 */
3004#endif
3005
3006 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3007}
3008
3009
3010
3011/*********************************************************************************************************************************
3012* Threaded Translation Block Saving and Restoring for Profiling the Native Recompiler *
3013*********************************************************************************************************************************/
3014#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3015# include <iprt/message.h>
3016
3017static const SSMFIELD g_aIemThreadedTbFields[] =
3018{
3019 SSMFIELD_ENTRY( IEMTB, cUsed),
3020 SSMFIELD_ENTRY( IEMTB, msLastUsed),
3021 SSMFIELD_ENTRY_GCPHYS(IEMTB, GCPhysPc),
3022 SSMFIELD_ENTRY( IEMTB, fFlags),
3023 SSMFIELD_ENTRY( IEMTB, x86.fAttr),
3024 SSMFIELD_ENTRY( IEMTB, cRanges),
3025 SSMFIELD_ENTRY( IEMTB, cInstructions),
3026 SSMFIELD_ENTRY( IEMTB, Thrd.cCalls),
3027 SSMFIELD_ENTRY( IEMTB, cTbLookupEntries),
3028 SSMFIELD_ENTRY( IEMTB, cbOpcodes),
3029 SSMFIELD_ENTRY( IEMTB, FlatPc),
3030 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[0]),
3031 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[1]),
3032 SSMFIELD_ENTRY_TERM()
3033};
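/* Note: only the fixed-size IEMTB members above are described by the field table.  The
   variable-sized data (the packed range fields, the opcode bytes and the threaded call
   table) is written separately by iemThreadedSaveTbForProfiling() below and read back in
   the same order by IEMR3ThreadedProfileRecompilingSavedTbs(). */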
3034
3035/**
3036 * Saves a threaded TB to a dedicated saved state file.
3037 */
3038static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb)
3039{
3040 /* Only VCPU #0 for now. */
3041 if (pVCpu->idCpu != 0)
3042 return;
3043
3044 /*
3045 * Get the SSM handle, lazily opening the output file.
3046 */
3047 PSSMHANDLE const pNil = (PSSMHANDLE)~(uintptr_t)0; Assert(!RT_VALID_PTR(pNil));
3048 PSSMHANDLE pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3049 if (pSSM && pSSM != pNil)
3050 { /* likely */ }
3051 else if (pSSM)
3052 return;
3053 else
3054 {
3055 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil;
3056 int rc = SSMR3Open("ThreadedTBsForRecompilerProfiling.sav", NULL, NULL, SSM_OPEN_F_FOR_WRITING, &pSSM);
3057 AssertLogRelRCReturnVoid(rc);
3058
3059 rc = SSMR3WriteFileHeader(pSSM, 1);
3060 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3061
3062 rc = SSMR3WriteUnitBegin(pSSM, "threaded-tbs", 1, 0);
3063 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3064 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pSSM;
3065 }
3066
3067 /*
3068 * Do the actual saving.
3069 */
3070 SSMR3PutU32(pSSM, 0); /* Indicates that another TB follows. */
3071
3072 /* The basic structure. */
3073 SSMR3PutStructEx(pSSM, pTb, sizeof(*pTb), 0 /*fFlags*/, g_aIemThreadedTbFields, NULL);
3074
3075 /* The ranges. */
3076 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3077 {
3078 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offOpcodes);
3079 SSMR3PutU16(pSSM, pTb->aRanges[iRange].cbOpcodes);
3080 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offPhysPage | (pTb->aRanges[iRange].idxPhysPage << 14));
3081 }
3082
3083 /* The opcodes. */
3084 SSMR3PutMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3085
3086 /* The threaded call table. */
3087 int rc = SSMR3PutMem(pSSM, pTb->Thrd.paCalls, sizeof(*pTb->Thrd.paCalls) * pTb->Thrd.cCalls);
3088 AssertLogRelMsgStmt(RT_SUCCESS(rc), ("rc=%Rrc\n", rc), pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil);
3089}
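/* Stream layout produced above (one "threaded-tbs" SSM unit, version 1): a sequence of
   records, each preceded by a 32-bit tag -- 0 means another TB record follows (struct
   fields, ranges, opcodes, call table), while UINT32_MAX marks the end of the stream and
   is appended by iemThreadedSaveTbForProfilingCleanup() below. */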
3090
3091
3092/**
3093 * Called by IEMR3Term to finish any open profile files.
3094 *
3095 * @note This is not called on the EMT for @a pVCpu, but rather on the thread
3096 * driving the VM termination.
3097 */
3098DECLHIDDEN(void) iemThreadedSaveTbForProfilingCleanup(PVMCPU pVCpu)
3099{
3100 PSSMHANDLE const pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3101 pVCpu->iem.s.pSsmThreadedTbsForProfiling = NULL;
3102 if (RT_VALID_PTR(pSSM))
3103 {
3104 /* Indicate that this is the end. */
3105 SSMR3PutU32(pSSM, UINT32_MAX);
3106
3107 int rc = SSMR3WriteUnitComplete(pSSM);
3108 AssertLogRelRC(rc);
3109 rc = SSMR3WriteFileFooter(pSSM);
3110 AssertLogRelRC(rc);
3111 rc = SSMR3Close(pSSM);
3112 AssertLogRelRC(rc);
3113 }
3114}
3115
3116#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER && VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING */
3117
3118#ifdef IN_RING3
3119/**
3120 * API used to process what iemThreadedSaveTbForProfiling() saved.
3121 *
3122 * @note Do not mix build types or revisions. Local changes between saving the
3123 * TBs and calling this API may cause unexpected trouble.
3124 */
3125VMMR3DECL(int) IEMR3ThreadedProfileRecompilingSavedTbs(PVM pVM, const char *pszFilename, uint32_t cMinTbs)
3126{
3127# if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3128 PVMCPU const pVCpu = pVM->apCpusR3[0];
3129
3130 /* We need to keep an eye on the TB allocator. */
3131 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
3132
3133 /*
3134 * Load the TBs from the file.
3135 */
3136 PSSMHANDLE pSSM = NULL;
3137 int rc = SSMR3Open(pszFilename, NULL, NULL, 0, &pSSM);
3138 if (RT_SUCCESS(rc))
3139 {
3140 uint32_t cTbs = 0;
3141 PIEMTB pTbHead = NULL;
3142 PIEMTB *ppTbTail = &pTbHead;
3143 uint32_t uVersion;
3144 rc = SSMR3Seek(pSSM, "threaded-tbs", 0, &uVersion);
3145 if (RT_SUCCESS(rc))
3146 {
3147 for (;; cTbs++)
3148 {
3149 /* Check for the end tag. */
3150 uint32_t uTag = 0;
3151 rc = SSMR3GetU32(pSSM, &uTag);
3152 AssertRCBreak(rc);
3153 if (uTag == UINT32_MAX)
3154 break;
3155 AssertBreakStmt(uTag == 0, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3156
3157 /* Do we have room for another TB? */
3158 if (pTbAllocator->cInUseTbs + 2 >= pTbAllocator->cMaxTbs)
3159 {
3160 RTMsgInfo("Too many TBs to load, stopping loading early.\n");
3161 break;
3162 }
3163
3164 /* Allocate a new TB. */
3165 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
3166                AssertBreakStmt(pTb, rc = VERR_OUT_OF_RESOURCES);
3167
3168 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
3169 RT_ZERO(*pTb);
3170 pTb->idxAllocChunk = idxAllocChunk;
3171
3172 rc = SSMR3GetStructEx(pSSM, pTb, sizeof(*pTb), 0, g_aIemThreadedTbFields, NULL);
3173 if (RT_SUCCESS(rc))
3174 {
3175 AssertStmt(pTb->Thrd.cCalls > 0 && pTb->Thrd.cCalls <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3176 AssertStmt(pTb->cbOpcodes > 0 && pTb->cbOpcodes <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3177 AssertStmt(pTb->cRanges > 0 && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges), rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3178 AssertStmt(pTb->cTbLookupEntries > 0 && pTb->cTbLookupEntries <= 136, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3179
3180 if (RT_SUCCESS(rc))
3181 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3182 {
3183 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].offOpcodes);
3184 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].cbOpcodes);
3185 uint16_t uTmp = 0;
3186 rc = SSMR3GetU16(pSSM, &uTmp);
3187 AssertRCBreak(rc);
3188 pTb->aRanges[iRange].offPhysPage = uTmp & GUEST_PAGE_OFFSET_MASK;
3189 pTb->aRanges[iRange].idxPhysPage = uTmp >> 14;
3190
3191 AssertBreakStmt(pTb->aRanges[iRange].idxPhysPage <= RT_ELEMENTS(pTb->aGCPhysPages),
3192 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3193 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes < pTb->cbOpcodes,
3194 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3195 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes + pTb->aRanges[iRange].cbOpcodes <= pTb->cbOpcodes,
3196 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3197 }
3198
3199 if (RT_SUCCESS(rc))
3200 {
3201 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAllocZ(sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3202 if (pTb->Thrd.paCalls)
3203 {
3204 size_t const cbTbLookup = pTb->cTbLookupEntries * sizeof(PIEMTB);
3205 Assert(cbTbLookup > 0);
3206 size_t const cbOpcodes = pTb->cbOpcodes;
3207 Assert(cbOpcodes > 0);
3208 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
3209 uint8_t * const pbBoth = (uint8_t *)RTMemAllocZ(cbBoth);
3210 if (pbBoth)
3211 {
3212 pTb->pabOpcodes = &pbBoth[cbTbLookup];
3213 SSMR3GetMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3214 rc = SSMR3GetMem(pSSM, pTb->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3215 if (RT_SUCCESS(rc))
3216 {
3217 *ppTbTail = pTb;
3218 ppTbTail = &pTb->pNext;
3219 continue;
3220 }
3221 }
3222 else
3223 rc = VERR_NO_MEMORY;
3224 RTMemFree(pTb->Thrd.paCalls);
3225 }
3226 else
3227 rc = VERR_NO_MEMORY;
3228 }
3229 }
3230 iemTbAllocatorFree(pVCpu, pTb);
3231 break;
3232 }
3233 if (RT_FAILURE(rc))
3234 RTMsgError("Load error: %Rrc (cTbs=%u)", rc, cTbs);
3235 }
3236 else
3237 RTMsgError("SSMR3Seek failed on '%s': %Rrc", pszFilename, rc);
3238 SSMR3Close(pSSM);
3239 if (RT_SUCCESS(rc))
3240 {
3241 /*
3242 * Recompile the TBs.
3243 */
3244 if (pTbHead)
3245 {
3246 RTMsgInfo("Loaded %u TBs\n", cTbs);
3247 if (cTbs < cMinTbs)
3248 {
3249 RTMsgInfo("Duplicating TBs to reach %u TB target\n", cMinTbs);
3250 for (PIEMTB pTb = pTbHead;
3251 cTbs < cMinTbs && pTbAllocator->cInUseTbs + 2 <= pTbAllocator->cMaxTbs;
3252 pTb = pTb->pNext)
3253 {
3254 PIEMTB pTbCopy = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
3255 if (!pTbCopy)
3256 break;
3257 *ppTbTail = pTbCopy;
3258 ppTbTail = &pTbCopy->pNext;
3259 cTbs++;
3260 }
3261 }
3262
3263 PIEMTB pTbWarmup = iemThreadedTbDuplicate(pVM, pVCpu, pTbHead);
3264 if (pTbWarmup)
3265 {
3266 iemNativeRecompile(pVCpu, pTbWarmup);
3267 RTThreadSleep(512); /* to make the start visible in the profiler. */
3268 RTMsgInfo("Ready, set, go!\n");
3269
3270 if ((pTbWarmup->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3271 {
3272 uint32_t cFailed = 0;
3273 uint64_t const nsStart = RTTimeNanoTS();
3274 for (PIEMTB pTb = pTbHead; pTb; pTb = pTb->pNext)
3275 {
3276 iemNativeRecompile(pVCpu, pTb);
3277 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) != IEMTB_F_TYPE_NATIVE)
3278 cFailed++;
3279 }
3280 uint64_t const cNsElapsed = RTTimeNanoTS() - nsStart;
3281 RTMsgInfo("Recompiled %u TBs in %'RU64 ns - averaging %'RU64 ns/TB\n",
3282 cTbs, cNsElapsed, (cNsElapsed + cTbs - 1) / cTbs);
3283 if (cFailed)
3284 {
3285                            RTMsgError("Unfortunately %u TBs failed!", cFailed);
3286 rc = VERR_GENERAL_FAILURE;
3287 }
3288 RTThreadSleep(128); /* Another gap in the profiler timeline. */
3289 }
3290 else
3291 {
3292 RTMsgError("Failed to recompile the first TB!");
3293 rc = VERR_GENERAL_FAILURE;
3294 }
3295 }
3296 else
3297 rc = VERR_NO_MEMORY;
3298 }
3299 else
3300 {
3301 RTMsgError("'%s' contains no TBs!", pszFilename);
3302 rc = VERR_NO_DATA;
3303 }
3304 }
3305 }
3306 else
3307 RTMsgError("SSMR3Open failed on '%s': %Rrc", pszFilename, rc);
3308 return rc;
3309
3310# else
3311 RT_NOREF(pVM, pszFilename, cMinTbs);
3312 return VERR_NOT_IMPLEMENTED;
3313# endif
3314}
3315#endif /* IN_RING3 */
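/* Illustrative usage sketch, not taken from this file (the 8192 minimum is a made-up
   figure): a development-only hook could feed the TBs saved by
   iemThreadedSaveTbForProfiling() back into the native recompiler roughly like this:

       int rc = IEMR3ThreadedProfileRecompilingSavedTbs(pVM,
                                                        "ThreadedTBsForRecompilerProfiling.sav",
                                                        8192 /* hypothetical cMinTbs */);

   The call loads the saved TBs, duplicates them until the cMinTbs target is reached,
   recompiles the lot with iemNativeRecompile() and reports the average time per TB. */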
3316
3317
3318/*********************************************************************************************************************************
3319* Recompiled Execution Core *
3320*********************************************************************************************************************************/
3321
3322/** Default TB factor.
3323 * This is basically the number of nanoseconds we guess executing a TB takes
3324 * on average. We estimate it on the high side if we can.
3325 * @note Best if this is a power of two so it can be translated to a shift. */
3326#define IEM_TIMER_POLL_DEFAULT_FACTOR UINT32_C(64)
3327/** The minimum number of nanoseconds we can allow between timer pollings.
3328 * This must take the cost of TMTimerPollBoolWithNanoTS into account. We put that
3329 * cost at 104 ns now, thus this constant is at 256 ns. */
3330#define IEM_TIMER_POLL_MIN_NS UINT32_C(256)
3331/** The IEM_TIMER_POLL_MIN_NS value roughly translated to TBs, with some grains
3332 * of salt thrown in.
3333 * The idea is that we will be able to make progress with guest code execution
3334 * before polling timers and between running timers. */
3335#define IEM_TIMER_POLL_MIN_ITER UINT32_C(12)
3336/** The maximum number of nanoseconds we can allow between timer pollings.
3337 * This probably shouldn't be too high, as we don't have any timer
3338 * reprogramming feedback in the polling code. So, when a device reschedules a
3339 * timer for an earlier delivery, we won't know about it. */
3340#define IEM_TIMER_POLL_MAX_NS UINT32_C(8388608) /* 0x800000 ns = 8.4 ms */
3341/** The IEM_TIMER_POLL_MAX_NS value roughly translated to TBs, with some grains
3342 * of salt thrown in.
3343 * This helps control fluctuations in the NU benchmark. */
3344#define IEM_TIMER_POLL_MAX_ITER _512K
3345
3346#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3347/**
3348 * Calculates the number of TBs till the next timer polling using defaults.
3349 *
3350 * This is used when the previous run wasn't long enough to provide sufficient
3351 * data, and when coming back from the HALT state after not having actually
3352 * executed anything for a while.
3353 */
3354DECL_FORCE_INLINE(uint32_t) iemPollTimersCalcDefaultCountdown(uint64_t cNsDelta) RT_NOEXCEPT
3355{
3356 if (cNsDelta >= IEM_TIMER_POLL_MAX_NS)
3357 return RT_MIN(IEM_TIMER_POLL_MAX_NS / IEM_TIMER_POLL_DEFAULT_FACTOR, IEM_TIMER_POLL_MAX_ITER);
3358
3359    cNsDelta = RT_BIT_64(ASMBitLastSetU32((uint32_t)cNsDelta) - 1); /* round down to power of 2 */
3360 uint32_t const cRet = cNsDelta / IEM_TIMER_POLL_DEFAULT_FACTOR;
3361 if (cRet >= IEM_TIMER_POLL_MIN_ITER)
3362 {
3363 if (cRet <= IEM_TIMER_POLL_MAX_ITER)
3364 return cRet;
3365 return IEM_TIMER_POLL_MAX_ITER;
3366 }
3367 return IEM_TIMER_POLL_MIN_ITER;
3368}
3369#endif
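/* Worked example (illustrative): a delta of 524288 ns (2^19, about half a millisecond) is
   already a single power-of-two bit, so the rounding is a no-op; dividing by
   IEM_TIMER_POLL_DEFAULT_FACTOR (64) gives 8192 TBs, which lies within the
   [IEM_TIMER_POLL_MIN_ITER, IEM_TIMER_POLL_MAX_ITER] bracket and is returned as-is.
   Deltas of IEM_TIMER_POLL_MAX_NS or more yield 8388608 / 64 = 131072 TBs. */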
3370
3371
3372/**
3373 * Helper for polling timers.
3374 */
3375DECLHIDDEN(int) iemPollTimers(PVMCC pVM, PVMCPUCC pVCpu) RT_NOEXCEPT
3376{
3377 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPoll, a);
3378
3379 /*
3380 * Check for VM_FF_TM_VIRTUAL_SYNC and call TMR3VirtualSyncFF if set.
3381 * This is something all EMTs can do.
3382 */
3383 /* If the virtual sync FF is set, respond to it. */
3384 bool fRanTimers = VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC);
3385 if (!fRanTimers)
3386 { /* likely */ }
3387 else
3388 {
3389 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3390 TMR3VirtualSyncFF(pVM, pVCpu);
3391 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3392 }
3393
3394 /*
3395 * Poll timers.
3396 *
3397     * On the 10980xe the polling averages 314 ticks, with a min of 201, while
3398     * running a Norton Utilities DOS benchmark program. TSC runs at 3GHz,
3399 * translating that to 104 ns and 67 ns respectively. (An M2 booting win11
3400 * has an average of 2 ticks / 84 ns.)
3401 *
3402     * With the same setup the TMR3VirtualSyncFF and else branches here profile
3403     * to 79751 ticks / 26583 ns on average, with a min of 1194 ticks / 398 ns.
3404 * (An M2 booting win11 has an average of 24 ticks / 1008 ns, with a min of
3405 * 8 ticks / 336 ns.)
3406 *
3407 * If we get a zero return value we run timers. Non-timer EMTs shouldn't
3408 * ever see a zero value here, so we just call TMR3TimerQueuesDo. However,
3409     * we do not re-run timers if we already called TMR3VirtualSyncFF above; we
3410     * try to make sure some guest code gets executed first.
3411 */
3412 uint64_t nsNow = 0;
3413 uint64_t cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3414 if (cNsDelta >= 1) /* It is okay to run virtual sync timers a little early. */
3415 { /* likely */ }
3416 else if (!fRanTimers || VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
3417 {
3418 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3419 TMR3TimerQueuesDo(pVM);
3420 fRanTimers = true;
3421 nsNow = 0;
3422 cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3423 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3424 }
3425 else
3426 cNsDelta = 33;
3427
3428 /*
3429 * Calc interval and update the timestamps.
3430 */
3431 uint64_t const cNsSinceLast = nsNow - pVCpu->iem.s.nsRecompilerPollNow;
3432 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3433 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3434
3435 /*
3436     * Set the next polling countdown value.
3437 *
3438 * We take the previous value and adjust it according to the cNsSinceLast
3439 * value, if it's not within reason. This can't be too accurate since the
3440     * CheckIrq and intra-TB checks aren't evenly spaced; they depend highly
3441 * on the guest code.
3442 */
3443#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3444 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3445 if (cNsDelta >= RT_NS_1SEC / 4)
3446 {
3447 /*
3448         * Non-timer EMTs should end up here with a fixed 500ms delta; just return
3449         * the max and leave the polling overhead to the dedicated timer EMT.
3450 */
3451 AssertCompile(IEM_TIMER_POLL_MAX_ITER * IEM_TIMER_POLL_DEFAULT_FACTOR <= RT_NS_100MS);
3452 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3453 }
3454 else
3455 {
3456 /*
3457 * This is the timer EMT.
3458 */
3459 if (cNsDelta <= IEM_TIMER_POLL_MIN_NS)
3460 {
3461 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollTiny);
3462 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3463 }
3464 else
3465 {
3466 uint32_t const cNsDeltaAdj = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS : (uint32_t)cNsDelta;
3467 uint32_t const cNsDeltaSlack = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS / 2 : cNsDeltaAdj / 4;
3468 if ( cNsSinceLast < RT_MAX(IEM_TIMER_POLL_MIN_NS, 64)
3469 || cItersTillNextPoll < IEM_TIMER_POLL_MIN_ITER /* paranoia */)
3470 {
3471 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollDefaultCalc);
3472 cItersTillNextPoll = iemPollTimersCalcDefaultCountdown(cNsDeltaAdj);
3473 }
3474 else if ( cNsSinceLast >= cNsDeltaAdj + cNsDeltaSlack
3475 || cNsSinceLast <= cNsDeltaAdj - cNsDeltaSlack)
3476 {
3477 if (cNsSinceLast >= cItersTillNextPoll)
3478 {
3479 uint32_t uFactor = (uint32_t)(cNsSinceLast + cItersTillNextPoll - 1) / cItersTillNextPoll;
3480 cItersTillNextPoll = cNsDeltaAdj / uFactor;
3481 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorDivision, uFactor);
3482 }
3483 else
3484 {
3485 uint32_t uFactor = cItersTillNextPoll / (uint32_t)cNsSinceLast;
3486 cItersTillNextPoll = cNsDeltaAdj * uFactor;
3487 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorMultiplication, uFactor);
3488 }
3489
3490 if (cItersTillNextPoll >= IEM_TIMER_POLL_MIN_ITER)
3491 {
3492 if (cItersTillNextPoll <= IEM_TIMER_POLL_MAX_ITER)
3493 { /* likely */ }
3494 else
3495 {
3496 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollMax);
3497 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3498 }
3499 }
3500 else
3501 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3502 }
3503 else
3504 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollUnchanged);
3505 }
3506 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3507 }
3508#else
3509/** Poll timers every 400 us / 2500 Hz. (source: thin air) */
3510# define IEM_TIMER_POLL_IDEAL_NS (400U * RT_NS_1US)
3511 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3512 uint32_t const cNsIdealPollInterval = IEM_TIMER_POLL_IDEAL_NS;
3513 int64_t const nsFromIdeal = cNsSinceLast - cNsIdealPollInterval;
3514 if (nsFromIdeal < 0)
3515 {
3516 if ((uint64_t)-nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll < _64K)
3517 {
3518 cItersTillNextPoll += cItersTillNextPoll / 8;
3519 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3520 }
3521 }
3522 else
3523 {
3524 if ((uint64_t)nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll > 256)
3525 {
3526 cItersTillNextPoll -= cItersTillNextPoll / 8;
3527 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3528 }
3529 }
3530#endif
3531 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillNextPoll;
3532
3533 /*
3534 * Repeat the IRQ and FF checks.
3535 */
3536 if (cNsDelta > 0)
3537 {
3538 uint32_t fCpu = pVCpu->fLocalForcedActions;
3539 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3540 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3541 | VMCPU_FF_TLB_FLUSH
3542 | VMCPU_FF_UNHALT );
3543 if (RT_LIKELY( ( !fCpu
3544 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3545 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3546 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx)) ) )
3547 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
3548 {
3549 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3550 return VINF_SUCCESS;
3551 }
3552 }
3553 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3554 return VINF_IEM_REEXEC_BREAK_FF;
3555}
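/* Rough behaviour of the adaptive path in iemPollTimers (illustrative numbers): if the
   previous countdown of 8192 TBs took 1048576 ns, the observed cost is about 128 ns/TB;
   that measurement falls outside the 25% slack band around a new 2097152 ns deadline, so
   the next countdown is rescaled to 2097152 / 128 = 16384 TBs and then clamped to the
   [IEM_TIMER_POLL_MIN_ITER, IEM_TIMER_POLL_MAX_ITER] range. */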
3556
3557
3558/** Helper for iemTbExec. */
3559DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
3560{
3561 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
3562 Assert(idx < pTb->cTbLookupEntries);
3563 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
3564}
3565
3566
3567/**
3568 * Executes a translation block.
3569 *
3570 * @returns Strict VBox status code.
3571 * @param pVCpu The cross context virtual CPU structure of the calling
3572 * thread.
3573 * @param pTb The translation block to execute.
3574 */
3575static IEM_DECL_MSC_GUARD_IGNORE VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
3576{
3577 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
3578
3579 /*
3580 * Set the current TB so CIMPL functions may get at it.
3581 */
3582 pVCpu->iem.s.pCurTbR3 = pTb;
3583 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
3584
3585 /*
3586 * Execute the block.
3587 */
3588#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3589 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
3590 {
3591 pVCpu->iem.s.cTbExecNative++;
3592 IEMTLBTRACE_TB_EXEC_N8VE(pVCpu, pTb);
3593# ifdef LOG_ENABLED
3594 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
3595# endif
3596
3597# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3598 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
3599# endif
3600# ifdef RT_ARCH_AMD64
3601 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
3602# else
3603 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
3604# endif
3605
3606# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3607 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3608# endif
3609# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3610 /* Restore FPCR/MXCSR if the TB modified it. */
3611 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3612 {
3613 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3614 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3615 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3616 }
3617# endif
3618# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3619 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
3620# endif
3621 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3622 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3623 { /* likely */ }
3624 else
3625 {
3626 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
3627 pVCpu->iem.s.pCurTbR3 = NULL;
3628
3629 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3630 only to break out of TB execution early. */
3631 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3632 {
3633 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
3634 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3635 }
3636
3637 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
3638 only to break out of TB execution early due to pending FFs. */
3639 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
3640 {
3641 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
3642 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3643 }
3644
3645 /* VINF_IEM_REEXEC_WITH_FLAGS needs to receive special treatment
3646 and converted to VINF_SUCCESS or whatever is appropriate. */
3647 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
3648 {
3649 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
3650 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
3651 }
3652
3653 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
3654 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3655 }
3656 }
3657 else
3658#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
3659 {
3660 /*
3661 * The threaded execution loop.
3662 */
3663 pVCpu->iem.s.cTbExecThreaded++;
3664 IEMTLBTRACE_TB_EXEC_THRD(pVCpu, pTb);
3665#ifdef LOG_ENABLED
3666 uint64_t uRipPrev = UINT64_MAX;
3667#endif
3668 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
3669 uint32_t cCallsLeft = pTb->Thrd.cCalls;
3670 while (cCallsLeft-- > 0)
3671 {
3672#ifdef LOG_ENABLED
3673 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
3674 {
3675 uRipPrev = pVCpu->cpum.GstCtx.rip;
3676 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
3677 }
3678 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
3679 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
3680 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
3681#endif
3682#ifdef VBOX_WITH_STATISTICS
3683 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
3684 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
3685#endif
3686 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
3687 pCallEntry->auParams[0],
3688 pCallEntry->auParams[1],
3689 pCallEntry->auParams[2]);
3690 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3691 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3692 pCallEntry++;
3693 else if (rcStrict == VINF_IEM_REEXEC_JUMP)
3694 {
3695 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
3696 Assert(cCallsLeft == 0);
3697 uint32_t const idxTarget = (uint32_t)pCallEntry->auParams[0];
3698 cCallsLeft = pTb->Thrd.cCalls;
3699 AssertBreak(idxTarget < cCallsLeft - 1);
3700 cCallsLeft -= idxTarget;
3701 pCallEntry = &pTb->Thrd.paCalls[idxTarget];
3702 AssertBreak(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET);
3703 }
3704 else
3705 {
3706 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
3707 pVCpu->iem.s.pCurTbR3 = NULL;
3708 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
3709 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
3710
3711 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3712 only to break out of TB execution early. */
3713 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3714 {
3715#ifdef VBOX_WITH_STATISTICS
3716 if (pCallEntry->uTbLookup)
3717 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
3718 else
3719 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
3720#endif
3721 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3722 }
3723 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3724 }
3725 }
3726
3727 /* Update the lookup entry. */
3728 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
3729 }
3730
3731 pVCpu->iem.s.cInstructions += pTb->cInstructions;
3732 pVCpu->iem.s.pCurTbR3 = NULL;
3733 return VINF_SUCCESS;
3734}
3735
3736
3737/**
3738 * This is called when the PC doesn't match the current pbInstrBuf.
3739 *
3740 * Upon return, we're ready for opcode fetching. But please note that
3741 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
3742 * MMIO or unassigned).
3743 */
3744static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
3745{
3746 pVCpu->iem.s.pbInstrBuf = NULL;
3747 pVCpu->iem.s.offCurInstrStart = 0;
3748 pVCpu->iem.s.offInstrNextByte = 0;
3749 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
3750 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
3751}
3752
3753
3754/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
3755DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
3756{
3757 /*
3758 * Set uCurTbStartPc to RIP and calc the effective PC.
3759 */
3760 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
3761#if 0 /* unused */
3762 pVCpu->iem.s.uCurTbStartPc = uPc;
3763#endif
3764 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
3765 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
3766
3767 /*
3768 * Advance within the current buffer (PAGE) when possible.
3769 */
3770 if (pVCpu->iem.s.pbInstrBuf)
3771 {
3772 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
3773 if (off < pVCpu->iem.s.cbInstrBufTotal)
3774 {
3775 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3776 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3777 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3778 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3779 else
3780 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3781
3782 return pVCpu->iem.s.GCPhysInstrBuf + off;
3783 }
3784 }
3785 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3786}
3787
3788
3789/**
3790 * Determines the extra IEMTB_F_XXX flags.
3791 *
3792 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3793 * IEMTB_F_CS_LIM_CHECKS (or zero).
3794 * @param pVCpu The cross context virtual CPU structure of the calling
3795 * thread.
3796 */
3797DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3798{
3799 uint32_t fRet = 0;
3800
3801 /*
3802 * Determine the inhibit bits.
3803 */
3804 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3805 { /* typical */ }
3806 else
3807 {
3808 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3809 fRet |= IEMTB_F_INHIBIT_SHADOW;
3810 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3811 fRet |= IEMTB_F_INHIBIT_NMI;
3812 }
3813
3814 /*
3815 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3816 * likely to go invalid before the end of the translation block.
3817 */
3818 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3819 return fRet;
3820
3821 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3822 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3823 return fRet;
3824 return fRet | IEMTB_F_CS_LIM_CHECKS;
3825}
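/* Illustrative: with the usual flat 4 GiB code segment the limit check is skipped entirely
   (IEM_F_MODE_X86_IS_FLAT).  Otherwise IEMTB_F_CS_LIM_CHECKS is requested whenever less
   than roughly a guest page plus the maximum instruction length of headroom remains
   between EIP and the CS limit, so the compiled block gets explicit limit checks. */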
3826
3827
3828VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu, bool fWasHalted)
3829{
3830 /*
3831 * See if there is an interrupt pending in TRPM, inject it if we can.
3832 */
3833 if (!TRPMHasTrap(pVCpu))
3834 { /* likely */ }
3835 else
3836 {
3837 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3838 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3839            { /* likely */ }
3840 else
3841 return rcStrict;
3842 }
3843
3844 /*
3845 * Init the execution environment.
3846 */
3847#if 1 /** @todo this seems like a good idea, however if we ever share memory
3848 * directly with other threads on the host, it isn't necessarily... */
3849 if (pVM->cCpus == 1)
3850 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3851 else
3852#endif
3853 iemInitExec(pVCpu, 0 /*fExecOpts*/);
3854
3855 if (RT_LIKELY(!fWasHalted && pVCpu->iem.s.msRecompilerPollNow != 0))
3856 { }
3857 else
3858 {
3859 /* Do polling after halt and the first time we get here. */
3860#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3861 uint64_t nsNow = 0;
3862 uint32_t const cItersTillPoll = iemPollTimersCalcDefaultCountdown(TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow));
3863 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillPoll;
3864 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillPoll;
3865#else
3866 uint64_t const nsNow = TMVirtualGetNoCheck(pVM);
3867#endif
3868 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3869 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3870 }
3871 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3872
3873 /*
3874 * Run-loop.
3875 *
3876 * If we're using setjmp/longjmp we combine all the catching here to avoid
3877 * having to call setjmp for each block we're executing.
3878 */
3879 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3880 for (;;)
3881 {
3882 VBOXSTRICTRC rcStrict;
3883 IEM_TRY_SETJMP(pVCpu, rcStrict)
3884 {
3885 for (;;)
3886 {
3887 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3888 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3889 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3890 {
3891 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3892 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3893 if (pTb)
3894 rcStrict = iemTbExec(pVCpu, pTb);
3895 else
3896 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3897 }
3898 else
3899 {
3900 /* This can only happen if the current PC cannot be translated into a
3901 host pointer, which means we're in MMIO or unmapped memory... */
3902#if defined(VBOX_STRICT) && defined(IN_RING3)
3903 rcStrict = DBGFSTOP(pVM);
3904 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3905 return rcStrict;
3906#endif
3907 rcStrict = IEMExecLots(pVCpu, 2048, 511, NULL);
3908 }
3909 if (rcStrict == VINF_SUCCESS)
3910 {
3911 Assert(pVCpu->iem.s.cActiveMappings == 0);
3912
3913 /* Note! This IRQ/FF check is repeated in iemPollTimers, iemThreadedFunc_BltIn_CheckIrq
3914 and emitted by iemNativeRecompFunc_BltIn_CheckIrq. */
3915 uint64_t fCpu = pVCpu->fLocalForcedActions;
3916 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3917 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3918 | VMCPU_FF_TLB_FLUSH
3919 | VMCPU_FF_UNHALT );
3920 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3921 if (RT_LIKELY( ( !fCpu
3922 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3923 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3924 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3925 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3926 {
3927 /* Once in a while we need to poll timers here. */
3928 if ((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
3929 { /* likely */ }
3930 else
3931 {
3932 int rc = iemPollTimers(pVM, pVCpu);
3933 if (rc != VINF_SUCCESS)
3934                                             return VINF_SUCCESS; /* Let the caller deal with the FFs flagged by iemPollTimers. */
3935 }
3936 }
3937 else
3938 return VINF_SUCCESS;
3939 }
3940 else
3941 return rcStrict;
3942 }
3943 }
3944 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3945 {
3946 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3947 pVCpu->iem.s.cLongJumps++;
3948#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3949 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3950#endif
3951 if (pVCpu->iem.s.cActiveMappings > 0)
3952 iemMemRollback(pVCpu);
3953
3954#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3955 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3956 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3957 {
3958 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3959# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3960 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3961 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3962# endif
3963
3964#ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3965 /* Restore FPCR/MXCSR if the TB modified it. */
3966 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3967 {
3968 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3969 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3970 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3971 }
3972#endif
3973 }
3974#endif
3975
3976#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3977 /* If pTb isn't NULL we're in iemTbExec. */
3978 if (!pTb)
3979 {
3980 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3981 pTb = pVCpu->iem.s.pCurTbR3;
3982 if (pTb)
3983 {
3984 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3985 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3986 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3987 }
3988 }
3989#endif
3990 pVCpu->iem.s.pCurTbR3 = NULL;
3991 return rcStrict;
3992 }
3993 IEM_CATCH_LONGJMP_END(pVCpu);
3994 }
3995}
3996