VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp @ r106329

Last change on this file was r106329, checked in by vboxsync, 6 weeks ago:
VMM/IEM: Some minor perf tweaks for iemExecMemAllocatorPrune. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
  • File size: 161.5 KB
1/* $Id: IEMAllThrdRecompiler.cpp 106329 2024-10-15 14:19:43Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
117# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
118#endif
119
120
121/*********************************************************************************************************************************
122* Internal Functions *
123*********************************************************************************************************************************/
124#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
125static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb);
126#endif
127
128
129/**
130 * Calculates the effective address of a ModR/M memory operand, extended version
131 * for use in the recompilers.
132 *
133 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
134 *
135 * May longjmp on internal error.
136 *
137 * @return The effective address.
138 * @param pVCpu The cross context virtual CPU structure of the calling thread.
139 * @param bRm The ModRM byte.
140 * @param cbImmAndRspOffset - First byte: The size of any immediate
141 * following the effective address opcode bytes
142 * (only for RIP relative addressing).
143 * - Second byte: RSP displacement (for POP [ESP]).
144 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
145 * SIB byte (bits 39:32).
146 *
147 * @note This must be defined in a source file with matching
148 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
149 * or implemented differently...
150 */
151RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
152{
153 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
154# define SET_SS_DEF() \
155 do \
156 { \
157 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
158 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
159 } while (0)
160
161 if (!IEM_IS_64BIT_CODE(pVCpu))
162 {
163/** @todo Check the effective address size crap! */
164 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
165 {
166 uint16_t u16EffAddr;
167
168 /* Handle the disp16 form with no registers first. */
169 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
170 {
171 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
172 *puInfo = u16EffAddr;
173 }
174 else
175 {
176 /* Get the displacement. */
177 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
178 {
179 case 0: u16EffAddr = 0; break;
180 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
181 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
182 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
183 }
184 *puInfo = u16EffAddr;
185
186 /* Add the base and index registers to the disp. */
187 switch (bRm & X86_MODRM_RM_MASK)
188 {
189 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
190 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
191 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
192 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
193 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
194 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
195 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
196 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
197 }
198 }
199
200 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
201 return u16EffAddr;
202 }
203
204 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
205 uint32_t u32EffAddr;
206 uint64_t uInfo;
207
208 /* Handle the disp32 form with no registers first. */
209 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
210 {
211 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
212 uInfo = u32EffAddr;
213 }
214 else
215 {
216 /* Get the register (or SIB) value. */
217 uInfo = 0;
218 switch ((bRm & X86_MODRM_RM_MASK))
219 {
220 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
221 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
222 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
223 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
224 case 4: /* SIB */
225 {
226 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
227 uInfo = (uint64_t)bSib << 32;
228
229 /* Get the index and scale it. */
230 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
231 {
232 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
233 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
234 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
235 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
236 case 4: u32EffAddr = 0; /*none */ break;
237 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
238 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
239 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
240 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
241 }
242 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
243
244 /* add base */
245 switch (bSib & X86_SIB_BASE_MASK)
246 {
247 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
248 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
249 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
250 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
251 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
252 case 5:
253 if ((bRm & X86_MODRM_MOD_MASK) != 0)
254 {
255 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
256 SET_SS_DEF();
257 }
258 else
259 {
260 uint32_t u32Disp;
261 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
262 u32EffAddr += u32Disp;
263 uInfo |= u32Disp;
264 }
265 break;
266 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
267 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
268 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
269 }
270 break;
271 }
272 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
273 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
274 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
275 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
276 }
277
278 /* Get and add the displacement. */
279 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
280 {
281 case 0:
282 break;
283 case 1:
284 {
285 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
286 u32EffAddr += i8Disp;
287 uInfo |= (uint32_t)(int32_t)i8Disp;
288 break;
289 }
290 case 2:
291 {
292 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
293 u32EffAddr += u32Disp;
294 uInfo |= u32Disp;
295 break;
296 }
297 default:
298 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
299 }
300 }
301
302 *puInfo = uInfo;
303 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
304 return u32EffAddr;
305 }
306
307 uint64_t u64EffAddr;
308 uint64_t uInfo;
309
310 /* Handle the rip+disp32 form with no registers first. */
311 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
312 {
313 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
314 uInfo = (uint32_t)u64EffAddr;
315 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
316 }
317 else
318 {
319 /* Get the register (or SIB) value. */
320 uInfo = 0;
321 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
322 {
323 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
324 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
325 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
326 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
327 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
328 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
329 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
330 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
331 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
332 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
333 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
334 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
335 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
336 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
337 /* SIB */
338 case 4:
339 case 12:
340 {
341 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
342 uInfo = (uint64_t)bSib << 32;
343
344 /* Get the index and scale it. */
345 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
346 {
347 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
348 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
349 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
350 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
351 case 4: u64EffAddr = 0; /*none */ break;
352 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
353 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
354 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
355 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
356 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
357 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
358 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
359 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
360 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
361 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
362 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
363 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
364 }
365 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
366
367 /* add base */
368 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
369 {
370 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
371 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
372 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
373 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
374 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
375 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
376 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
377 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
378 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
379 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
380 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
381 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
382 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
383 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
384 /* complicated encodings */
385 case 5:
386 case 13:
387 if ((bRm & X86_MODRM_MOD_MASK) != 0)
388 {
389 if (!pVCpu->iem.s.uRexB)
390 {
391 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
392 SET_SS_DEF();
393 }
394 else
395 u64EffAddr += pVCpu->cpum.GstCtx.r13;
396 }
397 else
398 {
399 uint32_t u32Disp;
400 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
401 u64EffAddr += (int32_t)u32Disp;
402 uInfo |= u32Disp;
403 }
404 break;
405 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
406 }
407 break;
408 }
409 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
410 }
411
412 /* Get and add the displacement. */
413 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
414 {
415 case 0:
416 break;
417 case 1:
418 {
419 int8_t i8Disp;
420 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
421 u64EffAddr += i8Disp;
422 uInfo |= (uint32_t)(int32_t)i8Disp;
423 break;
424 }
425 case 2:
426 {
427 uint32_t u32Disp;
428 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
429 u64EffAddr += (int32_t)u32Disp;
430 uInfo |= u32Disp;
431 break;
432 }
433 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
434 }
435
436 }
437
438 *puInfo = uInfo;
439 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
440 {
441 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
442 return u64EffAddr;
443 }
444 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
445 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
446 return u64EffAddr & UINT32_MAX;
447}
448
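/*
 * Illustrative only: a worked example of the ModR/M forms the function above
 * decodes, kept in an #if 0 block so it is clearly not part of the original
 * decoding path. The byte values (0x52, 0x44, 0x88) are made up; the
 * X86_MODRM_* and X86_SIB_* constants are the same ones used above.
 */
#if 0
static void iemOpHlpCalcRmEffAddrExample(void)
{
    /* 16-bit addressing: bRm=0x52 -> mod=01 (disp8 follows), rm=010 -> [bp+si+disp8],
       one of the cases that makes SS the default segment (SET_SS_DEF above). */
    uint8_t const bRm = 0x52;
    Assert(((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == 1);
    Assert((bRm & X86_MODRM_RM_MASK) == 2);

    /* 32-bit addressing: bRm=0x44 -> mod=01, rm=100 -> a SIB byte follows; with
       bSib=0x88 -> scale=2 (x4), index=001 (ecx), base=000 (eax), i.e. the
       effective address is eax + ecx*4 + disp8. */
    uint8_t const bSib = 0x88;
    Assert(((bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK) == 2);
    Assert(((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) == 1);
    Assert((bSib & X86_SIB_BASE_MASK) == 0);
}
#endif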
449
450
451/*********************************************************************************************************************************
452* Translation Block Cache. *
453*********************************************************************************************************************************/
454
455/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
456static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
457{
458 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
459 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
460 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
461 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
462 if (cMsSinceUse1 != cMsSinceUse2)
463 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
464 if (pTb1->cUsed != pTb2->cUsed)
465 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
466 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
467 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
468 return 0;
469}
470
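/*
 * Illustrative only: what the comparator above means in practice. The TB
 * field values below are made up; with pvUser being "now" in milliseconds,
 * younger and more frequently used TBs sort first, so the tail of the sorted
 * array is what the pruning code discards.
 */
#if 0
static void iemTbCachePruneCmpExample(void)
{
    IEMTB Tb1; RT_ZERO(Tb1); Tb1.msLastUsed = 990; Tb1.cUsed = 5; Tb1.fFlags = IEMTB_F_TYPE_NATIVE;
    IEMTB Tb2; RT_ZERO(Tb2); Tb2.msLastUsed = 990; Tb2.cUsed = 2; Tb2.fFlags = IEMTB_F_TYPE_THREADED;
    IEMTB Tb3; RT_ZERO(Tb3); Tb3.msLastUsed = 900; Tb3.cUsed = 9; Tb3.fFlags = IEMTB_F_TYPE_NATIVE;
    void * const pvNow = (void *)(uintptr_t)1000;        /* stand-in for msRecompilerPollNow */

    Assert(iemTbCachePruneCmpTb(&Tb1, &Tb2, pvNow) < 0); /* same age (10 ms), Tb1 used more often */
    Assert(iemTbCachePruneCmpTb(&Tb2, &Tb3, pvNow) < 0); /* Tb2 used 10 ms ago vs 100 ms for Tb3  */
}
#endif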
471#ifdef VBOX_STRICT
472/**
473 * Assertion helper that checks a collisions list count.
474 */
475static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
476{
477 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
478 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
479 while (pTb)
480 {
481 pTb = pTb->pNext;
482 cLeft--;
483 }
484 AssertMsg(cLeft == 0,
485 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
486 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
487}
488#endif
489
490
491DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
492{
493 STAM_PROFILE_START(&pTbCache->StatPrune, a);
494
495 /*
496 * First convert the collision list to an array.
497 */
498 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
499 uintptr_t cInserted = 0;
500 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
501
502 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
503
504 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
505 {
506 apSortedTbs[cInserted++] = pTbCollision;
507 pTbCollision = pTbCollision->pNext;
508 }
509
510 /* Free any excess (impossible). */
511 if (RT_LIKELY(!pTbCollision))
512 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
513 else
514 do
515 {
516 PIEMTB pTbToFree = pTbCollision;
517 pTbCollision = pTbToFree->pNext;
518 iemTbAllocatorFree(pVCpu, pTbToFree);
519 } while (pTbCollision);
520
521 /*
522 * Sort it by most recently used and usage count.
523 */
524 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
525
526 /* We keep half the list for now. Perhaps a bit aggressive... */
527 uintptr_t const cKeep = cInserted / 2;
528
529 /* First free up the TBs we don't wish to keep (before creating the new
530 list because otherwise the free code will scan the list for each one
531 without ever finding it). */
532 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
533 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
534
535 /* Then chain the new TB together with the existing ones we wish to keep
536 and insert this list into the hash table. */
537 pTbCollision = pTb;
538 for (uintptr_t idx = 0; idx < cKeep; idx++)
539 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
540 pTbCollision->pNext = NULL;
541
542 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
543#ifdef VBOX_STRICT
544 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
545#endif
546
547 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
548}
549
550
551static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
552{
553 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
554 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
555 if (!pTbOldHead)
556 {
557 pTb->pNext = NULL;
558 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
559 }
560 else
561 {
562 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
563 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
564 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
565 {
566 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
567 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
568#ifdef VBOX_STRICT
569 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
570#endif
571 }
572 else
573 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
574 }
575}
576
577
578/**
579 * Unlinks @a pTb from the hash table if found in it.
580 *
581 * @returns true if unlinked, false if not present.
582 * @param pTbCache The hash table.
583 * @param pTb The TB to remove.
584 */
585static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
586{
587 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
588 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
589 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
590
591 /*
592 * At the head of the collision list?
593 */
594 if (pTbHash == pTb)
595 {
596 if (!pTb->pNext)
597 pTbCache->apHash[idxHash] = NULL;
598 else
599 {
600 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
601 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
602#ifdef VBOX_STRICT
603 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
604#endif
605 }
606 return true;
607 }
608
609 /*
610 * Search the collision list.
611 */
612 PIEMTB const pTbHead = pTbHash;
613 while (pTbHash)
614 {
615 PIEMTB const pNextTb = pTbHash->pNext;
616 if (pNextTb == pTb)
617 {
618 pTbHash->pNext = pTb->pNext;
619 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
620#ifdef VBOX_STRICT
621 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
622#endif
623 return true;
624 }
625 pTbHash = pNextTb;
626 }
627 return false;
628}
629
630
631/**
632 * Looks up a TB for the given PC and flags in the cache.
633 *
634 * @returns Pointer to TB on success, NULL if not found.
635 * @param pVCpu The cross context virtual CPU structure of the
636 * calling thread.
637 * @param pTbCache The translation block cache.
638 * @param GCPhysPc The PC to look up a TB for.
639 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
640 * the lookup.
641 * @thread EMT(pVCpu)
642 */
643static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
644 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
645{
646 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
647
648 /*
649 * First consult the lookup table entry.
650 */
651 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
652 PIEMTB pTb = *ppTbLookup;
653 if (pTb)
654 {
655 if (pTb->GCPhysPc == GCPhysPc)
656 {
657 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
658 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
659 {
660 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
661 {
662 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
663 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
664 pTb->cUsed++;
665#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
666 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
667 {
668 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
669 return pTb;
670 }
671 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
672# ifdef VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING
673 iemThreadedSaveTbForProfiling(pVCpu, pTb);
674# endif
675 return iemNativeRecompile(pVCpu, pTb);
676#else
677 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
678 return pTb;
679#endif
680 }
681 }
682 }
683 }
684
685 /*
686 * Then consult the hash table.
687 */
688 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
689#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
690 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
691#endif
692 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
693 while (pTb)
694 {
695 if (pTb->GCPhysPc == GCPhysPc)
696 {
697 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
698 {
699 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
700 {
701 STAM_COUNTER_INC(&pTbCache->cLookupHits);
702 AssertMsg(cLeft > 0, ("%d\n", cLeft));
703
704 *ppTbLookup = pTb;
705 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
706 pTb->cUsed++;
707#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
708 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
709 {
710 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
711 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
712 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
713 return pTb;
714 }
715 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
716 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
717 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
718 return iemNativeRecompile(pVCpu, pTb);
719#else
720 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
721 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
722 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
723 return pTb;
724#endif
725 }
726 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
727 }
728 else
729 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
730 }
731 else
732 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
733
734 pTb = pTb->pNext;
735#ifdef VBOX_STRICT
736 cLeft--;
737#endif
738 }
739 AssertMsg(cLeft == 0, ("%d\n", cLeft));
740 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
741 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
742 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
743 return pTb;
744}
745
746
747/*********************************************************************************************************************************
748* Translation Block Allocator. *
749*********************************************************************************************************************************/
750/*
751 * Translation block allocation management.
752 */
753
754#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
755# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
756 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
757# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
758 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
759# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
760 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
761#else
762# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
763 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
764# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
765 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
766# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
767 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
768#endif
769/** Makes a TB index from a chunk index and TB index within that chunk. */
770#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
771 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
772
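/*
 * Illustrative only: how the IEMTBALLOC_IDX_* macros above split a flat TB
 * index into a (chunk, index-in-chunk) pair and join it back together. The
 * cTbsPerChunk value of 8192 is made up for the example.
 */
#if 0
static void iemTbAllocIdxExample(PIEMTBALLOCATOR pTbAllocator)
{
    Assert(pTbAllocator->cTbsPerChunk == 8192);                                   /* assumed for the example   */
    uint32_t const idxTb      = 20000;
    uint32_t const idxChunk   = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);     /* 20000 / 8192      -> 2    */
    uint32_t const idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk); /*    -> 3616 */
    Assert(IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk) == idxTb);     /* 2 * 8192 + 3616   -> 20000 */
}
#endif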
773
774/**
775 * Initializes the TB allocator and cache for an EMT.
776 *
777 * @returns VBox status code.
778 * @param pVM The VM handle.
779 * @param cInitialTbs The initial number of translation blocks to
780 * preallocate.
781 * @param cMaxTbs The max number of translation blocks allowed.
782 * @param cbInitialExec The initial size of the executable memory allocator.
783 * @param cbMaxExec The max size of the executable memory allocator.
784 * @param cbChunkExec The chunk size for executable memory allocator. Zero
785 * or UINT32_MAX for automatically determining this.
786 * @thread EMT
787 */
788DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
789 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
790{
791 PVMCPUCC pVCpu = VMMGetCpu(pVM);
792 Assert(!pVCpu->iem.s.pTbCacheR3);
793 Assert(!pVCpu->iem.s.pTbAllocatorR3);
794
795 /*
796 * Calculate the chunk size of the TB allocator.
797 * The minimum chunk size is 2MiB.
798 */
799 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
800 uint32_t cbPerChunk = _2M;
801 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
802#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
803 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
804 uint8_t cChunkShift = 21 - cTbShift;
805 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
806#endif
807 for (;;)
808 {
809 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
810 break;
811 cbPerChunk *= 2;
812 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
813#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
814 cChunkShift += 1;
815#endif
816 }
817
818 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
819 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
820 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
821
822 cMaxTbs = cMaxChunks * cTbsPerChunk;
823
824 /*
825 * Allocate and initialize it.
826 */
827 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
828 if (!pTbAllocator)
829 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
830 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
831 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
832 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
833 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
834 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
835 pTbAllocator->cbPerChunk = cbPerChunk;
836 pTbAllocator->cMaxTbs = cMaxTbs;
837 pTbAllocator->pTbsFreeHead = NULL;
838#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
839 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
840 pTbAllocator->cChunkShift = cChunkShift;
841 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
842#endif
843
844 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
845
846 /*
847 * Allocate the initial chunks.
848 */
849 for (uint32_t idxChunk = 0; ; idxChunk++)
850 {
851 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
852 if (!paTbs)
853 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
854 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
855 cbPerChunk, idxChunk, pVCpu->idCpu);
856
857 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
858 {
859 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
860 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
861 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
862 }
863 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
864 pTbAllocator->cTotalTbs += cTbsPerChunk;
865
866 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
867 break;
868 }
869
870 /*
871 * Calculate the size of the hash table. We double the max TB count and
872 * round it up to the nearest power of two.
873 */
874 uint32_t cCacheEntries = cMaxTbs * 2;
875 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
876 {
877 uint8_t const iBitTop = ASMBitLastSetU32(cCacheEntries); /* most significant set bit, so RT_BIT_32(iBitTop) rounds up */
878 cCacheEntries = RT_BIT_32(iBitTop);
879 Assert(cCacheEntries >= cMaxTbs * 2);
880 }
881
882 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
883 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
884 if (!pTbCache)
885 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
886 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
887 cbTbCache, cCacheEntries, pVCpu->idCpu);
888
889 /*
890 * Initialize it (assumes zeroed by the allocator).
891 */
892 pTbCache->uMagic = IEMTBCACHE_MAGIC;
893 pTbCache->cHash = cCacheEntries;
894 pTbCache->uHashMask = cCacheEntries - 1;
895 Assert(pTbCache->cHash > pTbCache->uHashMask);
896 pVCpu->iem.s.pTbCacheR3 = pTbCache;
897
898 /*
899 * Initialize the native executable memory allocator.
900 */
901#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
902 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
903 AssertLogRelRCReturn(rc, rc);
904#else
905 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
906#endif
907
908 return VINF_SUCCESS;
909}
910
911
912/**
913 * Inner free worker.
914 *
915 * The @a a_fType parameter allows us to eliminate the type check when we know
916 * which type of TB is being freed.
917 */
918template<uint32_t a_fType>
919DECL_FORCE_INLINE(void)
920iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
921{
922#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
923 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED || a_fType == IEMTB_F_TYPE_NATIVE);
924#else
925 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED);
926#endif
927 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
928 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
929 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
930#ifdef VBOX_STRICT
931 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
932 Assert(pTbOther != pTb);
933#endif
934
935 /*
936 * Unlink the TB from the hash table.
937 */
938 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
939
940 /*
941 * Free the TB itself.
942 */
943 if RT_CONSTEXPR_IF(a_fType == 0)
944 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
945 {
946 case IEMTB_F_TYPE_THREADED:
947 pTbAllocator->cThreadedTbs -= 1;
948 RTMemFree(pTb->Thrd.paCalls);
949 break;
950#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
951 case IEMTB_F_TYPE_NATIVE:
952 pTbAllocator->cNativeTbs -= 1;
953 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
954 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
955 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
956 break;
957#endif
958 default:
959 AssertFailed();
960 }
961#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
962 else if RT_CONSTEXPR_IF(a_fType == IEMTB_F_TYPE_NATIVE)
963 {
964 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
965 pTbAllocator->cNativeTbs -= 1;
966 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
967 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
968 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
969 }
970#endif
971 else
972 {
973 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
974 pTbAllocator->cThreadedTbs -= 1;
975 RTMemFree(pTb->Thrd.paCalls);
976 }
977
978 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
979
980 pTb->pNext = pTbAllocator->pTbsFreeHead;
981 pTbAllocator->pTbsFreeHead = pTb;
982 pTb->fFlags = 0;
983 pTb->GCPhysPc = UINT64_MAX;
984 pTb->Gen.uPtr = 0;
985 pTb->Gen.uData = 0;
986 pTb->cTbLookupEntries = 0;
987 pTb->cbOpcodes = 0;
988 pTb->pabOpcodes = NULL;
989
990 Assert(pTbAllocator->cInUseTbs > 0);
991
992 pTbAllocator->cInUseTbs -= 1;
993 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
994}
995
996
997/**
998 * Frees the given TB.
999 *
1000 * @param pVCpu The cross context virtual CPU structure of the calling
1001 * thread.
1002 * @param pTb The translation block to free.
1003 * @thread EMT(pVCpu)
1004 */
1005DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
1006{
1007 /*
1008 * Validate state.
1009 */
1010 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1011 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1012 uint8_t const idxChunk = pTb->idxAllocChunk;
1013 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
1014 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
1015 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
1016
1017 /*
1018 * Invalidate the TB lookup pointer and call the inner worker.
1019 */
1020 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1021 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1022}
1023
1024#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
1025
1026/**
1027 * Interface used by iemExecMemAllocatorPrune.
1028 */
1029DECLHIDDEN(void) iemTbAllocatorFreeBulk(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb)
1030{
1031 Assert(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1032
1033 uint8_t const idxChunk = pTb->idxAllocChunk;
1034 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
1035 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
1036 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
1037
1038 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1039}
1040
1041
1042/**
1043 * Interface used by iemExecMemAllocatorPrune.
1044 */
1045DECLHIDDEN(PIEMTBALLOCATOR) iemTbAllocatorFreeBulkStart(PVMCPUCC pVCpu)
1046{
1047 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1048 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1049
1050 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1051
1052 /* It should be sufficient to do this once. */
1053 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1054
1055 return pTbAllocator;
1056}
1057
1058#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
1059
1060/**
1061 * Schedules a TB for freeing when it's no longer being executed and/or part of
1062 * the caller's call stack.
1063 *
1064 * The TB will be removed from the translation block cache, though, so it isn't
1065 * possible to execute it again and the IEMTB::pNext member can be used to link
1066 * it together with other TBs awaiting freeing.
1067 *
1068 * @param pVCpu The cross context virtual CPU structure of the calling
1069 * thread.
1070 * @param pTb The translation block to schedule for freeing.
1071 */
1072static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1073{
1074 /*
1075 * Validate state.
1076 */
1077 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1078 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1079 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1080 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1081 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1082 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1083#ifdef VBOX_STRICT
1084 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1085 Assert(pTbOther != pTb);
1086#endif
1087
1088 /*
1089 * Remove it from the cache and prepend it to the allocator's todo list.
1090 *
1091 * Note! It could still be in various lookup tables, so we trash the GCPhys
1092 * and CS attribs to ensure it won't be reused.
1093 */
1094 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1095 pTb->GCPhysPc = NIL_RTGCPHYS;
1096 pTb->x86.fAttr = UINT16_MAX;
1097
1098 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1099 pTbAllocator->pDelayedFreeHead = pTb;
1100}
1101
1102
1103/**
1104 * Processes the delayed frees.
1105 *
1106 * This is called by the allocator function as well as the native recompile
1107 * function before making any TB or executable memory allocations respectively.
1108 */
1109void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1110{
1111 /** @todo r-bird: these have already been removed from the cache,
1112 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1113 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1114 pTbAllocator->pDelayedFreeHead = NULL;
1115 while (pTb)
1116 {
1117 PIEMTB const pTbNext = pTb->pNext;
1118 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1119 iemTbAllocatorFree(pVCpu, pTb);
1120 pTb = pTbNext;
1121 }
1122}
1123
1124
1125#if 0
1126/**
1127 * Frees all TBs.
1128 */
1129static int iemTbAllocatorFreeAll(PVMCPUCC pVCpu)
1130{
1131 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1132 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1133 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1134
1135 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1136
1137 uint32_t idxChunk = pTbAllocator->cAllocatedChunks;
1138 while (idxChunk-- > 0)
1139 {
1140 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1141 uint32_t idxTb = pTbAllocator->cTbsPerChunk;
1142 while (idxTb-- > 0)
1143 {
1144 PIEMTB const pTb = &paTbs[idxTb];
1145 if (pTb->fFlags)
1146 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxTb);
1147 }
1148 }
1149
1150 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1151
1152# if 1
1153 /* Reset the free list. */
1154 pTbAllocator->pTbsFreeHead = NULL;
1155 idxChunk = pTbAllocator->cAllocatedChunks;
1156 while (idxChunk-- > 0)
1157 {
1158 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1159 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1160 RT_BZERO(paTbs, sizeof(paTbs[0]) * cTbsPerChunk);
1161 for (uint32_t idxTb = 0; idxTb < cTbsPerChunk; idxTb++)
1162 {
1163 paTbs[idxTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1164 paTbs[idxTb].pNext = pTbAllocator->pTbsFreeHead;
1165 pTbAllocator->pTbsFreeHead = &paTbs[idxTb];
1166 }
1167 }
1168# endif
1169
1170# if 1
1171 /* Completely reset the TB cache. */
1172 RT_BZERO(pVCpu->iem.s.pTbCacheR3->apHash, sizeof(pVCpu->iem.s.pTbCacheR3->apHash[0]) * pVCpu->iem.s.pTbCacheR3->cHash);
1173# endif
1174
1175 return VINF_SUCCESS;
1176}
1177#endif
1178
1179
1180/**
1181 * Grow the translation block allocator with another chunk.
1182 */
1183static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1184{
1185 /*
1186 * Validate state.
1187 */
1188 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1189 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1190 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1191 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1192 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1193
1194 /*
1195 * Allocate a new chunk and add it to the allocator.
1196 */
1197 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1198 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1199 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1200
1201 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1202 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1203 {
1204 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1205 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1206 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1207 }
1208 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1209 pTbAllocator->cTotalTbs += cTbsPerChunk;
1210
1211 return VINF_SUCCESS;
1212}
1213
1214
1215/**
1216 * Allocates a TB from an allocator that has a free block.
1217 *
1218 * This is common code to both the fast and slow allocator code paths.
1219 */
1220DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1221{
1222 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1223 Assert(pTbAllocator->pTbsFreeHead);
1224
1225 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1226 pTbAllocator->pTbsFreeHead = pTb->pNext;
1227 pTbAllocator->cInUseTbs += 1;
1228 if (fThreaded)
1229 pTbAllocator->cThreadedTbs += 1;
1230 else
1231 pTbAllocator->cNativeTbs += 1;
1232 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1233 return pTb;
1234}
1235
1236
1237/**
1238 * Slow path for iemTbAllocatorAlloc.
1239 */
1240static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1241{
1242 /*
1243 * With some luck we can add another chunk.
1244 */
1245 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1246 {
1247 int rc = iemTbAllocatorGrow(pVCpu);
1248 if (RT_SUCCESS(rc))
1249 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1250 }
1251
1252 /*
1253 * We have to prune stuff. Sigh.
1254 *
1255 * This requires scanning for older TBs and kicking them out. Not sure how to
1256 * best do this as we don't want to maintain any list of TBs ordered by last
1257 * usage time. But one reasonably simple approach would be that each time we
1258 * get here we continue a sequential scan of the allocation chunks,
1259 * considering just a smallish number of TBs and freeing a fixed portion of
1260 * them. Say, we consider the next 128 TBs, freeing the least recently used
1261 * in out of groups of 4 TBs, resulting in 32 free TBs.
1262 */
1263 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1264 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1265 uint32_t const cTbsToPrune = 128;
1266 uint32_t const cTbsPerGroup = 4;
1267 uint32_t cFreedTbs = 0;
1268#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1269 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1270#else
1271 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1272#endif
1273 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1274 idxTbPruneFrom = 0;
1275 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1276 {
1277 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1278 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1279 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1280 uint32_t cMsAge = msNow - pTb->msLastUsed;
1281 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1282
1283 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1284 {
1285#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1286 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1287 { /* likely */ }
1288 else
1289 {
1290 idxInChunk2 = 0;
1291 idxChunk2 += 1;
1292 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1293 idxChunk2 = 0;
1294 }
1295#endif
1296 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1297 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1298 if ( cMsAge2 > cMsAge
1299 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1300 {
1301 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1302 pTb = pTb2;
1303 idxChunk = idxChunk2;
1304 idxInChunk = idxInChunk2;
1305 cMsAge = cMsAge2;
1306 }
1307 }
1308
1309 /* Free the TB. */
1310 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1311 cFreedTbs++; /* paranoia */
1312 }
1313 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1314 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1315
1316 /* Flush the TB lookup entry pointer. */
1317 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1318
1319 /*
1320 * Allocate a TB from the ones we've pruned.
1321 */
1322 if (cFreedTbs)
1323 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1324 return NULL;
1325}
1326
1327
1328/**
1329 * Allocate a translation block.
1330 *
1331 * @returns Pointer to block on success, NULL if we're out and unable to
1332 * free up an existing one (very unlikely once implemented).
1333 * @param pVCpu The cross context virtual CPU structure of the calling
1334 * thread.
1335 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1336 * For statistics.
1337 */
1338DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1339{
1340 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1341 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1342
1343 /* Free any pending TBs before we proceed. */
1344 if (!pTbAllocator->pDelayedFreeHead)
1345 { /* probably likely */ }
1346 else
1347 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1348
1349 /* If the allocator is full, take the slow code path. */
1350 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1351 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1352 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1353}
1354
1355
1356/**
1357 * This is called when we're out of space for native TBs.
1358 *
1359 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1360 * The difference is that we only prune native TBs and will only free any if
1361 * there are at least two in a group. The conditions under which we're called are
1362 * different - there will probably be free TBs in the table when we're called.
1363 * Therefore we increase the group size and max scan length, though we'll stop
1364 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1365 * up at least 8 TBs.
1366 */
1367void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1368{
1369 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1370 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1371
1372 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1373
1374 /*
1375 * Flush the delayed free list before we start freeing TBs indiscriminately.
1376 */
1377 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1378
1379 /*
1380 * Scan and free TBs.
1381 */
1382 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1383 uint32_t const cTbsToPrune = 128 * 8;
1384 uint32_t const cTbsPerGroup = 4 * 4;
1385 uint32_t cFreedTbs = 0;
1386 uint32_t cMaxInstrs = 0;
1387 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1388 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1389 {
1390 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1391 idxTbPruneFrom = 0;
1392 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1393 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1394 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1395 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1396 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1397
1398 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1399 {
1400 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1401 { /* likely */ }
1402 else
1403 {
1404 idxInChunk2 = 0;
1405 idxChunk2 += 1;
1406 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1407 idxChunk2 = 0;
1408 }
1409 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1410 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1411 {
1412 cNativeTbs += 1;
1413 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1414 if ( cMsAge2 > cMsAge
1415 || ( cMsAge2 == cMsAge
1416 && ( pTb2->cUsed < pTb->cUsed
1417 || ( pTb2->cUsed == pTb->cUsed
1418 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1419 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1420 {
1421 pTb = pTb2;
1422 idxChunk = idxChunk2;
1423 idxInChunk = idxInChunk2;
1424 cMsAge = cMsAge2;
1425 }
1426 }
1427 }
1428
1429 /* Free the TB if we found at least two native ones in this group. */
1430 if (cNativeTbs >= 2)
1431 {
1432 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1433 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1434 cFreedTbs++;
1435 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1436 break;
1437 }
1438 }
1439 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1440
1441 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1442}
1443
1444
1445/*********************************************************************************************************************************
1446* Threaded Recompiler Core *
1447*********************************************************************************************************************************/
1448/**
1449 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1450 * @returns pszBuf.
1451 * @param fFlags The flags.
1452 * @param pszBuf The output buffer.
1453 * @param cbBuf The output buffer size. At least 32 bytes.
1454 */
1455DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1456{
1457 Assert(cbBuf >= 32);
1458 static RTSTRTUPLE const s_aModes[] =
1459 {
1460 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1461 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1462 /* [02] = */ { RT_STR_TUPLE("!2!") },
1463 /* [03] = */ { RT_STR_TUPLE("!3!") },
1464 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1465 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1466 /* [06] = */ { RT_STR_TUPLE("!6!") },
1467 /* [07] = */ { RT_STR_TUPLE("!7!") },
1468 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1469 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1470 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1471 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1472 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1473 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1474 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1475 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1476 /* [10] = */ { RT_STR_TUPLE("!10!") },
1477 /* [11] = */ { RT_STR_TUPLE("!11!") },
1478 /* [12] = */ { RT_STR_TUPLE("!12!") },
1479 /* [13] = */ { RT_STR_TUPLE("!13!") },
1480 /* [14] = */ { RT_STR_TUPLE("!14!") },
1481 /* [15] = */ { RT_STR_TUPLE("!15!") },
1482 /* [16] = */ { RT_STR_TUPLE("!16!") },
1483 /* [17] = */ { RT_STR_TUPLE("!17!") },
1484 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1485 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1486 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1487 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1488 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1489 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1490 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1491 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1492 };
1493 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1494 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1495 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1496
1497 pszBuf[off++] = ' ';
1498 pszBuf[off++] = 'C';
1499 pszBuf[off++] = 'P';
1500 pszBuf[off++] = 'L';
1501 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1502 Assert(off < 32);
1503
1504 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1505
1506 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1507 {
1508 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1509 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1510 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1511 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1512 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1513 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1514 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1515 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1516 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1517 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1518 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1519 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1520 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1521 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1522 };
1523 if (fFlags)
1524 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1525 if (s_aFlags[i].fFlag & fFlags)
1526 {
1527 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1528 pszBuf[off++] = ' ';
1529 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1530 off += s_aFlags[i].cchName;
1531 fFlags &= ~s_aFlags[i].fFlag;
1532 if (!fFlags)
1533 break;
1534 }
1535 pszBuf[off] = '\0';
1536
1537 return pszBuf;
1538}
1539
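/*
 * Illustrative only: typical use of iemTbFlagsToString, formatting TB flags
 * into a caller-provided buffer for logging (the buffer must be at least 32
 * bytes, see above). The same pattern appears for real further down in
 * iemThreadedDisassembleTb.
 */
#if 0
static void iemTbFlagsToStringExample(PCIEMTB pTb)
{
    char szFlags[128];
    Log2(("TB %p: fFlags=%#010x %s\n", pTb, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szFlags, sizeof(szFlags))));
}
#endif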
1540
1541/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1542static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1543{
1544 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1545 pDis->cbCachedInstr += cbMaxRead;
1546 RT_NOREF(cbMinRead);
1547 return VERR_NO_DATA;
1548}
1549
1550
1551/**
1552 * Worker for iemThreadedDisassembleTb.
1553 */
1554static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1555 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1556{
1557 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1558 {
1559 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1560 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1561 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1562 {
1563 PIEMTB pLookupTb = papTbLookup[iLookup];
1564 if (pLookupTb)
1565 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1566 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1567 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1568 : "invalid");
1569 else
1570 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1571 }
1572 pHlp->pfnPrintf(pHlp, "\n");
1573 }
1574 else
1575 {
1576 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1577 idxFirst, cEntries, pTb->cTbLookupEntries);
1578 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1579 }
1580}
1581
1582
1583DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1584{
1585 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1586
1587 char szDisBuf[512];
1588
1589 /*
1590 * Print TB info.
1591 */
1592 pHlp->pfnPrintf(pHlp,
1593 "pTb=%p: GCPhysPc=%RGp (%RGv) cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1594 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1595 pTb, pTb->GCPhysPc, pTb->FlatPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1596 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1597
1598 /*
1599 * The disassembly is driven by the threaded call table and the TB's opcode
1600 * ranges: whenever a call entry advances the opcode offset, the corresponding
1601 * guest instruction is disassembled from the copied opcode bytes.
1602 */
1603 DISSTATE Dis;
1604 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1605 uint32_t const cCalls = pTb->Thrd.cCalls;
1606 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1607 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1608 : DISCPUMODE_64BIT;
1609 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1610 uint8_t idxRange = UINT8_MAX;
1611 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1612 uint32_t offRange = 0;
1613 uint32_t offOpcodes = 0;
1614 uint32_t const cbOpcodes = pTb->cbOpcodes;
1615 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1616 bool fTbLookupSeen0 = false;
1617
1618 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1619 {
1620 /*
1621 * New opcode range?
1622 */
1623 if ( idxRange == UINT8_MAX
1624 || idxRange >= cRanges
1625 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1626 {
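/* Advance to the next opcode range; any overshoot of offRange beyond the previous range carries over as the starting offset within the new one. */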
1627 idxRange += 1;
1628 if (idxRange < cRanges)
1629 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1630 else
1631 continue;
1632 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1633 + (pTb->aRanges[idxRange].idxPhysPage == 0
1634 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1635 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1636 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1637 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1638 pTb->aRanges[idxRange].idxPhysPage);
1639 GCPhysPc += offRange;
1640 }
1641
1642 /*
1643 * Disassemble another guest instruction?
1644 */
1645 if ( paCalls[iCall].offOpcode != offOpcodes
1646 && paCalls[iCall].cbOpcode > 0
1647 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1648 {
1649 offOpcodes = paCalls[iCall].offOpcode;
1650 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1651 uint32_t cbInstr = 1;
1652 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1653 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1654 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1655 if (RT_SUCCESS(rc))
1656 {
1657 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1658 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1659 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1660 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1661 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1662 }
1663 else
1664 {
1665 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1666 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1667 cbInstr = paCalls[iCall].cbOpcode;
1668 }
1669 GCPhysPc += cbInstr;
1670 offRange += cbInstr;
1671 }
1672
1673 /*
1674 * Dump call details.
1675 */
1676 pHlp->pfnPrintf(pHlp,
1677 " Call #%u to %s (%u args)\n",
1678 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1679 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1680 if (paCalls[iCall].uTbLookup != 0)
1681 {
1682 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1683 fTbLookupSeen0 = idxFirst == 0;
1684 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1685 }
1686
1687 /*
1688 * Snoop fExec.
1689 */
1690 switch (paCalls[iCall].enmFunction)
1691 {
1692 default:
1693 break;
1694 case kIemThreadedFunc_BltIn_CheckMode:
1695 fExec = paCalls[iCall].auParams[0];
1696 break;
1697 }
1698 }
1699
1700 if (!fTbLookupSeen0)
1701 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1702}
1703
1704
1705
1706/**
1707 * Allocate a translation block for threaded recompilation.
1708 *
1709 * This is allocated with maxed out call table and storage for opcode bytes,
1710 * because it's only supposed to be called once per EMT to allocate the TB
1711 * pointed to by IEMCPU::pThrdCompileTbR3.
1712 *
1713 * @returns Pointer to the translation block on success, NULL on failure.
1714 * @param pVM The cross context virtual machine structure.
1715 * @param pVCpu The cross context virtual CPU structure of the calling
1716 * thread.
1717 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1718 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1719 */
1720static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1721{
1722 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1723 if (pTb)
1724 {
1725 unsigned const cCalls = 256;
1726 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1727 if (pTb->Thrd.paCalls)
1728 {
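/* Opcode storage is sized at 16 bytes per call entry, which comfortably covers the 15 byte maximum x86 instruction length per decoded instruction. */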
1729 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1730 if (pTb->pabOpcodes)
1731 {
1732 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1733 pTb->Thrd.cAllocated = cCalls;
1734 pTb->Thrd.cCalls = 0;
1735 pTb->cbOpcodes = 0;
1736 pTb->pNext = NULL;
1737 pTb->cUsed = 0;
1738 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1739 pTb->idxAllocChunk = UINT8_MAX;
1740 pTb->GCPhysPc = GCPhysPc;
1741 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1742 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1743 pTb->cInstructions = 0;
1744 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1745
1746 /* Init the first opcode range. */
1747 pTb->cRanges = 1;
1748 pTb->aRanges[0].cbOpcodes = 0;
1749 pTb->aRanges[0].offOpcodes = 0;
1750 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1751 pTb->aRanges[0].u2Unused = 0;
1752 pTb->aRanges[0].idxPhysPage = 0;
1753 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1754 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1755
1756 return pTb;
1757 }
1758 RTMemFree(pTb->Thrd.paCalls);
1759 }
1760 RTMemFree(pTb);
1761 }
1762 RT_NOREF(pVM);
1763 return NULL;
1764}
1765
1766
1767/**
1768 * Called on the TB that is dedicated for recompilation before it's reused.
1769 *
1770 * @param pVCpu The cross context virtual CPU structure of the calling
1771 * thread.
1772 * @param pTb The translation block to reuse.
1773 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1774 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1775 */
1776static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1777{
1778 pTb->GCPhysPc = GCPhysPc;
1779 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1780 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1781 pTb->Thrd.cCalls = 0;
1782 pTb->cbOpcodes = 0;
1783 pTb->cInstructions = 0;
1784 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1785
1786 /* Init the first opcode range. */
1787 pTb->cRanges = 1;
1788 pTb->aRanges[0].cbOpcodes = 0;
1789 pTb->aRanges[0].offOpcodes = 0;
1790 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1791 pTb->aRanges[0].u2Unused = 0;
1792 pTb->aRanges[0].idxPhysPage = 0;
1793 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1794 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1795}
1796
1797
1798/**
1799 * Used to duplicate a threaded translation block after recompilation is done.
1800 *
1801 * @returns Pointer to the translation block on success, NULL on failure.
1802 * @param pVM The cross context virtual machine structure.
1803 * @param pVCpu The cross context virtual CPU structure of the calling
1804 * thread.
1805 * @param pTbSrc The TB to duplicate.
1806 */
1807static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1808{
1809 /*
1810 * Just using the heap for now. Will make this more efficient and
1811 * complicated later, don't worry. :-)
1812 */
1813 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1814 if (pTb)
1815 {
1816 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1817 memcpy(pTb, pTbSrc, sizeof(*pTb));
1818 pTb->idxAllocChunk = idxAllocChunk;
1819
1820 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1821 Assert(cCalls > 0);
1822 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1823 if (pTb->Thrd.paCalls)
1824 {
1825 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1826 Assert(cbTbLookup > 0);
1827 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1828 Assert(cbOpcodes > 0);
1829 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1830 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1831 if (pbBoth)
1832 {
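/* One allocation holds both tables: the zeroed TB-lookup pointer table comes first and the copied opcode bytes follow it, so pabOpcodes points just past the lookup table. */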
1833 RT_BZERO(pbBoth, cbTbLookup);
1834 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1835 pTb->Thrd.cAllocated = cCalls;
1836 pTb->pNext = NULL;
1837 pTb->cUsed = 0;
1838 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1839 pTb->fFlags = pTbSrc->fFlags;
1840
1841 return pTb;
1842 }
1843 RTMemFree(pTb->Thrd.paCalls);
1844 }
1845 iemTbAllocatorFree(pVCpu, pTb);
1846 }
1847 RT_NOREF(pVM);
1848 return NULL;
1849
1850}
1851
1852
1853/**
1854 * Adds the given TB to the hash table.
1855 *
1856 * @param pVCpu The cross context virtual CPU structure of the calling
1857 * thread.
1858 * @param pTbCache The cache to add it to.
1859 * @param pTb The translation block to add.
1860 */
1861static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1862{
1863 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1864
1865 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1866 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1867 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1868 if (LogIs12Enabled())
1869 {
1870 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1871 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1872 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1873 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1874 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1875 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1876 pTb->aRanges[idxRange].idxPhysPage == 0
1877 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1878 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1879 }
1880}
1881
1882
1883/**
1884 * Called by opcode verifier functions when they detect a problem.
1885 */
1886void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1887{
1888 /* We cannot free the current TB (indicated by fSafeToFree) because:
1889 - A threaded TB will have its current call entry accessed
1890 to update pVCpu->iem.s.cInstructions.
1891 - A native TB will have code left to execute. */
1892 if (fSafeToFree)
1893 iemTbAllocatorFree(pVCpu, pTb);
1894 else
1895 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1896}
1897
1898
1899/*
1900 * Real code.
1901 */
1902
1903#ifdef LOG_ENABLED
1904/**
1905 * Logs the current instruction.
1906 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1907 * @param pszFunction The IEM function doing the execution.
1908 * @param idxInstr The instruction number in the block.
1909 */
1910static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1911{
1912# ifdef IN_RING3
1913 if (LogIs2Enabled())
1914 {
1915 char szInstr[256];
1916 uint32_t cbInstr = 0;
1917 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1918 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1919 szInstr, sizeof(szInstr), &cbInstr);
1920
1921 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1922 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1923 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1924 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1925 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1926 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1927 " %s\n"
1928 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1929 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1930 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1931 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1932 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1933 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1934 szInstr));
1935
1936 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1937 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1938 }
1939 else
1940# endif
1941 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1942 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1943}
1944#endif /* LOG_ENABLED */
1945
1946
1947#if 0
1948static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1949{
1950 RT_NOREF(pVM, pVCpu);
1951 return rcStrict;
1952}
1953#endif
1954
1955
1956/**
1957 * Initializes the decoder state when compiling TBs.
1958 *
1959 * This presumes that fExec has already been initialized.
1960 *
1961 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1962 * to apply fixes to them as well.
1963 *
1964 * @param pVCpu The cross context virtual CPU structure of the calling
1965 * thread.
1966 * @param fReInit Clear for the first call for a TB, set for subsequent
1967 * calls from inside the compile loop where we can skip a
1968 * couple of things.
1969 * @param fExtraFlags The extra translation block flags when @a fReInit is
1970 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1971 * checked.
1972 */
1973DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1974{
1975 /* ASSUMES: That iemInitExec was already called and that anyone changing
1976 CPU state affecting the fExec bits since then will have updated fExec! */
1977 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1978 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1979
1980 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1981
1982 /* Decoder state: */
1983 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1984 pVCpu->iem.s.enmEffAddrMode = enmMode;
1985 if (enmMode != IEMMODE_64BIT)
1986 {
1987 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1988 pVCpu->iem.s.enmEffOpSize = enmMode;
1989 }
1990 else
1991 {
1992 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1993 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1994 }
1995 pVCpu->iem.s.fPrefixes = 0;
1996 pVCpu->iem.s.uRexReg = 0;
1997 pVCpu->iem.s.uRexB = 0;
1998 pVCpu->iem.s.uRexIndex = 0;
1999 pVCpu->iem.s.idxPrefix = 0;
2000 pVCpu->iem.s.uVex3rdReg = 0;
2001 pVCpu->iem.s.uVexLength = 0;
2002 pVCpu->iem.s.fEvexStuff = 0;
2003 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
2004 pVCpu->iem.s.offModRm = 0;
2005 pVCpu->iem.s.iNextMapping = 0;
2006
2007 if (!fReInit)
2008 {
2009 pVCpu->iem.s.cActiveMappings = 0;
2010 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
2011 pVCpu->iem.s.fEndTb = false;
2012 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes for before executing the first instruction) */
2013 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2014 pVCpu->iem.s.fTbCrossedPage = false;
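/* IRQ check cadence: normally every 32 instructions; with an interrupt shadow pending, re-check right after the first instruction instead. */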
2015 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
2016 pVCpu->iem.s.idxLastCheckIrqCallNo = UINT16_MAX;
2017 pVCpu->iem.s.fTbCurInstrIsSti = false;
2018 /* Force RF clearing and TF checking on first instruction in the block
2019 as we don't really know what came before and should assume the worst: */
2020 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
2021 }
2022 else
2023 {
2024 Assert(pVCpu->iem.s.cActiveMappings == 0);
2025 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
2026 Assert(pVCpu->iem.s.fEndTb == false);
2027 Assert(pVCpu->iem.s.fTbCrossedPage == false);
2028 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
2029 }
2030 pVCpu->iem.s.fTbCurInstr = 0;
2031
2032#ifdef DBGFTRACE_ENABLED
2033 switch (IEM_GET_CPU_MODE(pVCpu))
2034 {
2035 case IEMMODE_64BIT:
2036 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
2037 break;
2038 case IEMMODE_32BIT:
2039 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2040 break;
2041 case IEMMODE_16BIT:
2042 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2043 break;
2044 }
2045#endif
2046}
2047
2048
2049/**
2050 * Initializes the opcode fetcher when starting the compilation.
2051 *
2052 * @param pVCpu The cross context virtual CPU structure of the calling
2053 * thread.
2054 */
2055DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
2056{
2057 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
2058#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2059 pVCpu->iem.s.offOpcode = 0;
2060#else
2061 RT_NOREF(pVCpu);
2062#endif
2063}
2064
2065
2066/**
2067 * Re-initializes the opcode fetcher between instructions while compiling.
2068 *
2069 * @param pVCpu The cross context virtual CPU structure of the calling
2070 * thread.
2071 */
2072DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
2073{
2074 if (pVCpu->iem.s.pbInstrBuf)
2075 {
2076 uint64_t off = pVCpu->cpum.GstCtx.rip;
2077 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2078 off += pVCpu->cpum.GstCtx.cs.u64Base;
2079 off -= pVCpu->iem.s.uInstrBufPc;
2080 if (off < pVCpu->iem.s.cbInstrBufTotal)
2081 {
2082 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2083 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
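/* Allow fetching up to 15 bytes (the maximum x86 instruction length) past the instruction start, clamped to what the prefetched buffer actually holds. */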
2084 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2085 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2086 else
2087 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2088 }
2089 else
2090 {
2091 pVCpu->iem.s.pbInstrBuf = NULL;
2092 pVCpu->iem.s.offInstrNextByte = 0;
2093 pVCpu->iem.s.offCurInstrStart = 0;
2094 pVCpu->iem.s.cbInstrBuf = 0;
2095 pVCpu->iem.s.cbInstrBufTotal = 0;
2096 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2097 }
2098 }
2099 else
2100 {
2101 pVCpu->iem.s.offInstrNextByte = 0;
2102 pVCpu->iem.s.offCurInstrStart = 0;
2103 pVCpu->iem.s.cbInstrBuf = 0;
2104 pVCpu->iem.s.cbInstrBufTotal = 0;
2105#ifdef VBOX_STRICT
2106 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2107#endif
2108 }
2109#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2110 pVCpu->iem.s.offOpcode = 0;
2111#endif
2112}
2113
2114#ifdef LOG_ENABLED
2115
2116/**
2117 * Inserts a NOP call.
2118 *
2119 * This is for debugging.
2120 *
2121 * @returns true on success, false if we're out of call entries.
2122 * @param pTb The translation block being compiled.
2123 */
2124bool iemThreadedCompileEmitNop(PIEMTB pTb)
2125{
2126 /* Emit the call. */
2127 uint32_t const idxCall = pTb->Thrd.cCalls;
2128 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2129 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2130 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2131 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2132 pCall->idxInstr = pTb->cInstructions - 1;
2133 pCall->cbOpcode = 0;
2134 pCall->offOpcode = 0;
2135 pCall->uTbLookup = 0;
2136 pCall->fFlags = 0;
2137 pCall->auParams[0] = 0;
2138 pCall->auParams[1] = 0;
2139 pCall->auParams[2] = 0;
2140 return true;
2141}
2142
2143
2144/**
2145 * Called by iemThreadedCompile if cpu state logging is desired.
2146 *
2147 * @returns true on success, false if we're out of call entries.
2148 * @param pTb The translation block being compiled.
2149 */
2150bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2151{
2152 /* Emit the call. */
2153 uint32_t const idxCall = pTb->Thrd.cCalls;
2154 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2155 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2156 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2157 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2158 pCall->idxInstr = pTb->cInstructions - 1;
2159 pCall->cbOpcode = 0;
2160 pCall->offOpcode = 0;
2161 pCall->uTbLookup = 0;
2162 pCall->fFlags = 0;
2163 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2164 pCall->auParams[1] = 0;
2165 pCall->auParams[2] = 0;
2166 return true;
2167}
2168
2169#endif /* LOG_ENABLED */
2170
2171DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2172{
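/* Unrolled fall-through copy of the 1..15 opcode bytes of the current instruction; avoids a memcpy call for these tiny copies. */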
2173 switch (cbInstr)
2174 {
2175 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2176 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2177 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2178 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2179 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2180 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2181 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2182 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2183 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2184 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2185 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2186 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2187 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2188 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2189 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2190 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2191 }
2192}
2193
2194#ifdef IEM_WITH_INTRA_TB_JUMPS
2195
2196/**
2197 * Emits the necessary tail calls for a full TB loop-jump.
2198 */
2199static bool iemThreadedCompileFullTbJump(PVMCPUCC pVCpu, PIEMTB pTb)
2200{
2201 /*
2202 * We need a timer and maybe IRQ check before jumping, so make sure
2203 * we've got sufficient call entries left before emitting anything.
2204 */
2205 uint32_t idxCall = pTb->Thrd.cCalls;
2206 if (idxCall + 1U <= pTb->Thrd.cAllocated)
2207 {
2208 /*
2209 * We're good, emit the calls.
2210 */
2211 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2212 pTb->Thrd.cCalls = (uint16_t)(idxCall + 2);
2213
2214 /* Always check timers as we risk getting stuck in a loop otherwise. We
2215 combine it with an IRQ check if that's not performed in the TB already. */
2216 pCall->enmFunction = pVCpu->iem.s.idxLastCheckIrqCallNo < idxCall
2217 ? kIemThreadedFunc_BltIn_CheckTimers
2218 : kIemThreadedFunc_BltIn_CheckTimersAndIrq;
2219 pCall->idxInstr = 0;
2220 pCall->offOpcode = 0;
2221 pCall->cbOpcode = 0;
2222 pCall->uTbLookup = 0;
2223 pCall->fFlags = 0;
2224 pCall->auParams[0] = 0;
2225 pCall->auParams[1] = 0;
2226 pCall->auParams[2] = 0;
2227 pCall++;
2228
2229 /* The jump to callentry[0]. */
2230 pCall->enmFunction = kIemThreadedFunc_BltIn_Jump;
2231 pCall->idxInstr = 0;
2232 pCall->offOpcode = 0;
2233 pCall->cbOpcode = 0;
2234 pCall->uTbLookup = 0;
2235 pCall->fFlags = 0;
2236 pCall->auParams[0] = 0; /* jump target is call zero */
2237 pCall->auParams[1] = 0;
2238 pCall->auParams[2] = 0;
2239
2240 /* Mark callentry #0 as a jump target. */
2241 pTb->Thrd.paCalls[0].fFlags |= IEMTHREADEDCALLENTRY_F_JUMP_TARGET;
2242 }
2243
2244 return false;
2245}
2246
2247/**
2248 * Called by IEM_MC2_BEGIN_EMIT_CALLS when it detects that we're back at the
2249 * first instruction and we didn't just branch to it (that's handled below).
2250 *
2251 * This will emit a loop iff everything is compatible with that.
2252 */
2253DECLHIDDEN(int) iemThreadedCompileBackAtFirstInstruction(PVMCPU pVCpu, PIEMTB pTb) RT_NOEXCEPT
2254{
2255 /* Check if the mode matches. */
2256 if ( (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2257 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS))
2258 {
2259 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected2);
2260 iemThreadedCompileFullTbJump(pVCpu, pTb);
2261 }
2262 return VINF_IEM_RECOMPILE_END_TB;
2263}
2264
2265#endif /* IEM_WITH_INTRA_TB_JUMPS */
2266
2267
2268/**
2269 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2270 *
2271 * - CS LIM check required.
2272 * - Must recheck opcode bytes.
2273 * - Previous instruction branched.
2274 * - TLB load detected, probably due to page crossing.
2275 *
2276 * @returns true if everything went well, false if we're out of space in the TB
2277 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
2278 * @param pVCpu The cross context virtual CPU structure of the calling
2279 * thread.
2280 * @param pTb The translation block being compiled.
2281 */
2282bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2283{
2284 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2285 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2286#if 0
2287 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2288 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2289#endif
2290
2291 /*
2292 * If we're not in 64-bit mode and not already checking CS.LIM, we need to
2293 * see whether we have to start doing so.
2294 */
2295 bool fConsiderCsLimChecking;
2296 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2297 if ( fMode == IEM_F_MODE_X86_64BIT
2298 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2299 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2300 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2301 fConsiderCsLimChecking = false; /* already enabled or not needed */
2302 else
2303 {
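/* Heuristic: with at least a whole guest page plus 16 bytes of room between EIP and the CS limit (adjusted for the sub-page offset of the CS base), this TB cannot run into the limit, so CS.LIM checks can stay off. */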
2304 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2305 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2306 fConsiderCsLimChecking = true; /* likely */
2307 else
2308 {
2309 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2310 return false;
2311 }
2312 }
2313
2314 /*
2315 * Prepare the call now, even before we know if we can accept the instruction in this TB.
2316 * This allows us to amend parameters w/o making every case suffer.
2317 */
2318 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2319 uint16_t const offOpcode = pTb->cbOpcodes;
2320 uint8_t idxRange = pTb->cRanges - 1;
2321
2322 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2323 pCall->idxInstr = pTb->cInstructions;
2324 pCall->cbOpcode = cbInstr;
2325 pCall->offOpcode = offOpcode;
2326 pCall->uTbLookup = 0;
2327 pCall->fFlags = 0;
2328 pCall->auParams[0] = (uint32_t)cbInstr
2329 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2330 /* The upper dword is sometimes used for cbStartPage. */;
2331 pCall->auParams[1] = idxRange;
2332 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2333
2334/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2335 * gotten onto. If we do, stop */
2336
2337 /*
2338 * Case 1: We've branched (RIP changed).
2339 *
2340 * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2341 * TB, end the TB here as it is most likely a loop and if it
2342 * made sense to unroll it, the guest code compiler should've
2343 * done it already.
2344 *
2345 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2346 * Req: 1 extra range, no extra phys.
2347 *
2348 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2349 * necessary (fTbCrossedPage is true).
2350 * Req: 1 extra range, probably 1 extra phys page entry.
2351 *
2352 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2353 * but in addition we cross into the following page and require
2354 * another TLB load.
2355 * Req: 2 extra ranges, probably 2 extra phys page entries.
2356 *
2357 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2358 * the following page (thus fTbCrossedPage is true).
2359 * Req: 2 extra ranges, probably 1 extra phys page entry.
2360 *
2361 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
2362 * it may trigger "spuriously" from the CPU point of view because of
2363 * physical page changes that'll invalidate the physical TLB and trigger a
2364 * call to the function. In theory this shouldn't be a big deal, just a bit
2365 * of performance loss as we'll pick the LoadingTlb variants.
2366 *
2367 * Note! We do not currently optimize branching to the next instruction (sorry
2368 * 32-bit PIC code). We could maybe do that in the branching code that
2369 * sets (or not) fTbBranched.
2370 */
2371 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2372 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2373 * code. This'll require filtering out far jmps and calls, as they
2374 * load CS which should technically be considered indirect since the
2375 * GDT/LDT entry's base address can be modified independently from
2376 * the code. */
2377 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2378 {
2379 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2380 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2381 {
2382 /* 1a + 1b - instruction fully within the branched to page. */
2383 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2384 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2385
2386 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2387 {
2388 /* Check that we've got a free range. */
2389 idxRange += 1;
2390 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2391 { /* likely */ }
2392 else
2393 {
2394 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2395 return false;
2396 }
2397 pCall->auParams[1] = idxRange;
2398 pCall->auParams[2] = 0;
2399
2400 /* Check that we've got a free page slot. */
2401 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2402 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2403 uint8_t idxPhysPage;
2404 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2405 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2406 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2407 {
2408 pTb->aGCPhysPages[0] = GCPhysNew;
2409 pTb->aRanges[idxRange].idxPhysPage = 1;
2410 idxPhysPage = UINT8_MAX;
2411 }
2412 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2413 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2414 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2415 {
2416 pTb->aGCPhysPages[1] = GCPhysNew;
2417 pTb->aRanges[idxRange].idxPhysPage = 2;
2418 idxPhysPage = UINT8_MAX;
2419 }
2420 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2421 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2422 else
2423 {
2424 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2425 return false;
2426 }
2427
2428 /* Loop check: We weave the loop check in here to optimize the lookup. */
2429 if (idxPhysPage != UINT8_MAX)
2430 {
2431 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
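/* Single-compare range check (relies on unsigned wrap-around): is offPhysPc within [offPhysPage, offPhysPage + cbOpcodes)? */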
2432 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2433 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2434 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2435 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2436 {
2437 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2438#ifdef IEM_WITH_INTRA_TB_JUMPS
2439 /* If we're looping back to the start of the TB and the mode is still the same,
2440 we could emit a jump optimization. For now we don't do page transitions
2441 as that implies TLB loading and such. */
2442 if ( idxLoopRange == 0
2443 && offPhysPc == pTb->aRanges[0].offPhysPage
2444 && (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2445 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS)
2446 && (pVCpu->iem.s.fTbBranched & ( IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR
2447 | IEMBRANCHED_F_STACK | IEMBRANCHED_F_RELATIVE))
2448 == IEMBRANCHED_F_RELATIVE)
2449 {
2450 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected);
2451 return iemThreadedCompileFullTbJump(pVCpu, pTb);
2452 }
2453#endif
2454 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2455 return false;
2456 }
2457 }
2458
2459 /* Finish setting up the new range. */
2460 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2461 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2462 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2463 pTb->aRanges[idxRange].u2Unused = 0;
2464 pTb->cRanges++;
2465 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2466 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2467 pTb->aRanges[idxRange].offOpcodes));
2468 }
2469 else
2470 {
2471 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2472 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2473 }
2474
2475 /* Determine which function we need to load & check.
2476 Note! For jumps to a new page, we'll set both fTbBranched and
2477 fTbCrossedPage to avoid unnecessary TLB work for intra
2478 page branching */
2479 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2480 || pVCpu->iem.s.fTbCrossedPage)
2481 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2482 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2483 : !fConsiderCsLimChecking
2484 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2485 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2486 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2487 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2488 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2489 : !fConsiderCsLimChecking
2490 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2491 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2492 else
2493 {
2494 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2495 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2496 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2497 : !fConsiderCsLimChecking
2498 ? kIemThreadedFunc_BltIn_CheckOpcodes
2499 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2500 }
2501 }
2502 else
2503 {
2504 /* 1c + 1d - instruction crosses pages. */
2505 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2506 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2507
2508 /* Lazy bird: Check that this isn't case 1c, since we've already
2509 loaded the first physical address. End the TB and
2510 make it a case 2b instead.
2511
2512 Hmm. Too much bother to detect, so just do the same
2513 with case 1d as well. */
2514#if 0 /** @todo get back to this later when we've got the actual branch code in
2515 * place. */
2516 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2517
2518 /* Check that we've got two free ranges. */
2519 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2520 { /* likely */ }
2521 else
2522 return false;
2523 idxRange += 1;
2524 pCall->auParams[1] = idxRange;
2525 pCall->auParams[2] = 0;
2526
2527 /* ... */
2528
2529#else
2530 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2531 return false;
2532#endif
2533 }
2534 }
2535
2536 /*
2537 * Case 2: Page crossing.
2538 *
2539 * Sub-case 2a: The instruction starts on the first byte in the next page.
2540 *
2541 * Sub-case 2b: The instruction has opcode bytes in both the current and
2542 * following page.
2543 *
2544 * Both cases requires a new range table entry and probably a new physical
2545 * page entry. The difference is in which functions to emit and whether to
2546 * add bytes to the current range.
2547 */
2548 else if (pVCpu->iem.s.fTbCrossedPage)
2549 {
2550 /* Check that we've got a free range. */
2551 idxRange += 1;
2552 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2553 { /* likely */ }
2554 else
2555 {
2556 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2557 return false;
2558 }
2559
2560 /* Check that we've got a free page slot. */
2561 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2562 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2563 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2564 pTb->aRanges[idxRange].idxPhysPage = 0;
2565 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2566 || pTb->aGCPhysPages[0] == GCPhysNew)
2567 {
2568 pTb->aGCPhysPages[0] = GCPhysNew;
2569 pTb->aRanges[idxRange].idxPhysPage = 1;
2570 }
2571 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2572 || pTb->aGCPhysPages[1] == GCPhysNew)
2573 {
2574 pTb->aGCPhysPages[1] = GCPhysNew;
2575 pTb->aRanges[idxRange].idxPhysPage = 2;
2576 }
2577 else
2578 {
2579 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2580 return false;
2581 }
2582
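/* Sub-case 2a if the previous range ended exactly on the page boundary (the new instruction starts at offset 0 of the new page); otherwise sub-case 2b (the instruction straddles the boundary). */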
2583 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2584 {
2585 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2586 pCall->auParams[1] = idxRange;
2587 pCall->auParams[2] = 0;
2588
2589 /* Finish setting up the new range. */
2590 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2591 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2592 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2593 pTb->aRanges[idxRange].u2Unused = 0;
2594 pTb->cRanges++;
2595 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2596 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2597 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2598
2599 /* Determine which function we need to load & check. */
2600 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2601 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2602 : !fConsiderCsLimChecking
2603 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2604 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2605 }
2606 else
2607 {
2608 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2609 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2610 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2611 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2612
2613 /* We're good. Split the instruction over the old and new range table entries. */
2614 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2615
2616 pTb->aRanges[idxRange].offPhysPage = 0;
2617 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2618 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2619 pTb->aRanges[idxRange].u2Unused = 0;
2620 pTb->cRanges++;
2621 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2622 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2623 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2624
2625 /* Determine which function we need to load & check. */
2626 if (pVCpu->iem.s.fTbCheckOpcodes)
2627 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2628 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2629 : !fConsiderCsLimChecking
2630 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2631 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2632 else
2633 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2634 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2635 : !fConsiderCsLimChecking
2636 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2637 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2638 }
2639 }
2640
2641 /*
2642 * Regular case: No new range required.
2643 */
2644 else
2645 {
2646 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2647 if (pVCpu->iem.s.fTbCheckOpcodes)
2648 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2649 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2650 : kIemThreadedFunc_BltIn_CheckOpcodes;
2651 else
2652 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2653
2654 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2655 pTb->cbOpcodes = offOpcode + cbInstr;
2656 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2657 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2658 }
2659
2660 /*
2661 * Commit the call.
2662 */
2663 pTb->Thrd.cCalls++;
2664
2665 /*
2666 * Clear state.
2667 */
2668 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2669 pVCpu->iem.s.fTbCrossedPage = false;
2670 pVCpu->iem.s.fTbCheckOpcodes = false;
2671
2672 /*
2673 * Copy opcode bytes.
2674 */
2675 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2676 pTb->cbOpcodes = offOpcode + cbInstr;
2677 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2678
2679 return true;
2680}
2681
2682
2683/**
2684 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2685 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2686 *
2687 * @returns true if anything is pending, false if not.
2688 * @param pVCpu The cross context virtual CPU structure of the calling
2689 * thread.
2690 */
2691DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2692{
2693 uint64_t fCpu = pVCpu->fLocalForcedActions;
2694 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2695#if 1
2696 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2697 if (RT_LIKELY( !fCpu
2698 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2699 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2700 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2701 return false;
2702 return true;
2703#else
2704 return false;
2705#endif
2706
2707}
2708
2709
2710/**
2711 * Called by iemThreadedCompile when a block requires a mode check.
2712 *
2713 * @returns true if we should continue, false if we're out of call entries.
2714 * @param pVCpu The cross context virtual CPU structure of the calling
2715 * thread.
2716 * @param pTb The translation block being compiled.
2717 */
2718static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2719{
2720 /* Emit the call. */
2721 uint32_t const idxCall = pTb->Thrd.cCalls;
2722 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2723 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2724 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2725 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2726 pCall->idxInstr = pTb->cInstructions - 1;
2727 pCall->cbOpcode = 0;
2728 pCall->offOpcode = 0;
2729 pCall->uTbLookup = 0;
2730 pCall->fFlags = 0;
2731 pCall->auParams[0] = pVCpu->iem.s.fExec;
2732 pCall->auParams[1] = 0;
2733 pCall->auParams[2] = 0;
2734 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2735 return true;
2736}
2737
2738
2739/**
2740 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2741 * set.
2742 *
2743 * @returns true if we should continue, false if an IRQ is deliverable or a
2744 * relevant force flag is pending.
2745 * @param pVCpu The cross context virtual CPU structure of the calling
2746 * thread.
2747 * @param pTb The translation block being compiled.
2748 * @sa iemThreadedCompileCheckIrq
2749 */
2750bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2751{
2752 /*
2753 * Skip this if we've already emitted a call after the previous instruction
2754 * or if it's the first call, as we're always checking FFs between blocks.
2755 */
2756 uint32_t const idxCall = pTb->Thrd.cCalls;
2757 if ( idxCall > 0
2758 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2759 {
2760 /* Emit the call. */
2761 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2762 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2763 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2764 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2765 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2766 pCall->idxInstr = pTb->cInstructions;
2767 pCall->offOpcode = 0;
2768 pCall->cbOpcode = 0;
2769 pCall->uTbLookup = 0;
2770 pCall->fFlags = 0;
2771 pCall->auParams[0] = 0;
2772 pCall->auParams[1] = 0;
2773 pCall->auParams[2] = 0;
2774 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2775
2776 /* Reset the IRQ check value. */
2777 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2778
2779 /*
2780 * Check for deliverable IRQs and pending force flags.
2781 */
2782 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2783 }
2784 return true; /* continue */
2785}
2786
2787
2788/**
2789 * Emits an IRQ check call and checks for pending IRQs.
2790 *
2791 * @returns true if we should continue, false if an IRQ is deliverable or a
2792 * relevant force flag is pending.
2793 * @param pVCpu The cross context virtual CPU structure of the calling
2794 * thread.
2795 * @param pTb The translation block.
2796 * @sa iemThreadedCompileBeginEmitCallsComplications
2797 */
2798static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2799{
2800 /* Check again in a little bit, unless it is immediately following an STI
2801 in which case we *must* check immediately after the next instruction
2802 as well in case it's executed with interrupt inhibition. We could
2803 otherwise miss the interrupt window. See the irq2 wait2 variant in
2804 bs3-timers-1 which is doing sti + sti + cli. */
2805 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2806 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2807 else
2808 {
2809 pVCpu->iem.s.fTbCurInstrIsSti = false;
2810 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2811 }
2812 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2813
2814 /*
2815 * Emit the call.
2816 */
2817 uint32_t const idxCall = pTb->Thrd.cCalls;
2818 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2819 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2820 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2821 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2822 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2823 pCall->idxInstr = pTb->cInstructions;
2824 pCall->offOpcode = 0;
2825 pCall->cbOpcode = 0;
2826 pCall->uTbLookup = 0;
2827 pCall->fFlags = 0;
2828 pCall->auParams[0] = 0;
2829 pCall->auParams[1] = 0;
2830 pCall->auParams[2] = 0;
2831
2832 /*
2833 * Check for deliverable IRQs and pending force flags.
2834 */
2835 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2836}
2837
2838
2839/**
2840 * Compiles a new TB and executes it.
2841 *
2842 * We combine compilation and execution here as it makes for simpler code flow
2843 * in the main loop and it allows interpreting while compiling if we want to
2844 * explore that option.
2845 *
2846 * @returns Strict VBox status code.
2847 * @param pVM The cross context virtual machine structure.
2848 * @param pVCpu The cross context virtual CPU structure of the calling
2849 * thread.
2850 * @param GCPhysPc The physical address corresponding to the current
2851 * RIP+CS.BASE.
2852 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2853 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2854 */
2855static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2856{
2857 IEMTLBTRACE_TB_COMPILE(pVCpu, GCPhysPc);
2858 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2859 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2860
2861 /*
2862 * Get the TB we use for the recompiling. This is a maxed-out TB, so
2863 * we'll make a more efficient copy of it when we're done compiling.
2864 */
2865 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2866 if (pTb)
2867 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2868 else
2869 {
2870 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2871 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2872 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2873 }
2874 pTb->FlatPc = pVCpu->iem.s.uInstrBufPc | (GCPhysPc & GUEST_PAGE_OFFSET_MASK);
2875
2876 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2877 functions may get at it. */
2878 pVCpu->iem.s.pCurTbR3 = pTb;
2879
2880#if 0
2881 /* Make sure the CheckIrq condition matches the one in EM. */
2882 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2883 const uint32_t cZeroCalls = 1;
2884#else
2885 const uint32_t cZeroCalls = 0;
2886#endif
2887
2888 /*
2889 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2890 */
2891 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2892 iemThreadedCompileInitOpcodeFetching(pVCpu);
2893 VBOXSTRICTRC rcStrict;
2894 for (;;)
2895 {
2896 /* Process the next instruction. */
2897#ifdef LOG_ENABLED
2898 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2899 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2900 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2901 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2902#endif
2903 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2904 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2905
2906 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2907#if 0
2908 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2909 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2910 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2911#endif
2912 if ( rcStrict == VINF_SUCCESS
2913 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2914 && !pVCpu->iem.s.fEndTb)
2915 {
2916 Assert(pTb->Thrd.cCalls > cCallsPrev);
2917 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2918
2919 pVCpu->iem.s.cInstructions++;
2920
2921 /* Check for mode change _after_ certain CIMPL calls, so check that
2922 we continue executing with the same mode value. */
2923 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2924 { /* probable */ }
2925 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2926 { /* extremely likely */ }
2927 else
2928 break;
2929
2930#if defined(LOG_ENABLED) && 0 /* for debugging */
2931 //iemThreadedCompileEmitNop(pTb);
2932 iemThreadedCompileEmitLogCpuState(pTb);
2933#endif
2934 }
2935 else
2936 {
2937 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2938 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2939 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2940 rcStrict = VINF_SUCCESS;
2941
2942 if (pTb->Thrd.cCalls > cZeroCalls)
2943 {
2944 if (cCallsPrev != pTb->Thrd.cCalls)
2945 pVCpu->iem.s.cInstructions++;
2946 break;
2947 }
2948
2949 pVCpu->iem.s.pCurTbR3 = NULL;
2950 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2951 }
2952
2953 /* Check for IRQs? */
2954 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2955 pVCpu->iem.s.cInstrTillIrqCheck--;
2956 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2957 break;
2958
2959 /* Still space in the TB? */
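/* Headroom for one more instruction: up to 5 additional call entries and 16 opcode bytes, plus room left in the TB lookup table. */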
2960 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2961 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2962 && pTb->cTbLookupEntries < 127)
2963 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2964 else
2965 {
2966 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2967 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2968 break;
2969 }
2970 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2971 }
2972
2973 /*
2974 * Reserve lookup space for the final call entry if necessary.
2975 */
2976 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2977 if (pTb->Thrd.cCalls > 1)
2978 {
2979 if (pFinalCall->uTbLookup == 0)
2980 {
2981 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2982 pTb->cTbLookupEntries += 1;
2983 }
2984 }
2985 else if (pFinalCall->uTbLookup != 0)
2986 {
2987 Assert(pTb->cTbLookupEntries > 1);
2988 pFinalCall->uTbLookup -= 1;
2989 pTb->cTbLookupEntries -= 1;
2990 }
2991
2992 /*
2993 * Duplicate the TB into a completed one and link it.
2994 */
2995 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2996 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2997
2998 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2999
3000#ifdef IEM_COMPILE_ONLY_MODE
3001 /*
3002 * Execute the translation block.
3003 */
3004#endif
3005
3006 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3007}
3008
3009
3010
3011/*********************************************************************************************************************************
3012* Threaded Translation Block Saving and Restoring for Profiling the Native Recompiler *
3013*********************************************************************************************************************************/
3014#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3015# include <iprt/message.h>
3016
3017static const SSMFIELD g_aIemThreadedTbFields[] =
3018{
3019 SSMFIELD_ENTRY( IEMTB, cUsed),
3020 SSMFIELD_ENTRY( IEMTB, msLastUsed),
3021 SSMFIELD_ENTRY_GCPHYS(IEMTB, GCPhysPc),
3022 SSMFIELD_ENTRY( IEMTB, fFlags),
3023 SSMFIELD_ENTRY( IEMTB, x86.fAttr),
3024 SSMFIELD_ENTRY( IEMTB, cRanges),
3025 SSMFIELD_ENTRY( IEMTB, cInstructions),
3026 SSMFIELD_ENTRY( IEMTB, Thrd.cCalls),
3027 SSMFIELD_ENTRY( IEMTB, cTbLookupEntries),
3028 SSMFIELD_ENTRY( IEMTB, cbOpcodes),
3029 SSMFIELD_ENTRY( IEMTB, FlatPc),
3030 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[0]),
3031 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[1]),
3032 SSMFIELD_ENTRY_TERM()
3033};
3034
3035/**
3036 * Saves a threaded TB to a dedicated saved state file.
3037 */
3038static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb)
3039{
3040 /* Only VCPU #0 for now. */
3041 if (pVCpu->idCpu != 0)
3042 return;
3043
3044 /*
3045 * Get the SSM handle, lazily opening the output file.
3046 */
3047 PSSMHANDLE const pNil = (PSSMHANDLE)~(uintptr_t)0; Assert(!RT_VALID_PTR(pNil));
3048 PSSMHANDLE pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3049 if (pSSM && pSSM != pNil)
3050 { /* likely */ }
3051 else if (pSSM)
3052 return;
3053 else
3054 {
3055 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil;
3056 int rc = SSMR3Open("ThreadedTBsForRecompilerProfiling.sav", NULL, NULL, SSM_OPEN_F_FOR_WRITING, &pSSM);
3057 AssertLogRelRCReturnVoid(rc);
3058
3059 rc = SSMR3WriteFileHeader(pSSM, 1);
3060 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3061
3062 rc = SSMR3WriteUnitBegin(pSSM, "threaded-tbs", 1, 0);
3063 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3064 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pSSM;
3065 }
3066
3067 /*
3068 * Do the actual saving.
3069 */
3070 SSMR3PutU32(pSSM, 0); /* Indicates that another TB follows. */
3071
3072 /* The basic structure. */
3073 SSMR3PutStructEx(pSSM, pTb, sizeof(*pTb), 0 /*fFlags*/, g_aIemThreadedTbFields, NULL);
3074
3075 /* The ranges. */
3076 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3077 {
3078 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offOpcodes);
3079 SSMR3PutU16(pSSM, pTb->aRanges[iRange].cbOpcodes);
3080 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offPhysPage | (pTb->aRanges[iRange].idxPhysPage << 14));
3081 }
3082
3083 /* The opcodes. */
3084 SSMR3PutMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3085
3086 /* The threaded call table. */
3087 int rc = SSMR3PutMem(pSSM, pTb->Thrd.paCalls, sizeof(*pTb->Thrd.paCalls) * pTb->Thrd.cCalls);
3088 AssertLogRelMsgStmt(RT_SUCCESS(rc), ("rc=%Rrc\n", rc), pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil);
3089}
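/*
 * For reference, each record in the 'threaded-tbs' unit written above is laid out as:
 *      uint32_t                            0 = another TB follows, UINT32_MAX = end of stream
 *      IEMTB core fields                   per g_aIemThreadedTbFields
 *      cRanges x 3 uint16_t                offOpcodes, cbOpcodes, offPhysPage | (idxPhysPage << 14)
 *      uint8_t[cbOpcodes]                  the opcode bytes
 *      IEMTHRDEDCALLENTRY[Thrd.cCalls]     the threaded call table
 */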
3090
3091
3092/**
3093 * Called by IEMR3Term to finish any open profile files.
3094 *
3095 * @note This is not called on the EMT for @a pVCpu, but rather on the thread
3096 * driving the VM termination.
3097 */
3098DECLHIDDEN(void) iemThreadedSaveTbForProfilingCleanup(PVMCPU pVCpu)
3099{
3100 PSSMHANDLE const pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3101 pVCpu->iem.s.pSsmThreadedTbsForProfiling = NULL;
3102 if (RT_VALID_PTR(pSSM))
3103 {
3104 /* Indicate that this is the end. */
3105 SSMR3PutU32(pSSM, UINT32_MAX);
3106
3107 int rc = SSMR3WriteUnitComplete(pSSM);
3108 AssertLogRelRC(rc);
3109 rc = SSMR3WriteFileFooter(pSSM);
3110 AssertLogRelRC(rc);
3111 rc = SSMR3Close(pSSM);
3112 AssertLogRelRC(rc);
3113 }
3114}
3115
3116#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER && VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING */
3117
3118#ifdef IN_RING3
3119/**
3120 * API used to process what iemThreadedSaveTbForProfiling() saved.
3121 *
3122 * @note Do not mix build types or revisions. Local changes between saving the
3123 * TBs and calling this API may cause unexpected trouble.
3124 */
3125VMMR3DECL(int) IEMR3ThreadedProfileRecompilingSavedTbs(PVM pVM, const char *pszFilename, uint32_t cMinTbs)
3126{
3127# if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3128 PVMCPU const pVCpu = pVM->apCpusR3[0];
3129
3130 /* We need to keep an eye on the TB allocator. */
3131 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
3132
3133 /*
3134 * Load the TBs from the file.
3135 */
3136 PSSMHANDLE pSSM = NULL;
3137 int rc = SSMR3Open(pszFilename, NULL, NULL, 0, &pSSM);
3138 if (RT_SUCCESS(rc))
3139 {
3140 uint32_t cTbs = 0;
3141 PIEMTB pTbHead = NULL;
3142 PIEMTB *ppTbTail = &pTbHead;
3143 uint32_t uVersion;
3144 rc = SSMR3Seek(pSSM, "threaded-tbs", 0, &uVersion);
3145 if (RT_SUCCESS(rc))
3146 {
3147 for (;; cTbs++)
3148 {
3149 /* Check for the end tag. */
3150 uint32_t uTag = 0;
3151 rc = SSMR3GetU32(pSSM, &uTag);
3152 AssertRCBreak(rc);
3153 if (uTag == UINT32_MAX)
3154 break;
3155 AssertBreakStmt(uTag == 0, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3156
3157 /* Do we have room for another TB? */
3158 if (pTbAllocator->cInUseTbs + 2 >= pTbAllocator->cMaxTbs)
3159 {
3160 RTMsgInfo("Too many TBs to load, stopping loading early.\n");
3161 break;
3162 }
3163
3164 /* Allocate a new TB. */
3165 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
3166                AssertBreakStmt(pTb, rc = VERR_OUT_OF_RESOURCES);
3167
3168 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
3169 RT_ZERO(*pTb);
3170 pTb->idxAllocChunk = idxAllocChunk;
3171
3172 rc = SSMR3GetStructEx(pSSM, pTb, sizeof(*pTb), 0, g_aIemThreadedTbFields, NULL);
3173 if (RT_SUCCESS(rc))
3174 {
3175 AssertStmt(pTb->Thrd.cCalls > 0 && pTb->Thrd.cCalls <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3176 AssertStmt(pTb->cbOpcodes > 0 && pTb->cbOpcodes <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3177 AssertStmt(pTb->cRanges > 0 && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges), rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3178 AssertStmt(pTb->cTbLookupEntries > 0 && pTb->cTbLookupEntries <= 136, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3179
3180 if (RT_SUCCESS(rc))
3181 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3182 {
3183 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].offOpcodes);
3184 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].cbOpcodes);
3185 uint16_t uTmp = 0;
3186 rc = SSMR3GetU16(pSSM, &uTmp);
3187 AssertRCBreak(rc);
3188 pTb->aRanges[iRange].offPhysPage = uTmp & GUEST_PAGE_OFFSET_MASK;
3189 pTb->aRanges[iRange].idxPhysPage = uTmp >> 14;
3190
3191 AssertBreakStmt(pTb->aRanges[iRange].idxPhysPage <= RT_ELEMENTS(pTb->aGCPhysPages),
3192 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3193 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes < pTb->cbOpcodes,
3194 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3195 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes + pTb->aRanges[iRange].cbOpcodes <= pTb->cbOpcodes,
3196 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3197 }
3198
3199 if (RT_SUCCESS(rc))
3200 {
3201 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAllocZ(sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3202 if (pTb->Thrd.paCalls)
3203 {
3204 size_t const cbTbLookup = pTb->cTbLookupEntries * sizeof(PIEMTB);
3205 Assert(cbTbLookup > 0);
3206 size_t const cbOpcodes = pTb->cbOpcodes;
3207 Assert(cbOpcodes > 0);
3208 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
3209 uint8_t * const pbBoth = (uint8_t *)RTMemAllocZ(cbBoth);
3210 if (pbBoth)
3211 {
3212 pTb->pabOpcodes = &pbBoth[cbTbLookup];
3213 SSMR3GetMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3214 rc = SSMR3GetMem(pSSM, pTb->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3215 if (RT_SUCCESS(rc))
3216 {
3217 *ppTbTail = pTb;
3218 ppTbTail = &pTb->pNext;
3219 continue;
3220 }
3221 }
3222 else
3223 rc = VERR_NO_MEMORY;
3224 RTMemFree(pTb->Thrd.paCalls);
3225 }
3226 else
3227 rc = VERR_NO_MEMORY;
3228 }
3229 }
3230 iemTbAllocatorFree(pVCpu, pTb);
3231 break;
3232 }
3233 if (RT_FAILURE(rc))
3234 RTMsgError("Load error: %Rrc (cTbs=%u)", rc, cTbs);
3235 }
3236 else
3237 RTMsgError("SSMR3Seek failed on '%s': %Rrc", pszFilename, rc);
3238 SSMR3Close(pSSM);
3239 if (RT_SUCCESS(rc))
3240 {
3241 /*
3242 * Recompile the TBs.
3243 */
3244 if (pTbHead)
3245 {
3246 RTMsgInfo("Loaded %u TBs\n", cTbs);
3247 if (cTbs < cMinTbs)
3248 {
3249 RTMsgInfo("Duplicating TBs to reach %u TB target\n", cMinTbs);
3250 for (PIEMTB pTb = pTbHead;
3251 cTbs < cMinTbs && pTbAllocator->cInUseTbs + 2 <= pTbAllocator->cMaxTbs;
3252 pTb = pTb->pNext)
3253 {
3254 PIEMTB pTbCopy = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
3255 if (!pTbCopy)
3256 break;
3257 *ppTbTail = pTbCopy;
3258 ppTbTail = &pTbCopy->pNext;
3259 cTbs++;
3260 }
3261 }
3262
3263 PIEMTB pTbWarmup = iemThreadedTbDuplicate(pVM, pVCpu, pTbHead);
3264 if (pTbWarmup)
3265 {
3266 iemNativeRecompile(pVCpu, pTbWarmup);
3267 RTThreadSleep(512); /* to make the start visible in the profiler. */
3268 RTMsgInfo("Ready, set, go!\n");
3269
3270 if ((pTbWarmup->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3271 {
3272 uint32_t cFailed = 0;
3273 uint64_t const nsStart = RTTimeNanoTS();
3274 for (PIEMTB pTb = pTbHead; pTb; pTb = pTb->pNext)
3275 {
3276 iemNativeRecompile(pVCpu, pTb);
3277 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) != IEMTB_F_TYPE_NATIVE)
3278 cFailed++;
3279 }
3280 uint64_t const cNsElapsed = RTTimeNanoTS() - nsStart;
3281 RTMsgInfo("Recompiled %u TBs in %'RU64 ns - averaging %'RU64 ns/TB\n",
3282 cTbs, cNsElapsed, (cNsElapsed + cTbs - 1) / cTbs);
3283 if (cFailed)
3284 {
3285                            RTMsgError("Unfortunately %u TBs failed!", cFailed);
3286 rc = VERR_GENERAL_FAILURE;
3287 }
3288 RTThreadSleep(128); /* Another gap in the profiler timeline. */
3289 }
3290 else
3291 {
3292 RTMsgError("Failed to recompile the first TB!");
3293 rc = VERR_GENERAL_FAILURE;
3294 }
3295 }
3296 else
3297 rc = VERR_NO_MEMORY;
3298 }
3299 else
3300 {
3301 RTMsgError("'%s' contains no TBs!", pszFilename);
3302 rc = VERR_NO_DATA;
3303 }
3304 }
3305 }
3306 else
3307 RTMsgError("SSMR3Open failed on '%s': %Rrc", pszFilename, rc);
3308 return rc;
3309
3310# else
3311 RT_NOREF(pVM, pszFilename, cMinTbs);
3312 return VERR_NOT_IMPLEMENTED;
3313# endif
3314}
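/*
 * Illustrative usage sketch (hypothetical caller, not part of the original source).
 * The filename matches what iemThreadedSaveTbForProfiling() opens by default, and
 * 8192 is an arbitrary cMinTbs target used to pad the working set by duplication:
 *
 *      int rc = IEMR3ThreadedProfileRecompilingSavedTbs(pVM,
 *                                                       "ThreadedTBsForRecompilerProfiling.sav",
 *                                                       8192);
 *      AssertLogRelRC(rc);
 */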
3315#endif /* IN_RING3 */
3316
3317
3318/*********************************************************************************************************************************
3319* Recompiled Execution Core *
3320*********************************************************************************************************************************/
3321
3322/** Default TB factor.
3323 * This is basically the number of nanoseconds we guess executing a TB takes
3324 * on average. We estimate it on the high side if we can.
3325 * @note Best if this is a power of two so it can be translated to a shift. */
3326#define IEM_TIMER_POLL_DEFAULT_FACTOR UINT32_C(64)
3327/** The minimum number of nanoseconds we can allow between timer pollings.
3328 * This must take the cost of TMTimerPollBoolWithNanoTS into mind. We put that
3329 * cost at 104 ns now, thus this constant is at 256 ns. */
3330#define IEM_TIMER_POLL_MIN_NS UINT32_C(256)
3331/** The IEM_TIMER_POLL_MIN_NS value roughly translated to TBs, with some grains
3332 * of salt thrown in.
3333 * The idea is that we will be able to make progress with guest code execution
3334 * before polling timers and between running timers. */
3335#define IEM_TIMER_POLL_MIN_ITER UINT32_C(12)
3336/** The maximum number of nanoseconds we can allow between timer pollings.
3337 * This probably shouldn't be too high, as we don't have any timer
3338 * reprogramming feedback in the polling code. So, when a device reschedules a
3339 * timer for an earlier delivery, we won't know about it. */
3340#define IEM_TIMER_POLL_MAX_NS UINT32_C(8388608) /* 0x800000 ns = 8.4 ms */
3341/** The IEM_TIMER_POLL_MAX_NS value roughly translated to TBs, with some grains
3342 * of salt thrown in.
3343 * This helps control fluctuations in the NU benchmark. */
3344#define IEM_TIMER_POLL_MAX_ITER _512K
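/* Illustrative arithmetic with the defaults above (not from the original source): a delta
   capped at IEM_TIMER_POLL_MAX_NS yields 8388608 / 64 = 131072 TBs between polls, well
   below the 512K iteration ceiling, whereas the 256 ns minimum would yield only 4 and is
   therefore clamped up to IEM_TIMER_POLL_MIN_ITER (12) by the default calculation below. */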
3345
3346#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3347/**
3348 * Calculates the number of TBs till the next timer polling using defaults.
3349 *
3350 * This is used when the previous run wasn't long enough to provide sufficient
3351 * data, and when coming back from the HALT state without having actually
3352 * executed anything for a while.
3353 */
3354DECL_FORCE_INLINE(uint32_t) iemPollTimersCalcDefaultCountdown(uint64_t cNsDelta) RT_NOEXCEPT
3355{
3356 if (cNsDelta >= IEM_TIMER_POLL_MAX_NS)
3357 return RT_MIN(IEM_TIMER_POLL_MAX_NS / IEM_TIMER_POLL_DEFAULT_FACTOR, IEM_TIMER_POLL_MAX_ITER);
3358
3359 cNsDelta = RT_BIT_64(ASMBitFirstSetU32(cNsDelta) - 1); /* round down to power of 2 */
3360 uint32_t const cRet = cNsDelta / IEM_TIMER_POLL_DEFAULT_FACTOR;
3361 if (cRet >= IEM_TIMER_POLL_MIN_ITER)
3362 {
3363 if (cRet <= IEM_TIMER_POLL_MAX_ITER)
3364 return cRet;
3365 return IEM_TIMER_POLL_MAX_ITER;
3366 }
3367 return IEM_TIMER_POLL_MIN_ITER;
3368}
3369#endif
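/* Worked example for iemPollTimersCalcDefaultCountdown (illustrative, assuming the
   round-down-to-a-power-of-two intent noted in the code): a 1 ms delta (1000000 ns)
   becomes 524288 ns, giving 524288 / 64 = 8192 iterations, which already lies within
   the [IEM_TIMER_POLL_MIN_ITER, IEM_TIMER_POLL_MAX_ITER] range. */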
3370
3371
3372/**
3373 * Helper for polling timers.
3374 */
3375DECLHIDDEN(int) iemPollTimers(PVMCC pVM, PVMCPUCC pVCpu) RT_NOEXCEPT
3376{
3377 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPoll, a);
3378
3379 /*
3380 * Check for VM_FF_TM_VIRTUAL_SYNC and call TMR3VirtualSyncFF if set.
3381 * This is something all EMTs can do.
3382 */
3383 /* If the virtual sync FF is set, respond to it. */
3384 bool fRanTimers = VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC);
3385 if (!fRanTimers)
3386 { /* likely */ }
3387 else
3388 {
3389 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3390 TMR3VirtualSyncFF(pVM, pVCpu);
3391 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3392 }
3393
3394 /*
3395 * Poll timers.
3396 *
3397     * On the 10980xe the polling averages 314 ticks, with a min of 201, while
3398     * running a Norton Utilities DOS benchmark program. TSC runs at 3GHz,
3399 * translating that to 104 ns and 67 ns respectively. (An M2 booting win11
3400 * has an average of 2 ticks / 84 ns.)
3401 *
3402     * With the same setup, the TMR3VirtualSyncFF call and the else branch here profile
3403 * to 79751 ticks / 26583 ns on average, with a min of 1194 ticks / 398 ns.
3404 * (An M2 booting win11 has an average of 24 ticks / 1008 ns, with a min of
3405 * 8 ticks / 336 ns.)
3406 *
3407 * If we get a zero return value we run timers. Non-timer EMTs shouldn't
3408 * ever see a zero value here, so we just call TMR3TimerQueuesDo. However,
3409     * we do not re-run timers if we already called TMR3VirtualSyncFF above; we
3410 * try to make sure some code is executed first.
3411 */
3412 uint64_t nsNow = 0;
3413 uint64_t cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3414 if (cNsDelta >= 1) /* It is okay to run virtual sync timers a little early. */
3415 { /* likely */ }
3416 else if (!fRanTimers || VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
3417 {
3418 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3419 TMR3TimerQueuesDo(pVM);
3420 fRanTimers = true;
3421 nsNow = 0;
3422 cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3423 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3424 }
3425 else
3426 cNsDelta = 33;
3427
3428 /*
3429 * Calc interval and update the timestamps.
3430 */
3431 uint64_t const cNsSinceLast = nsNow - pVCpu->iem.s.nsRecompilerPollNow;
3432 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3433 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3434
3435 /*
3436 * Set the next polling count down value.
3437 *
3438 * We take the previous value and adjust it according to the cNsSinceLast
3439     * value, if it's not within reason. This can't be very accurate since the
3440     * CheckIrq and intra-TB checks aren't evenly spaced; they depend highly
3441 * on the guest code.
3442 */
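    /* Illustrative numbers (not from the original source): if the previous countdown was
       1000 TBs and cNsSinceLast came to 100000 ns, the adaptive code below estimates
       ceil(100000 / 1000) = 100 ns per TB; with cNsDeltaAdj at 400000 ns that yields
       400000 / 100 = 4000 TBs until the next poll, still subject to the MIN/MAX
       iteration clamping. */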
3443#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3444 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3445 if (cNsDelta >= RT_NS_1SEC / 4)
3446 {
3447 /*
3448         * Non-timer EMTs should end up here with a fixed 500ms delta; just return
3449         * the max and keep the polling overhead to the dedicated timer EMT.
3450 */
3451 AssertCompile(IEM_TIMER_POLL_MAX_ITER * IEM_TIMER_POLL_DEFAULT_FACTOR <= RT_NS_100MS);
3452 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3453 }
3454 else
3455 {
3456 /*
3457 * This is the timer EMT.
3458 */
3459 if (cNsDelta <= IEM_TIMER_POLL_MIN_NS)
3460 {
3461 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollTiny);
3462 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3463 }
3464 else
3465 {
3466 uint32_t const cNsDeltaAdj = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS : (uint32_t)cNsDelta;
3467 uint32_t const cNsDeltaSlack = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS / 2 : cNsDeltaAdj / 4;
3468 if ( cNsSinceLast < RT_MAX(IEM_TIMER_POLL_MIN_NS, 64)
3469 || cItersTillNextPoll < IEM_TIMER_POLL_MIN_ITER /* paranoia */)
3470 {
3471 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollDefaultCalc);
3472 cItersTillNextPoll = iemPollTimersCalcDefaultCountdown(cNsDeltaAdj);
3473 }
3474 else if ( cNsSinceLast >= cNsDeltaAdj + cNsDeltaSlack
3475 || cNsSinceLast <= cNsDeltaAdj - cNsDeltaSlack)
3476 {
3477 if (cNsSinceLast >= cItersTillNextPoll)
3478 {
3479 uint32_t uFactor = (uint32_t)(cNsSinceLast + cItersTillNextPoll - 1) / cItersTillNextPoll;
3480 cItersTillNextPoll = cNsDeltaAdj / uFactor;
3481 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorDivision, uFactor);
3482 }
3483 else
3484 {
3485 uint32_t uFactor = cItersTillNextPoll / (uint32_t)cNsSinceLast;
3486 cItersTillNextPoll = cNsDeltaAdj * uFactor;
3487 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorMultiplication, uFactor);
3488 }
3489
3490 if (cItersTillNextPoll >= IEM_TIMER_POLL_MIN_ITER)
3491 {
3492 if (cItersTillNextPoll <= IEM_TIMER_POLL_MAX_ITER)
3493 { /* likely */ }
3494 else
3495 {
3496 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollMax);
3497 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3498 }
3499 }
3500 else
3501 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3502 }
3503 else
3504 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollUnchanged);
3505 }
3506 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3507 }
3508#else
3509/** Poll timers every 400 us / 2500 Hz. (source: thin air) */
3510# define IEM_TIMER_POLL_IDEAL_NS (400U * RT_NS_1US)
3511 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3512 uint32_t const cNsIdealPollInterval = IEM_TIMER_POLL_IDEAL_NS;
3513 int64_t const nsFromIdeal = cNsSinceLast - cNsIdealPollInterval;
3514 if (nsFromIdeal < 0)
3515 {
3516 if ((uint64_t)-nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll < _64K)
3517 {
3518 cItersTillNextPoll += cItersTillNextPoll / 8;
3519 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3520 }
3521 }
3522 else
3523 {
3524 if ((uint64_t)nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll > 256)
3525 {
3526 cItersTillNextPoll -= cItersTillNextPoll / 8;
3527 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3528 }
3529 }
3530#endif
3531 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillNextPoll;
3532
3533 /*
3534 * Repeat the IRQ and FF checks.
3535 */
3536 if (cNsDelta > 0)
3537 {
3538 uint32_t fCpu = pVCpu->fLocalForcedActions;
3539 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3540 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3541 | VMCPU_FF_TLB_FLUSH
3542 | VMCPU_FF_UNHALT );
3543 if (RT_LIKELY( ( !fCpu
3544 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3545 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3546 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx)) ) )
3547 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
3548 {
3549 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3550 return VINF_SUCCESS;
3551 }
3552 }
3553 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3554 return VINF_IEM_REEXEC_BREAK_FF;
3555}
3556
3557
3558/** Helper for iemTbExec. */
3559DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
3560{
3561 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
3562 Assert(idx < pTb->cTbLookupEntries);
3563 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
3564}
3565
3566
3567/**
3568 * Executes a translation block.
3569 *
3570 * @returns Strict VBox status code.
3571 * @param pVCpu The cross context virtual CPU structure of the calling
3572 * thread.
3573 * @param pTb The translation block to execute.
3574 */
3575static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
3576{
3577 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
3578
3579 /*
3580 * Set the current TB so CIMPL functions may get at it.
3581 */
3582 pVCpu->iem.s.pCurTbR3 = pTb;
3583 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
3584
3585 /*
3586 * Execute the block.
3587 */
3588#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3589 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
3590 {
3591 pVCpu->iem.s.cTbExecNative++;
3592 IEMTLBTRACE_TB_EXEC_N8VE(pVCpu, pTb);
3593# ifdef LOG_ENABLED
3594 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
3595# endif
3596
3597# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3598 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
3599# endif
3600# ifdef RT_ARCH_AMD64
3601 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
3602# else
3603 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
3604# endif
3605
3606# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3607 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3608# endif
3609# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3610 /* Restore FPCR/MXCSR if the TB modified it. */
3611 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3612 {
3613 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3614 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3615 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3616 }
3617# endif
3618# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3619 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
3620# endif
3621 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3622 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3623 { /* likely */ }
3624 else
3625 {
3626 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
3627 pVCpu->iem.s.pCurTbR3 = NULL;
3628
3629 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3630 only to break out of TB execution early. */
3631 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3632 {
3633 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
3634 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3635 }
3636
3637 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
3638 only to break out of TB execution early due to pending FFs. */
3639 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
3640 {
3641 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
3642 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3643 }
3644
3645            /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
3646               and be converted to VINF_SUCCESS or whatever is appropriate. */
3647 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
3648 {
3649 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
3650 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
3651 }
3652
3653 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
3654 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3655 }
3656 }
3657 else
3658#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
3659 {
3660 /*
3661 * The threaded execution loop.
3662 */
3663 pVCpu->iem.s.cTbExecThreaded++;
3664 IEMTLBTRACE_TB_EXEC_THRD(pVCpu, pTb);
3665#ifdef LOG_ENABLED
3666 uint64_t uRipPrev = UINT64_MAX;
3667#endif
3668 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
3669 uint32_t cCallsLeft = pTb->Thrd.cCalls;
3670 while (cCallsLeft-- > 0)
3671 {
3672#ifdef LOG_ENABLED
3673 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
3674 {
3675 uRipPrev = pVCpu->cpum.GstCtx.rip;
3676 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
3677 }
3678 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
3679 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
3680 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
3681#endif
3682#ifdef VBOX_WITH_STATISTICS
3683 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
3684 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
3685#endif
3686 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
3687 pCallEntry->auParams[0],
3688 pCallEntry->auParams[1],
3689 pCallEntry->auParams[2]);
3690 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3691 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3692 pCallEntry++;
3693 else if (rcStrict == VINF_IEM_REEXEC_JUMP)
3694 {
3695 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
3696 Assert(cCallsLeft == 0);
3697 uint32_t const idxTarget = (uint32_t)pCallEntry->auParams[0];
3698 cCallsLeft = pTb->Thrd.cCalls;
3699 AssertBreak(idxTarget < cCallsLeft - 1);
3700 cCallsLeft -= idxTarget;
3701 pCallEntry = &pTb->Thrd.paCalls[idxTarget];
3702 AssertBreak(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET);
3703 }
3704 else
3705 {
3706 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
3707 pVCpu->iem.s.pCurTbR3 = NULL;
3708 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
3709 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
3710
3711 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3712 only to break out of TB execution early. */
3713 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3714 {
3715#ifdef VBOX_WITH_STATISTICS
3716 if (pCallEntry->uTbLookup)
3717 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
3718 else
3719 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
3720#endif
3721 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3722 }
3723 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3724 }
3725 }
3726
3727 /* Update the lookup entry. */
3728 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
3729 }
3730
3731 pVCpu->iem.s.cInstructions += pTb->cInstructions;
3732 pVCpu->iem.s.pCurTbR3 = NULL;
3733 return VINF_SUCCESS;
3734}
3735
3736
3737/**
3738 * This is called when the PC doesn't match the current pbInstrBuf.
3739 *
3740 * Upon return, we're ready for opcode fetching. But please note that
3741 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
3742 * MMIO or unassigned).
3743 */
3744static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
3745{
3746 pVCpu->iem.s.pbInstrBuf = NULL;
3747 pVCpu->iem.s.offCurInstrStart = 0;
3748 pVCpu->iem.s.offInstrNextByte = 0;
3749 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
3750 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
3751}
3752
3753
3754/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
3755DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
3756{
3757 /*
3758 * Set uCurTbStartPc to RIP and calc the effective PC.
3759 */
3760 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
3761#if 0 /* unused */
3762 pVCpu->iem.s.uCurTbStartPc = uPc;
3763#endif
3764 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
3765 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
3766
3767 /*
3768 * Advance within the current buffer (PAGE) when possible.
3769 */
3770 if (pVCpu->iem.s.pbInstrBuf)
3771 {
3772 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
3773 if (off < pVCpu->iem.s.cbInstrBufTotal)
3774 {
3775 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3776 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3777 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3778 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3779 else
3780 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3781
3782 return pVCpu->iem.s.GCPhysInstrBuf + off;
3783 }
3784 }
3785 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3786}
3787
3788
3789/**
3790 * Determines the extra IEMTB_F_XXX flags.
3791 *
3792 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3793 * IEMTB_F_CS_LIM_CHECKS (or zero).
3794 * @param pVCpu The cross context virtual CPU structure of the calling
3795 * thread.
3796 */
3797DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3798{
3799 uint32_t fRet = 0;
3800
3801 /*
3802 * Determine the inhibit bits.
3803 */
3804 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3805 { /* typical */ }
3806 else
3807 {
3808 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3809 fRet |= IEMTB_F_INHIBIT_SHADOW;
3810 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3811 fRet |= IEMTB_F_INHIBIT_NMI;
3812 }
3813
3814 /*
3815 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3816 * likely to go invalid before the end of the translation block.
3817 */
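    /* Illustrative numbers (not from the original source): with cs.u64Base = 0,
       cs.u32Limit = 0xffff and eip = 0xf800 the distance to the limit is 0x7ff, which is
       below the X86_PAGE_SIZE + 16 threshold checked below, so IEMTB_F_CS_LIM_CHECKS is
       set; at eip = 0x1000 the distance is 0xefff and the flag is omitted. */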
3818 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3819 return fRet;
3820
3821 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3822 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3823 return fRet;
3824 return fRet | IEMTB_F_CS_LIM_CHECKS;
3825}
3826
3827
3828VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu, bool fWasHalted)
3829{
3830 /*
3831 * See if there is an interrupt pending in TRPM, inject it if we can.
3832 */
3833 if (!TRPMHasTrap(pVCpu))
3834 { /* likely */ }
3835 else
3836 {
3837 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3838 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3839 { /*likely */ }
3840 else
3841 return rcStrict;
3842 }
3843
3844 /*
3845 * Init the execution environment.
3846 */
3847#if 1 /** @todo this seems like a good idea; however, if we ever share memory
3848 * directly with other threads on the host, it isn't necessarily... */
3849 if (pVM->cCpus == 1)
3850 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3851 else
3852#endif
3853 iemInitExec(pVCpu, 0 /*fExecOpts*/);
3854
3855 if (RT_LIKELY(!fWasHalted && pVCpu->iem.s.msRecompilerPollNow != 0))
3856 { }
3857 else
3858 {
3859 /* Do polling after halt and the first time we get here. */
3860#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3861 uint64_t nsNow = 0;
3862 uint32_t const cItersTillPoll = iemPollTimersCalcDefaultCountdown(TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow));
3863 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillPoll;
3864 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillPoll;
3865#else
3866 uint64_t const nsNow = TMVirtualGetNoCheck(pVM);
3867#endif
3868 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3869 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3870 }
3871 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3872
3873 /*
3874 * Run-loop.
3875 *
3876 * If we're using setjmp/longjmp we combine all the catching here to avoid
3877 * having to call setjmp for each block we're executing.
3878 */
3879 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3880 for (;;)
3881 {
3882 VBOXSTRICTRC rcStrict;
3883 IEM_TRY_SETJMP(pVCpu, rcStrict)
3884 {
3885 for (;;)
3886 {
3887 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3888 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3889 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3890 {
3891 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3892 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3893 if (pTb)
3894 rcStrict = iemTbExec(pVCpu, pTb);
3895 else
3896 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3897 }
3898 else
3899 {
3900 /* This can only happen if the current PC cannot be translated into a
3901 host pointer, which means we're in MMIO or unmapped memory... */
3902#if defined(VBOX_STRICT) && defined(IN_RING3)
3903 rcStrict = DBGFSTOP(pVM);
3904 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3905 return rcStrict;
3906#endif
3907 rcStrict = IEMExecLots(pVCpu, 2048, 511, NULL);
3908 }
3909 if (rcStrict == VINF_SUCCESS)
3910 {
3911 Assert(pVCpu->iem.s.cActiveMappings == 0);
3912
3913 /* Note! This IRQ/FF check is repeated in iemPollTimers, iemThreadedFunc_BltIn_CheckIrq
3914 and emitted by iemNativeRecompFunc_BltIn_CheckIrq. */
3915 uint64_t fCpu = pVCpu->fLocalForcedActions;
3916 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3917 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3918 | VMCPU_FF_TLB_FLUSH
3919 | VMCPU_FF_UNHALT );
3920 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3921 if (RT_LIKELY( ( !fCpu
3922 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3923 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3924 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3925 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3926 {
3927 /* Once in a while we need to poll timers here. */
3928 if ((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
3929 { /* likely */ }
3930 else
3931 {
3932 int rc = iemPollTimers(pVM, pVCpu);
3933 if (rc != VINF_SUCCESS)
3934 return VINF_SUCCESS;
3935 }
3936 }
3937 else
3938 return VINF_SUCCESS;
3939 }
3940 else
3941 return rcStrict;
3942 }
3943 }
3944 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3945 {
3946 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3947 pVCpu->iem.s.cLongJumps++;
3948#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3949 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3950#endif
3951 if (pVCpu->iem.s.cActiveMappings > 0)
3952 iemMemRollback(pVCpu);
3953
3954#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3955 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3956 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3957 {
3958 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3959# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3960 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3961 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3962# endif
3963
3964# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3965 /* Restore FPCR/MXCSR if the TB modified it. */
3966 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3967 {
3968 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3969 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3970 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3971 }
3972# endif
3973 }
3974#endif
3975
3976#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3977 /* If pTb isn't NULL we're in iemTbExec. */
3978 if (!pTb)
3979 {
3980 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3981 pTb = pVCpu->iem.s.pCurTbR3;
3982 if (pTb)
3983 {
3984 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3985 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3986 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3987 }
3988 }
3989#endif
3990 pVCpu->iem.s.pCurTbR3 = NULL;
3991 return rcStrict;
3992 }
3993 IEM_CATCH_LONGJMP_END(pVCpu);
3994 }
3995}
3996