VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp @ 106401

Last change on this file was r106401, checked in by vboxsync on 2024-10-16:

VMM/IEM: Disable control flow guard for the core IEM jump table call loops. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 161.6 KB
1/* $Id: IEMAllThrdRecompiler.cpp 106401 2024-10-16 20:56:01Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
117# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
118#endif
119
120
121/*********************************************************************************************************************************
122* Internal Functions *
123*********************************************************************************************************************************/
124#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
125static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb);
126#endif
127
128
129/**
130 * Calculates the effective address of a ModR/M memory operand, extended version
131 * for use in the recompilers.
132 *
133 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
134 *
135 * May longjmp on internal error.
136 *
137 * @return The effective address.
138 * @param pVCpu The cross context virtual CPU structure of the calling thread.
139 * @param bRm The ModRM byte.
140 * @param cbImmAndRspOffset - First byte: The size of any immediate
141 * following the effective address opcode bytes
142 * (only for RIP relative addressing).
143 * - Second byte: RSP displacement (for POP [ESP]).
144 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
145 * SIB byte (bits 39:32).
146 *
147 * @note This must be defined in a source file with matching
148 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
149 * or implemented differently...
150 */
151RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
152{
153 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
154# define SET_SS_DEF() \
155 do \
156 { \
157 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
158 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
159 } while (0)
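/* Note: SET_SS_DEF implements the x86 rule that BP/SP-based addressing defaults to the SS
   segment; it only takes effect when no segment-override prefix has been decoded. */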
160
161 if (!IEM_IS_64BIT_CODE(pVCpu))
162 {
163/** @todo Check the effective address size crap! */
164 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
165 {
166 uint16_t u16EffAddr;
167
168 /* Handle the disp16 form with no registers first. */
169 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
170 {
171 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
172 *puInfo = u16EffAddr;
173 }
174 else
175 {
176 /* Get the displacement. */
177 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
178 {
179 case 0: u16EffAddr = 0; break;
180 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
181 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
182 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
183 }
184 *puInfo = u16EffAddr;
185
186 /* Add the base and index registers to the disp. */
187 switch (bRm & X86_MODRM_RM_MASK)
188 {
189 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
190 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
191 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
192 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
193 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
194 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
195 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
196 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
197 }
198 }
199
200 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
201 return u16EffAddr;
202 }
203
204 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
205 uint32_t u32EffAddr;
206 uint64_t uInfo;
207
208 /* Handle the disp32 form with no registers first. */
209 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
210 {
211 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
212 uInfo = u32EffAddr;
213 }
214 else
215 {
216 /* Get the register (or SIB) value. */
217 uInfo = 0;
218 switch ((bRm & X86_MODRM_RM_MASK))
219 {
220 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
221 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
222 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
223 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
224 case 4: /* SIB */
225 {
226 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
227 uInfo = (uint64_t)bSib << 32;
228
229 /* Get the index and scale it. */
230 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
231 {
232 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
233 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
234 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
235 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
236 case 4: u32EffAddr = 0; /*none */ break;
237 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
238 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
239 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
240 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
241 }
242 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
243
244 /* add base */
245 switch (bSib & X86_SIB_BASE_MASK)
246 {
247 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
248 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
249 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
250 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
251 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
252 case 5:
253 if ((bRm & X86_MODRM_MOD_MASK) != 0)
254 {
255 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
256 SET_SS_DEF();
257 }
258 else
259 {
260 uint32_t u32Disp;
261 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
262 u32EffAddr += u32Disp;
263 uInfo |= u32Disp;
264 }
265 break;
266 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
267 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
268 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
269 }
270 break;
271 }
272 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
273 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
274 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
275 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
276 }
277
278 /* Get and add the displacement. */
279 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
280 {
281 case 0:
282 break;
283 case 1:
284 {
285 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
286 u32EffAddr += i8Disp;
287 uInfo |= (uint32_t)(int32_t)i8Disp;
288 break;
289 }
290 case 2:
291 {
292 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
293 u32EffAddr += u32Disp;
294 uInfo |= u32Disp;
295 break;
296 }
297 default:
298 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
299 }
300 }
301
302 *puInfo = uInfo;
303 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
304 return u32EffAddr;
305 }
306
307 uint64_t u64EffAddr;
308 uint64_t uInfo;
309
310 /* Handle the rip+disp32 form with no registers first. */
311 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
312 {
313 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
314 uInfo = (uint32_t)u64EffAddr;
315 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
316 }
317 else
318 {
319 /* Get the register (or SIB) value. */
320 uInfo = 0;
321 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
322 {
323 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
324 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
325 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
326 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
327 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
328 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
329 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
330 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
331 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
332 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
333 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
334 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
335 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
336 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
337 /* SIB */
338 case 4:
339 case 12:
340 {
341 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
342 uInfo = (uint64_t)bSib << 32;
343
344 /* Get the index and scale it. */
345 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
346 {
347 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
348 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
349 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
350 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
351 case 4: u64EffAddr = 0; /*none */ break;
352 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
353 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
354 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
355 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
356 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
357 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
358 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
359 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
360 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
361 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
362 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
363 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
364 }
365 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
366
367 /* add base */
368 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
369 {
370 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
371 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
372 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
373 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
374 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
375 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
376 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
377 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
378 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
379 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
380 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
381 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
382 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
383 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
384 /* complicated encodings */
385 case 5:
386 case 13:
387 if ((bRm & X86_MODRM_MOD_MASK) != 0)
388 {
389 if (!pVCpu->iem.s.uRexB)
390 {
391 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
392 SET_SS_DEF();
393 }
394 else
395 u64EffAddr += pVCpu->cpum.GstCtx.r13;
396 }
397 else
398 {
399 uint32_t u32Disp;
400 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
401 u64EffAddr += (int32_t)u32Disp;
402 uInfo |= u32Disp;
403 }
404 break;
405 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
406 }
407 break;
408 }
409 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
410 }
411
412 /* Get and add the displacement. */
413 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
414 {
415 case 0:
416 break;
417 case 1:
418 {
419 int8_t i8Disp;
420 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
421 u64EffAddr += i8Disp;
422 uInfo |= (uint32_t)(int32_t)i8Disp;
423 break;
424 }
425 case 2:
426 {
427 uint32_t u32Disp;
428 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
429 u64EffAddr += (int32_t)u32Disp;
430 uInfo |= u32Disp;
431 break;
432 }
433 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
434 }
435
436 }
437
438 *puInfo = uInfo;
439 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
440 {
441 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
442 return u64EffAddr;
443 }
444 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
445 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
446 return u64EffAddr & UINT32_MAX;
447}
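/* Illustrative (hypothetical caller code): the packed info value returned via puInfo can be
   unpacked as
       uint32_t const u32Disp = (uint32_t)uInfo;         // displacement, bits 31:0
       uint8_t  const bSib    = (uint8_t)(uInfo >> 32);  // SIB byte, bits 39:32 (zero when no SIB)
   matching the packing documented for iemOpHlpCalcRmEffAddrJmpEx above. */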
448
449
450
451/*********************************************************************************************************************************
452* Translation Block Cache. *
453*********************************************************************************************************************************/
454
455/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
456static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
457{
458 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
459 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
460 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
461 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
462 if (cMsSinceUse1 != cMsSinceUse2)
463 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
464 if (pTb1->cUsed != pTb2->cUsed)
465 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
466 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
467 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
468 return 0;
469}
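/* Note: the comparator sorts the most recently used TBs first, breaking ties by higher use
   count and then by preferring native over threaded TBs, so the pruning code below keeps the
   "hottest" half of a collision list when it truncates it. */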
470
471#ifdef VBOX_STRICT
472/**
473 * Assertion helper that checks a collisions list count.
474 */
475static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
476{
477 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
478 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
479 while (pTb)
480 {
481 pTb = pTb->pNext;
482 cLeft--;
483 }
484 AssertMsg(cLeft == 0,
485 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
486 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
487}
488#endif
489
490
491DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
492{
493 STAM_PROFILE_START(&pTbCache->StatPrune, a);
494
495 /*
496 * First convert the collision list to an array.
497 */
498 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
499 uintptr_t cInserted = 0;
500 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
501
502 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
503
504 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
505 {
506 apSortedTbs[cInserted++] = pTbCollision;
507 pTbCollision = pTbCollision->pNext;
508 }
509
510 /* Free any excess (impossible). */
511 if (RT_LIKELY(!pTbCollision))
512 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
513 else
514 do
515 {
516 PIEMTB pTbToFree = pTbCollision;
517 pTbCollision = pTbToFree->pNext;
518 iemTbAllocatorFree(pVCpu, pTbToFree);
519 } while (pTbCollision);
520
521 /*
522 * Sort it by most recently used and usage count.
523 */
524 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
525
526 /* We keep half the list for now. Perhaps a bit aggressive... */
527 uintptr_t const cKeep = cInserted / 2;
528
529 /* First free up the TBs we don't wish to keep (before creating the new
530 list because otherwise the free code will scan the list for each one
531 without ever finding it). */
532 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
533 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
534
535 /* Then chain the new TB together with the existing ones we want to keep
536 and insert this list into the hash table. */
537 pTbCollision = pTb;
538 for (uintptr_t idx = 0; idx < cKeep; idx++)
539 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
540 pTbCollision->pNext = NULL;
541
542 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
543#ifdef VBOX_STRICT
544 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
545#endif
546
547 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
548}
549
550
551static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
552{
553 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
554 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
555 if (!pTbOldHead)
556 {
557 pTb->pNext = NULL;
558 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
559 }
560 else
561 {
562 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
563 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
564 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
565 {
566 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
567 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
568#ifdef VBOX_STRICT
569 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
570#endif
571 }
572 else
573 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
574 }
575}
576
577
578/**
579 * Unlinks @a pTb from the hash table if found in it.
580 *
581 * @returns true if unlinked, false if not present.
582 * @param pTbCache The hash table.
583 * @param pTb The TB to remove.
584 */
585static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
586{
587 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
588 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
589 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
590
591 /*
592 * At the head of the collision list?
593 */
594 if (pTbHash == pTb)
595 {
596 if (!pTb->pNext)
597 pTbCache->apHash[idxHash] = NULL;
598 else
599 {
600 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
601 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
602#ifdef VBOX_STRICT
603 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
604#endif
605 }
606 return true;
607 }
608
609 /*
610 * Search the collision list.
611 */
612 PIEMTB const pTbHead = pTbHash;
613 while (pTbHash)
614 {
615 PIEMTB const pNextTb = pTbHash->pNext;
616 if (pNextTb == pTb)
617 {
618 pTbHash->pNext = pTb->pNext;
619 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
620#ifdef VBOX_STRICT
621 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
622#endif
623 return true;
624 }
625 pTbHash = pNextTb;
626 }
627 return false;
628}
629
630
631/**
632 * Looks up a TB for the given PC and flags in the cache.
633 *
634 * @returns Pointer to TB on success, NULL if not found.
635 * @param pVCpu The cross context virtual CPU structure of the
636 * calling thread.
637 * @param pTbCache The translation block cache.
638 * @param GCPhysPc The PC to look up a TB for.
639 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
640 * the lookup.
641 * @thread EMT(pVCpu)
642 */
643static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
644 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
645{
646 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
647
648 /*
649 * First consult the lookup table entry.
650 */
651 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
652 PIEMTB pTb = *ppTbLookup;
653 if (pTb)
654 {
655 if (pTb->GCPhysPc == GCPhysPc)
656 {
657 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
658 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
659 {
660 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
661 {
662 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
663 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
664 pTb->cUsed++;
665#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
666 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
667 {
668 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
669 return pTb;
670 }
671 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
672# ifdef VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING
673 iemThreadedSaveTbForProfiling(pVCpu, pTb);
674# endif
675 return iemNativeRecompile(pVCpu, pTb);
676#else
677 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
678 return pTb;
679#endif
680 }
681 }
682 }
683 }
684
685 /*
686 * Then consult the hash table.
687 */
688 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
689#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
690 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
691#endif
692 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
693 while (pTb)
694 {
695 if (pTb->GCPhysPc == GCPhysPc)
696 {
697 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
698 {
699 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
700 {
701 STAM_COUNTER_INC(&pTbCache->cLookupHits);
702 AssertMsg(cLeft > 0, ("%d\n", cLeft));
703
704 *ppTbLookup = pTb;
705 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
706 pTb->cUsed++;
707#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
708 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
709 {
710 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
711 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
712 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
713 return pTb;
714 }
715 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
716 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
717 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
718 return iemNativeRecompile(pVCpu, pTb);
719#else
720 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
721 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
722 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
723 return pTb;
724#endif
725 }
726 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
727 }
728 else
729 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
730 }
731 else
732 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
733
734 pTb = pTb->pNext;
735#ifdef VBOX_STRICT
736 cLeft--;
737#endif
738 }
739 AssertMsg(cLeft == 0, ("%d\n", cLeft));
740 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
741 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
742 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
743 return pTb;
744}
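/* Note: a threaded TB is handed to iemNativeRecompile exactly when its use count reaches
   uTbNativeRecompileAtUsedCount; native TBs and threaded TBs below that threshold are simply
   returned by the lookup paths above. */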
745
746
747/*********************************************************************************************************************************
748* Translation Block Allocator.
749*********************************************************************************************************************************/
750/*
751 * Translation block allocation management.
752 */
753
754#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
755# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
756 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
757# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
758 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
759# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
760 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
761#else
762# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
763 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
764# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
765 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
766# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
767 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
768#endif
769/** Makes a TB index from a chunk index and TB index within that chunk. */
770#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
771 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
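/* Illustrative example (hypothetical cTbsPerChunk = 8192, division-based variant): TB index
   20000 maps to chunk 2 (20000 / 8192) at index 3616 within that chunk (20000 - 2 * 8192), and
   IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 3616) yields 20000 again. */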
772
773
774/**
775 * Initializes the TB allocator and cache for an EMT.
776 *
777 * @returns VBox status code.
778 * @param pVM The VM handle.
779 * @param cInitialTbs The initial number of translation blocks to
780 * preallocate.
781 * @param cMaxTbs The max number of translation blocks allowed.
782 * @param cbInitialExec The initial size of the executable memory allocator.
783 * @param cbMaxExec The max size of the executable memory allocator.
784 * @param cbChunkExec The chunk size for executable memory allocator. Zero
785 * or UINT32_MAX for automatically determining this.
786 * @thread EMT
787 */
788DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
789 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
790{
791 PVMCPUCC pVCpu = VMMGetCpu(pVM);
792 Assert(!pVCpu->iem.s.pTbCacheR3);
793 Assert(!pVCpu->iem.s.pTbAllocatorR3);
794
795 /*
796 * Calculate the chunk size of the TB allocator.
797 * The minimum chunk size is 2MiB.
798 */
799 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
800 uint32_t cbPerChunk = _2M;
801 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
802#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
803 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
804 uint8_t cChunkShift = 21 - cTbShift;
805 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
806#endif
807 for (;;)
808 {
809 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
810 break;
811 cbPerChunk *= 2;
812 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
813#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
814 cChunkShift += 1;
815#endif
816 }
817
818 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
819 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
820 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
821
822 cMaxTbs = cMaxChunks * cTbsPerChunk;
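/* Illustrative sizing example (hypothetical numbers): if sizeof(IEMTB) were 256 bytes, a 2MiB
   chunk would hold 8192 TBs; a request for cMaxTbs = 60000 would then need cMaxChunks = 8
   (assuming aChunks has at least 8 entries), and cMaxTbs gets rounded up to 8 * 8192 = 65536. */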
823
824 /*
825 * Allocate and initialize it.
826 */
827 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
828 if (!pTbAllocator)
829 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
830 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
831 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
832 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
833 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
834 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
835 pTbAllocator->cbPerChunk = cbPerChunk;
836 pTbAllocator->cMaxTbs = cMaxTbs;
837 pTbAllocator->pTbsFreeHead = NULL;
838#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
839 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
840 pTbAllocator->cChunkShift = cChunkShift;
841 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
842#endif
843
844 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
845
846 /*
847 * Allocate the initial chunks.
848 */
849 for (uint32_t idxChunk = 0; ; idxChunk++)
850 {
851 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
852 if (!paTbs)
853 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
854 "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
855 cbPerChunk, idxChunk, pVCpu->idCpu);
856
857 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
858 {
859 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
860 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
861 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
862 }
863 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
864 pTbAllocator->cTotalTbs += cTbsPerChunk;
865
866 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
867 break;
868 }
869
870 /*
871 * Calculate the size of the hash table. We double the max TB count and
872 * round it up to the nearest power of two.
873 */
874 uint32_t cCacheEntries = cMaxTbs * 2;
875 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
876 {
877 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
878 cCacheEntries = RT_BIT_32(iBitTop);
879 Assert(cCacheEntries >= cMaxTbs * 2);
880 }
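/* Illustrative (hypothetical numbers): for cMaxTbs = 65536 the doubled count 131072 is already
   a power of two and is used directly, giving uHashMask = 0x1ffff below. */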
881
882 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
883 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
884 if (!pTbCache)
885 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
886 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
887 cbTbCache, cCacheEntries, pVCpu->idCpu);
888
889 /*
890 * Initialize it (assumes zeroed by the allocator).
891 */
892 pTbCache->uMagic = IEMTBCACHE_MAGIC;
893 pTbCache->cHash = cCacheEntries;
894 pTbCache->uHashMask = cCacheEntries - 1;
895 Assert(pTbCache->cHash > pTbCache->uHashMask);
896 pVCpu->iem.s.pTbCacheR3 = pTbCache;
897
898 /*
899 * Initialize the native executable memory allocator.
900 */
901#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
902 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
903 AssertLogRelRCReturn(rc, rc);
904#else
905 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
906#endif
907
908 return VINF_SUCCESS;
909}
910
911
912/**
913 * Inner free worker.
914 *
915 * The @a a_fType parameter allows us to eliminate the type check when we know
916 * which type of TB is being freed.
917 */
918template<uint32_t a_fType>
919DECL_FORCE_INLINE(void)
920iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
921{
922#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
923 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED || a_fType == IEMTB_F_TYPE_NATIVE);
924#else
925 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED);
926#endif
927 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
928 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
929 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
930#ifdef VBOX_STRICT
931 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
932 Assert(pTbOther != pTb);
933#endif
934
935 /*
936 * Unlink the TB from the hash table.
937 */
938 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
939
940 /*
941 * Free the TB itself.
942 */
943 if RT_CONSTEXPR_IF(a_fType == 0)
944 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
945 {
946 case IEMTB_F_TYPE_THREADED:
947 pTbAllocator->cThreadedTbs -= 1;
948 RTMemFree(pTb->Thrd.paCalls);
949 break;
950#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
951 case IEMTB_F_TYPE_NATIVE:
952 pTbAllocator->cNativeTbs -= 1;
953 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
954 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
955 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
956 break;
957#endif
958 default:
959 AssertFailed();
960 }
961#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
962 else if RT_CONSTEXPR_IF(a_fType == IEMTB_F_TYPE_NATIVE)
963 {
964 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
965 pTbAllocator->cNativeTbs -= 1;
966 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
967 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
968 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
969 }
970#endif
971 else
972 {
973 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
974 pTbAllocator->cThreadedTbs -= 1;
975 RTMemFree(pTb->Thrd.paCalls);
976 }
977
978 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
979
980 pTb->pNext = pTbAllocator->pTbsFreeHead;
981 pTbAllocator->pTbsFreeHead = pTb;
982 pTb->fFlags = 0;
983 pTb->GCPhysPc = UINT64_MAX;
984 pTb->Gen.uPtr = 0;
985 pTb->Gen.uData = 0;
986 pTb->cTbLookupEntries = 0;
987 pTb->cbOpcodes = 0;
988 pTb->pabOpcodes = NULL;
989
990 Assert(pTbAllocator->cInUseTbs > 0);
991
992 pTbAllocator->cInUseTbs -= 1;
993 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
994}
995
996
997/**
998 * Frees the given TB.
999 *
1000 * @param pVCpu The cross context virtual CPU structure of the calling
1001 * thread.
1002 * @param pTb The translation block to free.
1003 * @thread EMT(pVCpu)
1004 */
1005DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
1006{
1007 /*
1008 * Validate state.
1009 */
1010 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1011 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1012 uint8_t const idxChunk = pTb->idxAllocChunk;
1013 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
1014 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
1015 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
1016
1017 /*
1018 * Invalidate the TB lookup pointer and call the inner worker.
1019 */
1020 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1021 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1022}
1023
1024#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
1025
1026/**
1027 * Interface used by iemExecMemAllocatorPrune.
1028 */
1029DECLHIDDEN(void) iemTbAllocatorFreeBulk(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb)
1030{
1031 Assert(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1032
1033 uint8_t const idxChunk = pTb->idxAllocChunk;
1034 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
1035 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
1036 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
1037
1038 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1039}
1040
1041
1042/**
1043 * Interface used by iemExecMemAllocatorPrune.
1044 */
1045DECLHIDDEN(PIEMTBALLOCATOR) iemTbAllocatorFreeBulkStart(PVMCPUCC pVCpu)
1046{
1047 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1048 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1049
1050 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1051
1052 /* It should be sufficient to do this once. */
1053 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1054
1055 return pTbAllocator;
1056}
1057
1058#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
1059
1060/**
1061 * Schedules a TB for freeing when it's no longer being executed and/or part of
1062 * the caller's call stack.
1063 *
1064 * The TB will be removed from the translation block cache, though, so it isn't
1065 * possible to execute it again, and the IEMTB::pNext member can be used to link
1066 * it together with other TBs awaiting freeing.
1067 *
1068 * @param pVCpu The cross context virtual CPU structure of the calling
1069 * thread.
1070 * @param pTb The translation block to schedule for freeing.
1071 */
1072static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1073{
1074 /*
1075 * Validate state.
1076 */
1077 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1078 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1079 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1080 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1081 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1082 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1083#ifdef VBOX_STRICT
1084 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1085 Assert(pTbOther != pTb);
1086#endif
1087
1088 /*
1089 * Remove it from the cache and prepend it to the allocator's todo list.
1090 *
1091 * Note! It could still be in various lookup tables, so we trash the GCPhys
1092 * and CS attribs to ensure it won't be reused.
1093 */
1094 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1095 pTb->GCPhysPc = NIL_RTGCPHYS;
1096 pTb->x86.fAttr = UINT16_MAX;
1097
1098 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1099 pTbAllocator->pDelayedFreeHead = pTb;
1100}
1101
1102
1103/**
1104 * Processes the delayed frees.
1105 *
1106 * This is called by the allocator function as well as the native recompile
1107 * function before making any TB or executable memory allocations respectively.
1108 */
1109void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1110{
1111 /** @todo r=bird: these have already been removed from the cache,
1112 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1113 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1114 pTbAllocator->pDelayedFreeHead = NULL;
1115 while (pTb)
1116 {
1117 PIEMTB const pTbNext = pTb->pNext;
1118 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1119 iemTbAllocatorFree(pVCpu, pTb);
1120 pTb = pTbNext;
1121 }
1122}
1123
1124
1125#if 0
1126/**
1127 * Frees all TBs.
1128 */
1129static int iemTbAllocatorFreeAll(PVMCPUCC pVCpu)
1130{
1131 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1132 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1133 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1134
1135 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1136
1137 uint32_t idxChunk = pTbAllocator->cAllocatedChunks;
1138 while (idxChunk-- > 0)
1139 {
1140 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1141 uint32_t idxTb = pTbAllocator->cTbsPerChunk;
1142 while (idxTb-- > 0)
1143 {
1144 PIEMTB const pTb = &paTbs[idxTb];
1145 if (pTb->fFlags)
1146 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxTb);
1147 }
1148 }
1149
1150 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1151
1152# if 1
1153 /* Reset the free list. */
1154 pTbAllocator->pTbsFreeHead = NULL;
1155 idxChunk = pTbAllocator->cAllocatedChunks;
1156 while (idxChunk-- > 0)
1157 {
1158 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1159 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1160 RT_BZERO(paTbs, sizeof(paTbs[0]) * cTbsPerChunk);
1161 for (uint32_t idxTb = 0; idxTb < cTbsPerChunk; idxTb++)
1162 {
1163 paTbs[idxTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1164 paTbs[idxTb].pNext = pTbAllocator->pTbsFreeHead;
1165 pTbAllocator->pTbsFreeHead = &paTbs[idxTb];
1166 }
1167 }
1168# endif
1169
1170# if 1
1171 /* Completely reset the TB cache. */
1172 RT_BZERO(pVCpu->iem.s.pTbCacheR3->apHash, sizeof(pVCpu->iem.s.pTbCacheR3->apHash[0]) * pVCpu->iem.s.pTbCacheR3->cHash);
1173# endif
1174
1175 return VINF_SUCCESS;
1176}
1177#endif
1178
1179
1180/**
1181 * Grow the translation block allocator with another chunk.
1182 */
1183static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1184{
1185 /*
1186 * Validate state.
1187 */
1188 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1189 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1190 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1191 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1192 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1193
1194 /*
1195 * Allocate a new chunk and add it to the allocator.
1196 */
1197 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1198 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1199 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1200
1201 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1202 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1203 {
1204 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1205 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1206 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1207 }
1208 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1209 pTbAllocator->cTotalTbs += cTbsPerChunk;
1210
1211 return VINF_SUCCESS;
1212}
1213
1214
1215/**
1216 * Allocates a TB from allocator with free block.
1217 *
1218 * This is common code to both the fast and slow allocator code paths.
1219 */
1220DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1221{
1222 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1223 Assert(pTbAllocator->pTbsFreeHead);
1224
1225 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1226 pTbAllocator->pTbsFreeHead = pTb->pNext;
1227 pTbAllocator->cInUseTbs += 1;
1228 if (fThreaded)
1229 pTbAllocator->cThreadedTbs += 1;
1230 else
1231 pTbAllocator->cNativeTbs += 1;
1232 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1233 return pTb;
1234}
1235
1236
1237/**
1238 * Slow path for iemTbAllocatorAlloc.
1239 */
1240static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1241{
1242 /*
1243 * With some luck we can add another chunk.
1244 */
1245 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1246 {
1247 int rc = iemTbAllocatorGrow(pVCpu);
1248 if (RT_SUCCESS(rc))
1249 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1250 }
1251
1252 /*
1253 * We have to prune stuff. Sigh.
1254 *
1255 * This requires scanning for older TBs and kicking them out. Not sure how to
1256 * best do this as we don't want to maintain any list of TBs ordered by last
1257 * usage time. But one reasonably simple approach would be that each time we
1258 * get here we continue a sequential scan of the allocation chunks,
1259 * considering just a smallish number of TBs and freeing a fixed portion of
1260 * them. Say, we consider the next 128 TBs, freeing the least recently used
1261 * out of each group of 4 TBs, resulting in 32 free TBs.
1262 */
1263 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1264 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1265 uint32_t const cTbsToPrune = 128;
1266 uint32_t const cTbsPerGroup = 4;
1267 uint32_t cFreedTbs = 0;
1268#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1269 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1270#else
1271 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1272#endif
1273 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1274 idxTbPruneFrom = 0;
1275 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1276 {
1277 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1278 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1279 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1280 uint32_t cMsAge = msNow - pTb->msLastUsed;
1281 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1282
1283 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1284 {
1285#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1286 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1287 { /* likely */ }
1288 else
1289 {
1290 idxInChunk2 = 0;
1291 idxChunk2 += 1;
1292 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1293 idxChunk2 = 0;
1294 }
1295#endif
1296 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1297 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1298 if ( cMsAge2 > cMsAge
1299 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1300 {
1301 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1302 pTb = pTb2;
1303 idxChunk = idxChunk2;
1304 idxInChunk = idxInChunk2;
1305 cMsAge = cMsAge2;
1306 }
1307 }
1308
1309 /* Free the TB. */
1310 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1311 cFreedTbs++; /* paranoia */
1312 }
1313 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1314 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1315
1316 /* Flush the TB lookup entry pointer. */
1317 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1318
1319 /*
1320 * Allocate a TB from the ones we've pruned.
1321 */
1322 if (cFreedTbs)
1323 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1324 return NULL;
1325}
1326
1327
1328/**
1329 * Allocate a translation block.
1330 *
1331 * @returns Pointer to block on success, NULL if we're out and are unable to
1332 * free up an existing one (very unlikely once implemented).
1333 * @param pVCpu The cross context virtual CPU structure of the calling
1334 * thread.
1335 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1336 * For statistics.
1337 */
1338DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1339{
1340 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1341 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1342
1343 /* Free any pending TBs before we proceed. */
1344 if (!pTbAllocator->pDelayedFreeHead)
1345 { /* probably likely */ }
1346 else
1347 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1348
1349 /* If the allocator is full, take the slow code path. */
1350 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1351 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1352 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1353}
1354
1355
1356/**
1357 * This is called when we're out of space for native TBs.
1358 *
1359 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1360 * The difference is that we only prune native TBs and will only free any if
1361 * there are at least two in a group. The conditions under which we're called are
1362 * different - there will probably be free TBs in the table when we're called.
1363 * Therefore we increase the group size and max scan length, though we'll stop
1364 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1365 * up at least 8 TBs.
1366 */
1367void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1368{
1369 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1370 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1371
1372 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1373
1374 /*
1375 * Flush the delayed free list before we start freeing TBs indiscriminately.
1376 */
1377 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1378
1379 /*
1380 * Scan and free TBs.
1381 */
1382 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1383 uint32_t const cTbsToPrune = 128 * 8;
1384 uint32_t const cTbsPerGroup = 4 * 4;
1385 uint32_t cFreedTbs = 0;
1386 uint32_t cMaxInstrs = 0;
1387 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1388 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1389 {
1390 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1391 idxTbPruneFrom = 0;
1392 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1393 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1394 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1395 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1396 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1397
1398 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1399 {
1400 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1401 { /* likely */ }
1402 else
1403 {
1404 idxInChunk2 = 0;
1405 idxChunk2 += 1;
1406 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1407 idxChunk2 = 0;
1408 }
1409 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1410 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1411 {
1412 cNativeTbs += 1;
1413 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1414 if ( cMsAge2 > cMsAge
1415 || ( cMsAge2 == cMsAge
1416 && ( pTb2->cUsed < pTb->cUsed
1417 || ( pTb2->cUsed == pTb->cUsed
1418 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1419 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1420 {
1421 pTb = pTb2;
1422 idxChunk = idxChunk2;
1423 idxInChunk = idxInChunk2;
1424 cMsAge = cMsAge2;
1425 }
1426 }
1427 }
1428
1429 /* Free the TB if we found at least two native ones in this group. */
1430 if (cNativeTbs >= 2)
1431 {
1432 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1433 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1434 cFreedTbs++;
1435 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1436 break;
1437 }
1438 }
1439 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1440
1441 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1442}
1443
1444
1445/*********************************************************************************************************************************
1446* Threaded Recompiler Core *
1447*********************************************************************************************************************************/
1448/**
1449 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1450 * @returns pszBuf.
1451 * @param fFlags The flags.
1452 * @param pszBuf The output buffer.
1453 * @param cbBuf The output buffer size. At least 32 bytes.
1454 */
1455DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1456{
1457 Assert(cbBuf >= 32);
1458 static RTSTRTUPLE const s_aModes[] =
1459 {
1460 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1461 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1462 /* [02] = */ { RT_STR_TUPLE("!2!") },
1463 /* [03] = */ { RT_STR_TUPLE("!3!") },
1464 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1465 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1466 /* [06] = */ { RT_STR_TUPLE("!6!") },
1467 /* [07] = */ { RT_STR_TUPLE("!7!") },
1468 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1469 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1470 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1471 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1472 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1473 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1474 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1475 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1476 /* [10] = */ { RT_STR_TUPLE("!10!") },
1477 /* [11] = */ { RT_STR_TUPLE("!11!") },
1478 /* [12] = */ { RT_STR_TUPLE("!12!") },
1479 /* [13] = */ { RT_STR_TUPLE("!13!") },
1480 /* [14] = */ { RT_STR_TUPLE("!14!") },
1481 /* [15] = */ { RT_STR_TUPLE("!15!") },
1482 /* [16] = */ { RT_STR_TUPLE("!16!") },
1483 /* [17] = */ { RT_STR_TUPLE("!17!") },
1484 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1485 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1486 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1487 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1488 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1489 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1490 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1491 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1492 };
1493 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1494 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1495 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1496
1497 pszBuf[off++] = ' ';
1498 pszBuf[off++] = 'C';
1499 pszBuf[off++] = 'P';
1500 pszBuf[off++] = 'L';
1501 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1502 Assert(off < 32);
1503
1504 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1505
1506 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1507 {
1508 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1509 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1510 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1511 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1512 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1513 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1514 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1515 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1516 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1517 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1518 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1519 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1520 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1521 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1522 };
1523 if (fFlags)
1524 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1525 if (s_aFlags[i].fFlag & fFlags)
1526 {
1527 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1528 pszBuf[off++] = ' ';
1529 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1530 off += s_aFlags[i].cchName;
1531 fFlags &= ~s_aFlags[i].fFlag;
1532 if (!fFlags)
1533 break;
1534 }
1535 pszBuf[off] = '\0';
1536
1537 return pszBuf;
1538}
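/* Illustrative output (hypothetical flags): a 64-bit ring-0 native TB might format as
   "64BIT CPL0 TYPE_NATIVE", with any other set IEM_F_XXX/IEMTB_F_XXX flags appended. */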
1539
1540
1541/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1542static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1543{
1544 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1545 pDis->cbCachedInstr += cbMaxRead;
1546 RT_NOREF(cbMinRead);
1547 return VERR_NO_DATA;
1548}
1549
1550
1551/**
1552 * Worker for iemThreadedDisassembleTb.
1553 */
1554static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1555 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1556{
1557 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1558 {
1559 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1560 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1561 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1562 {
1563 PIEMTB pLookupTb = papTbLookup[iLookup];
1564 if (pLookupTb)
1565 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1566 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1567 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1568 : "invalid");
1569 else
1570 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1571 }
1572 pHlp->pfnPrintf(pHlp, "\n");
1573 }
1574 else
1575 {
1576 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1577 idxFirst, cEntries, pTb->cTbLookupEntries);
1578 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1579 }
1580}
1581
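/* Example output (illustrative): " TB Lookup: <pointer> (threaded),NULL" -- one
   entry per lookup slot, with NULL printed for slots that haven't been resolved yet. */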
1582
1583DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1584{
1585 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1586
1587 char szDisBuf[512];
1588
1589 /*
1590 * Print TB info.
1591 */
1592 pHlp->pfnPrintf(pHlp,
1593 "pTb=%p: GCPhysPc=%RGp (%RGv) cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1594 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1595 pTb, pTb->GCPhysPc, pTb->FlatPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1596 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1597
1598 /*
1599 * This disassembly is driven by the threaded call table: we disassemble the
1600 * next guest instruction whenever a call entry starts at a new opcode offset,
1601 * and dump the call details in between.
1602 */
1603 DISSTATE Dis;
1604 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1605 uint32_t const cCalls = pTb->Thrd.cCalls;
1606 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1607 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1608 : DISCPUMODE_64BIT;
1609 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1610 uint8_t idxRange = UINT8_MAX;
1611 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1612 uint32_t offRange = 0;
1613 uint32_t offOpcodes = 0;
1614 uint32_t const cbOpcodes = pTb->cbOpcodes;
1615 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1616 bool fTbLookupSeen0 = false;
1617
1618 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1619 {
1620 /*
1621 * New opcode range?
1622 */
1623 if ( idxRange == UINT8_MAX
1624 || idxRange >= cRanges
1625 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1626 {
1627 idxRange += 1;
1628 if (idxRange < cRanges)
1629 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1630 else
1631 continue;
1632 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1633 + (pTb->aRanges[idxRange].idxPhysPage == 0
1634 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1635 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1636 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1637 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1638 pTb->aRanges[idxRange].idxPhysPage);
1639 GCPhysPc += offRange;
1640 }
1641
1642 /*
1643 * Disassemble another guest instruction?
1644 */
1645 if ( paCalls[iCall].offOpcode != offOpcodes
1646 && paCalls[iCall].cbOpcode > 0
1647 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1648 {
1649 offOpcodes = paCalls[iCall].offOpcode;
1650 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1651 uint32_t cbInstr = 1;
1652 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1653 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1654 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1655 if (RT_SUCCESS(rc))
1656 {
1657 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1658 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1659 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1660 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1661 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1662 }
1663 else
1664 {
1665 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1666 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1667 cbInstr = paCalls[iCall].cbOpcode;
1668 }
1669 GCPhysPc += cbInstr;
1670 offRange += cbInstr;
1671 }
1672
1673 /*
1674 * Dump call details.
1675 */
1676 pHlp->pfnPrintf(pHlp,
1677 " Call #%u to %s (%u args)\n",
1678 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1679 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1680 if (paCalls[iCall].uTbLookup != 0)
1681 {
1682 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1683 fTbLookupSeen0 = idxFirst == 0;
1684 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1685 }
1686
1687 /*
1688 * Snoop fExec.
1689 */
1690 switch (paCalls[iCall].enmFunction)
1691 {
1692 default:
1693 break;
1694 case kIemThreadedFunc_BltIn_CheckMode:
1695 fExec = paCalls[iCall].auParams[0];
1696 break;
1697 }
1698 }
1699
1700 if (!fTbLookupSeen0)
1701 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1702}
1703
1704
1705
1706/**
1707 * Allocate a translation block for threaded recompilation.
1708 *
1709 * This is allocated with a maxed-out call table and storage for opcode bytes,
1710 * because it's only supposed to be called once per EMT to allocate the TB
1711 * pointed to by IEMCPU::pThrdCompileTbR3.
1712 *
1713 * @returns Pointer to the translation block on success, NULL on failure.
1714 * @param pVM The cross context virtual machine structure.
1715 * @param pVCpu The cross context virtual CPU structure of the calling
1716 * thread.
1717 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1718 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1719 */
1720static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1721{
1722 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1723 if (pTb)
1724 {
1725 unsigned const cCalls = 256;
1726 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1727 if (pTb->Thrd.paCalls)
1728 {
1729 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1730 if (pTb->pabOpcodes)
1731 {
1732 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1733 pTb->Thrd.cAllocated = cCalls;
1734 pTb->Thrd.cCalls = 0;
1735 pTb->cbOpcodes = 0;
1736 pTb->pNext = NULL;
1737 pTb->cUsed = 0;
1738 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1739 pTb->idxAllocChunk = UINT8_MAX;
1740 pTb->GCPhysPc = GCPhysPc;
1741 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1742 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1743 pTb->cInstructions = 0;
1744 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1745
1746 /* Init the first opcode range. */
1747 pTb->cRanges = 1;
1748 pTb->aRanges[0].cbOpcodes = 0;
1749 pTb->aRanges[0].offOpcodes = 0;
1750 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1751 pTb->aRanges[0].u2Unused = 0;
1752 pTb->aRanges[0].idxPhysPage = 0;
1753 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1754 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1755
1756 return pTb;
1757 }
1758 RTMemFree(pTb->Thrd.paCalls);
1759 }
1760 RTMemFree(pTb);
1761 }
1762 RT_NOREF(pVM);
1763 return NULL;
1764}
1765
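/* Sizing note (informational): with cCalls = 256 the compile-time TB gets room
   for 256 call entries and 256 * 16 = 4096 opcode bytes; iemThreadedCompile()
   checks for several spare call entries and 16 spare opcode bytes before
   decoding another instruction, so this TB is not outgrown mid-instruction. */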
1766
1767/**
1768 * Called on the TB that is dedicated for recompilation before it's reused.
1769 *
1770 * @param pVCpu The cross context virtual CPU structure of the calling
1771 * thread.
1772 * @param pTb The translation block to reuse.
1773 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1774 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1775 */
1776static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1777{
1778 pTb->GCPhysPc = GCPhysPc;
1779 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1780 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1781 pTb->Thrd.cCalls = 0;
1782 pTb->cbOpcodes = 0;
1783 pTb->cInstructions = 0;
1784 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1785
1786 /* Init the first opcode range. */
1787 pTb->cRanges = 1;
1788 pTb->aRanges[0].cbOpcodes = 0;
1789 pTb->aRanges[0].offOpcodes = 0;
1790 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1791 pTb->aRanges[0].u2Unused = 0;
1792 pTb->aRanges[0].idxPhysPage = 0;
1793 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1794 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1795}
1796
1797
1798/**
1799 * Used to duplicate a threaded translation block after recompilation is done.
1800 *
1801 * @returns Pointer to the translation block on success, NULL on failure.
1802 * @param pVM The cross context virtual machine structure.
1803 * @param pVCpu The cross context virtual CPU structure of the calling
1804 * thread.
1805 * @param pTbSrc The TB to duplicate.
1806 */
1807static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1808{
1809 /*
1810 * Just using the heap for now. Will make this more efficient and
1811 * complicated later, don't worry. :-)
1812 */
1813 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1814 if (pTb)
1815 {
1816 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1817 memcpy(pTb, pTbSrc, sizeof(*pTb));
1818 pTb->idxAllocChunk = idxAllocChunk;
1819
1820 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1821 Assert(cCalls > 0);
1822 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1823 if (pTb->Thrd.paCalls)
1824 {
1825 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1826 Assert(cbTbLookup > 0);
1827 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1828 Assert(cbOpcodes > 0);
1829 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1830 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1831 if (pbBoth)
1832 {
1833 RT_BZERO(pbBoth, cbTbLookup);
1834 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1835 pTb->Thrd.cAllocated = cCalls;
1836 pTb->pNext = NULL;
1837 pTb->cUsed = 0;
1838 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1839 pTb->fFlags = pTbSrc->fFlags;
1840
1841 return pTb;
1842 }
1843 RTMemFree(pTb->Thrd.paCalls);
1844 }
1845 iemTbAllocatorFree(pVCpu, pTb);
1846 }
1847 RT_NOREF(pVM);
1848 return NULL;
1849
1850}
1851
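/* Layout note (informational): the lookup table and the opcode bytes of a
   finalized TB share a single heap block -- the cTbLookupEntries pointer table
   comes first (zeroed above) and pabOpcodes points right after it, with the
   opcode area rounded up to pointer size for the allocation. */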
1852
1853/**
1854 * Adds the given TB to the hash table.
1855 *
1856 * @param pVCpu The cross context virtual CPU structure of the calling
1857 * thread.
1858 * @param pTbCache The cache to add it to.
1859 * @param pTb The translation block to add.
1860 */
1861static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1862{
1863 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1864
1865 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1866 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1867 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1868 if (LogIs12Enabled())
1869 {
1870 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1871 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1872 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1873 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1874 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1875 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1876 pTb->aRanges[idxRange].idxPhysPage == 0
1877 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1878 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1879 }
1880}
1881
1882
1883/**
1884 * Called by opcode verifier functions when they detect a problem.
1885 */
1886void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1887{
1888 /* We cannot free the current TB (indicated by fSafeToFree being false) because:
1889 - A threaded TB will have its current call entry accessed
1890 to update pVCpu->iem.s.cInstructions.
1891 - A native TB will have code left to execute. */
1892 if (fSafeToFree)
1893 iemTbAllocatorFree(pVCpu, pTb);
1894 else
1895 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1896}
1897
1898
1899/*
1900 * Real code.
1901 */
1902
1903#ifdef LOG_ENABLED
1904/**
1905 * Logs the current instruction.
1906 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1907 * @param pszFunction The IEM function doing the execution.
1908 * @param idxInstr The instruction number in the block.
1909 */
1910static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1911{
1912# ifdef IN_RING3
1913 if (LogIs2Enabled())
1914 {
1915 char szInstr[256];
1916 uint32_t cbInstr = 0;
1917 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1918 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1919 szInstr, sizeof(szInstr), &cbInstr);
1920
1921 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1922 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1923 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1924 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1925 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1926 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1927 " %s\n"
1928 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1929 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1930 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1931 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1932 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1933 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1934 szInstr));
1935
1936 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1937 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1938 }
1939 else
1940# endif
1941 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1942 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1943}
1944#endif /* LOG_ENABLED */
1945
1946
1947#if 0
1948static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1949{
1950 RT_NOREF(pVM, pVCpu);
1951 return rcStrict;
1952}
1953#endif
1954
1955
1956/**
1957 * Initializes the decoder state when compiling TBs.
1958 *
1959 * This presumes that fExec has already been initialized.
1960 *
1961 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so fixes
1962 * made here may need to be applied to them as well.
1963 *
1964 * @param pVCpu The cross context virtual CPU structure of the calling
1965 * thread.
1966 * @param fReInit Clear for the first call for a TB, set for subsequent
1967 * calls from inside the compile loop where we can skip a
1968 * couple of things.
1969 * @param fExtraFlags The extra translation block flags when @a fReInit is
1970 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1971 * checked.
1972 */
1973DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1974{
1975 /* ASSUMES: That iemInitExec was already called and that anyone changing
1976 CPU state affecting the fExec bits since then will have updated fExec! */
1977 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1978 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1979
1980 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1981
1982 /* Decoder state: */
1983 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1984 pVCpu->iem.s.enmEffAddrMode = enmMode;
1985 if (enmMode != IEMMODE_64BIT)
1986 {
1987 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1988 pVCpu->iem.s.enmEffOpSize = enmMode;
1989 }
1990 else
1991 {
1992 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1993 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1994 }
1995 pVCpu->iem.s.fPrefixes = 0;
1996 pVCpu->iem.s.uRexReg = 0;
1997 pVCpu->iem.s.uRexB = 0;
1998 pVCpu->iem.s.uRexIndex = 0;
1999 pVCpu->iem.s.idxPrefix = 0;
2000 pVCpu->iem.s.uVex3rdReg = 0;
2001 pVCpu->iem.s.uVexLength = 0;
2002 pVCpu->iem.s.fEvexStuff = 0;
2003 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
2004 pVCpu->iem.s.offModRm = 0;
2005 pVCpu->iem.s.iNextMapping = 0;
2006
2007 if (!fReInit)
2008 {
2009 pVCpu->iem.s.cActiveMappings = 0;
2010 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
2011 pVCpu->iem.s.fEndTb = false;
2012 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes before executing the first instruction) */
2013 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2014 pVCpu->iem.s.fTbCrossedPage = false;
2015 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
2016 pVCpu->iem.s.idxLastCheckIrqCallNo = UINT16_MAX;
2017 pVCpu->iem.s.fTbCurInstrIsSti = false;
2018 /* Force RF clearing and TF checking on the first instruction in the block
2019 as we don't really know what came before and should assume the worst: */
2020 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
2021 }
2022 else
2023 {
2024 Assert(pVCpu->iem.s.cActiveMappings == 0);
2025 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
2026 Assert(pVCpu->iem.s.fEndTb == false);
2027 Assert(pVCpu->iem.s.fTbCrossedPage == false);
2028 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
2029 }
2030 pVCpu->iem.s.fTbCurInstr = 0;
2031
2032#ifdef DBGFTRACE_ENABLED
2033 switch (IEM_GET_CPU_MODE(pVCpu))
2034 {
2035 case IEMMODE_64BIT:
2036 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
2037 break;
2038 case IEMMODE_32BIT:
2039 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2040 break;
2041 case IEMMODE_16BIT:
2042 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2043 break;
2044 }
2045#endif
2046}
2047
2048
2049/**
2050 * Initializes the opcode fetcher when starting the compilation.
2051 *
2052 * @param pVCpu The cross context virtual CPU structure of the calling
2053 * thread.
2054 */
2055DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
2056{
2057 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
2058#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2059 pVCpu->iem.s.offOpcode = 0;
2060#else
2061 RT_NOREF(pVCpu);
2062#endif
2063}
2064
2065
2066/**
2067 * Re-initializes the opcode fetcher between instructions while compiling.
2068 *
2069 * @param pVCpu The cross context virtual CPU structure of the calling
2070 * thread.
2071 */
2072DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
2073{
2074 if (pVCpu->iem.s.pbInstrBuf)
2075 {
2076 uint64_t off = pVCpu->cpum.GstCtx.rip;
2077 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2078 off += pVCpu->cpum.GstCtx.cs.u64Base;
2079 off -= pVCpu->iem.s.uInstrBufPc;
2080 if (off < pVCpu->iem.s.cbInstrBufTotal)
2081 {
2082 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2083 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2084 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2085 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2086 else
2087 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2088 }
2089 else
2090 {
2091 pVCpu->iem.s.pbInstrBuf = NULL;
2092 pVCpu->iem.s.offInstrNextByte = 0;
2093 pVCpu->iem.s.offCurInstrStart = 0;
2094 pVCpu->iem.s.cbInstrBuf = 0;
2095 pVCpu->iem.s.cbInstrBufTotal = 0;
2096 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2097 }
2098 }
2099 else
2100 {
2101 pVCpu->iem.s.offInstrNextByte = 0;
2102 pVCpu->iem.s.offCurInstrStart = 0;
2103 pVCpu->iem.s.cbInstrBuf = 0;
2104 pVCpu->iem.s.cbInstrBufTotal = 0;
2105#ifdef VBOX_STRICT
2106 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2107#endif
2108 }
2109#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2110 pVCpu->iem.s.offOpcode = 0;
2111#endif
2112}
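
/* Worked example (illustrative values): with CS base 0, uInstrBufPc = 0x10000
   and rip = 0x100f0, off becomes 0xf0; if cbInstrBufTotal is 0x1000 the buffer
   still covers the PC, so offInstrNextByte/offCurInstrStart are set to 0xf0 and
   cbInstrBuf is capped at off + 15 = 0xff. */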
2113
2114#ifdef LOG_ENABLED
2115
2116/**
2117 * Inserts a NOP call.
2118 *
2119 * This is for debugging.
2120 *
2121 * @returns true on success, false if we're out of call entries.
2122 * @param pTb The translation block being compiled.
2123 */
2124bool iemThreadedCompileEmitNop(PIEMTB pTb)
2125{
2126 /* Emit the call. */
2127 uint32_t const idxCall = pTb->Thrd.cCalls;
2128 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2129 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2130 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2131 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2132 pCall->idxInstr = pTb->cInstructions - 1;
2133 pCall->cbOpcode = 0;
2134 pCall->offOpcode = 0;
2135 pCall->uTbLookup = 0;
2136 pCall->fFlags = 0;
2137 pCall->auParams[0] = 0;
2138 pCall->auParams[1] = 0;
2139 pCall->auParams[2] = 0;
2140 return true;
2141}
2142
2143
2144/**
2145 * Called by iemThreadedCompile if cpu state logging is desired.
2146 *
2147 * @returns true on success, false if we're out of call entries.
2148 * @param pTb The translation block being compiled.
2149 */
2150bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2151{
2152 /* Emit the call. */
2153 uint32_t const idxCall = pTb->Thrd.cCalls;
2154 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2155 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2156 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2157 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2158 pCall->idxInstr = pTb->cInstructions - 1;
2159 pCall->cbOpcode = 0;
2160 pCall->offOpcode = 0;
2161 pCall->uTbLookup = 0;
2162 pCall->fFlags = 0;
2163 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2164 pCall->auParams[1] = 0;
2165 pCall->auParams[2] = 0;
2166 return true;
2167}
2168
2169#endif /* LOG_ENABLED */
2170
2171DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2172{
2173 switch (cbInstr)
2174 {
2175 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2176 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2177 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2178 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2179 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2180 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2181 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2182 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2183 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2184 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2185 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2186 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2187 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2188 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2189 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2190 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2191 }
2192}
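
/* The switch above is a hand-unrolled equivalent of
   memcpy(pbDst, pVCpu->iem.s.abOpcode, cbInstr) for 1..15 byte instructions,
   presumably to avoid a length loop / libc call on this hot path. */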
2193
2194#ifdef IEM_WITH_INTRA_TB_JUMPS
2195
2196/**
2197 * Emits the necessary tail calls for a full TB loop-jump.
2198 */
2199static bool iemThreadedCompileFullTbJump(PVMCPUCC pVCpu, PIEMTB pTb)
2200{
2201 /*
2202 * We need a timer and maybe IRQ check before jumping, so make sure
2203 * we've got sufficient call entries left before emitting anything.
2204 */
2205 uint32_t idxCall = pTb->Thrd.cCalls;
2206 if (idxCall + 1U <= pTb->Thrd.cAllocated)
2207 {
2208 /*
2209 * We're good, emit the calls.
2210 */
2211 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2212 pTb->Thrd.cCalls = (uint16_t)(idxCall + 2);
2213
2214 /* Always check timers as we risk getting stuck in a loop otherwise. We
2215 combine it with an IRQ check if that's not performed in the TB already. */
2216 pCall->enmFunction = pVCpu->iem.s.idxLastCheckIrqCallNo < idxCall
2217 ? kIemThreadedFunc_BltIn_CheckTimers
2218 : kIemThreadedFunc_BltIn_CheckTimersAndIrq;
2219 pCall->idxInstr = 0;
2220 pCall->offOpcode = 0;
2221 pCall->cbOpcode = 0;
2222 pCall->uTbLookup = 0;
2223 pCall->fFlags = 0;
2224 pCall->auParams[0] = 0;
2225 pCall->auParams[1] = 0;
2226 pCall->auParams[2] = 0;
2227 pCall++;
2228
2229 /* The jump callentry[0]. */
2230 pCall->enmFunction = kIemThreadedFunc_BltIn_Jump;
2231 pCall->idxInstr = 0;
2232 pCall->offOpcode = 0;
2233 pCall->cbOpcode = 0;
2234 pCall->uTbLookup = 0;
2235 pCall->fFlags = 0;
2236 pCall->auParams[0] = 0; /* jump target is call zero */
2237 pCall->auParams[1] = 0;
2238 pCall->auParams[2] = 0;
2239
2240 /* Mark callentry #0 as a jump target. */
2241 pTb->Thrd.paCalls[0].fFlags |= IEMTHREADEDCALLENTRY_F_JUMP_TARGET;
2242 }
2243
2244 return false;
2245}
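
/* Note (informational): iemThreadedCompileFullTbJump() returns false in all
   cases so that the caller stops compiling; when both calls could be emitted,
   the BltIn_Jump entry becomes the final call of the TB. */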
2246
2247/**
2248 * Called by IEM_MC2_BEGIN_EMIT_CALLS when it detects that we're back at the
2249 * first instruction and we didn't just branch to it (that's handled below).
2250 *
2251 * This will emit a loop iff everything is compatible with that.
2252 */
2253DECLHIDDEN(int) iemThreadedCompileBackAtFirstInstruction(PVMCPU pVCpu, PIEMTB pTb) RT_NOEXCEPT
2254{
2255 /* Check if the mode matches. */
2256 if ( (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2257 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS))
2258 {
2259 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected2);
2260 iemThreadedCompileFullTbJump(pVCpu, pTb);
2261 }
2262 return VINF_IEM_RECOMPILE_END_TB;
2263}
2264
2265#endif /* IEM_WITH_INTRA_TB_JUMPS */
2266
2267
2268/**
2269 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2270 *
2271 * - CS LIM check required.
2272 * - Must recheck opcode bytes.
2273 * - Previous instruction branched.
2274 * - TLB load detected, probably due to page crossing.
2275 *
2276 * @returns true if everything went well, false if we're out of space in the TB
2277 * (e.g. opcode ranges) or we need to start doing CS.LIM checks.
2278 * @param pVCpu The cross context virtual CPU structure of the calling
2279 * thread.
2280 * @param pTb The translation block being compiled.
2281 */
2282bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2283{
2284 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2285 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2286#if 0
2287 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2288 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2289#endif
2290
2291 /*
2292 * If we're not in 64-bit mode and not already checking CS.LIM we need to
2293 * see if it's needed to start checking.
2294 */
2295 bool fConsiderCsLimChecking;
2296 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2297 if ( fMode == IEM_F_MODE_X86_64BIT
2298 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2299 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2300 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2301 fConsiderCsLimChecking = false; /* already enabled or not needed */
2302 else
2303 {
2304 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2305 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2306 fConsiderCsLimChecking = true; /* likely */
2307 else
2308 {
2309 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2310 return false;
2311 }
2312 }
2313
2314 /*
2315 * Prepare the call now, even before we know if we can accept the instruction in this TB.
2316 * This allows us to amend parameters w/o making every case suffer.
2317 */
2318 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2319 uint16_t const offOpcode = pTb->cbOpcodes;
2320 uint8_t idxRange = pTb->cRanges - 1;
2321
2322 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2323 pCall->idxInstr = pTb->cInstructions;
2324 pCall->cbOpcode = cbInstr;
2325 pCall->offOpcode = offOpcode;
2326 pCall->uTbLookup = 0;
2327 pCall->fFlags = 0;
2328 pCall->auParams[0] = (uint32_t)cbInstr
2329 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2330 /* The upper dword is sometimes used for cbStartPage. */;
2331 pCall->auParams[1] = idxRange;
2332 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2333
2334/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2335 * gotten onto. If we do, stop */
2336
2337 /*
2338 * Case 1: We've branched (RIP changed).
2339 *
2340 * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2341 * TB, end the TB here as it is most likely a loop and if it
2342 * made sense to unroll it, the guest code compiler should've
2343 * done it already.
2344 *
2345 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2346 * Req: 1 extra range, no extra phys.
2347 *
2348 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2349 * necessary (fTbCrossedPage is true).
2350 * Req: 1 extra range, probably 1 extra phys page entry.
2351 *
2352 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2353 * but in addition we cross into the following page and require
2354 * another TLB load.
2355 * Req: 2 extra ranges, probably 2 extra phys page entries.
2356 *
2357 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2358 * the following page (thus fTbCrossedPage is true).
2359 * Req: 2 extra ranges, probably 1 extra phys page entry.
2360 *
2361 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
2362 * it may trigger "spuriously" from the CPU point of view because of
2363 * physical page changes that'll invalidate the physical TLB and trigger a
2364 * call to the function. In theory this shouldn't be a big deal, just a bit
2365 * of performance loss as we'll pick the LoadingTlb variants.
2366 *
2367 * Note! We do not currently optimize branching to the next instruction (sorry
2368 * 32-bit PIC code). We could maybe do that in the branching code that
2369 * sets (or not) fTbBranched.
2370 */
2371 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2372 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2373 * code. This'll require filtering out far jmps and calls, as they
2374 * load CS which should technically be considered indirect since the
2375 * GDT/LDT entry's base address can be modified independently from
2376 * the code. */
2377 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2378 {
2379 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2380 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2381 {
2382 /* 1a + 1b - instruction fully within the branched to page. */
2383 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2384 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2385
2386 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2387 {
2388 /* Check that we've got a free range. */
2389 idxRange += 1;
2390 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2391 { /* likely */ }
2392 else
2393 {
2394 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2395 return false;
2396 }
2397 pCall->auParams[1] = idxRange;
2398 pCall->auParams[2] = 0;
2399
2400 /* Check that we've got a free page slot. */
2401 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2402 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2403 uint8_t idxPhysPage;
2404 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2405 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2406 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2407 {
2408 pTb->aGCPhysPages[0] = GCPhysNew;
2409 pTb->aRanges[idxRange].idxPhysPage = 1;
2410 idxPhysPage = UINT8_MAX;
2411 }
2412 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2413 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2414 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2415 {
2416 pTb->aGCPhysPages[1] = GCPhysNew;
2417 pTb->aRanges[idxRange].idxPhysPage = 2;
2418 idxPhysPage = UINT8_MAX;
2419 }
2420 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2421 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2422 else
2423 {
2424 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2425 return false;
2426 }
2427
2428 /* Loop check: We weave the loop check in here to optimize the lookup. */
2429 if (idxPhysPage != UINT8_MAX)
2430 {
2431 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2432 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2433 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2434 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2435 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2436 {
2437 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2438#ifdef IEM_WITH_INTRA_TB_JUMPS
2439 /* If we're looping back to the start of the TB and the mode is still the same,
2440 we could emit a jump optimization. For now we don't do page transitions
2441 as that implies TLB loading and such. */
2442 if ( idxLoopRange == 0
2443 && offPhysPc == pTb->aRanges[0].offPhysPage
2444 && (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2445 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS)
2446 && (pVCpu->iem.s.fTbBranched & ( IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR
2447 | IEMBRANCHED_F_STACK | IEMBRANCHED_F_RELATIVE))
2448 == IEMBRANCHED_F_RELATIVE)
2449 {
2450 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected);
2451 return iemThreadedCompileFullTbJump(pVCpu, pTb);
2452 }
2453#endif
2454 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2455 return false;
2456 }
2457 }
2458
2459 /* Finish setting up the new range. */
2460 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2461 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2462 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2463 pTb->aRanges[idxRange].u2Unused = 0;
2464 pTb->cRanges++;
2465 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2466 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2467 pTb->aRanges[idxRange].offOpcodes));
2468 }
2469 else
2470 {
2471 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2472 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2473 }
2474
2475 /* Determine which function we need to load & check.
2476 Note! For jumps to a new page, we'll set both fTbBranched and
2477 fTbCrossedPage to avoid unnecessary TLB work for intra
2478 page branching */
2479 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2480 || pVCpu->iem.s.fTbCrossedPage)
2481 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2482 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2483 : !fConsiderCsLimChecking
2484 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2485 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2486 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2487 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2488 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2489 : !fConsiderCsLimChecking
2490 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2491 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2492 else
2493 {
2494 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2495 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2496 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2497 : !fConsiderCsLimChecking
2498 ? kIemThreadedFunc_BltIn_CheckOpcodes
2499 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2500 }
2501 }
2502 else
2503 {
2504 /* 1c + 1d - instruction crosses pages. */
2505 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2506 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2507
2508 /* Lazy bird: Check that this isn't case 1c, since we've already
2509 loaded the first physical address. End the TB and
2510 make it a case 2b instead.
2511
2512 Hmm. Too much bother to detect, so just do the same
2513 with case 1d as well. */
2514#if 0 /** @todo get back to this later when we've got the actual branch code in
2515 * place. */
2516 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2517
2518 /* Check that we've got two free ranges. */
2519 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2520 { /* likely */ }
2521 else
2522 return false;
2523 idxRange += 1;
2524 pCall->auParams[1] = idxRange;
2525 pCall->auParams[2] = 0;
2526
2527 /* ... */
2528
2529#else
2530 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2531 return false;
2532#endif
2533 }
2534 }
2535
2536 /*
2537 * Case 2: Page crossing.
2538 *
2539 * Sub-case 2a: The instruction starts on the first byte in the next page.
2540 *
2541 * Sub-case 2b: The instruction has opcode bytes in both the current and
2542 * following page.
2543 *
2544 * Both cases require a new range table entry and probably a new physical
2545 * page entry. The difference is in which functions to emit and whether to
2546 * add bytes to the current range.
2547 */
2548 else if (pVCpu->iem.s.fTbCrossedPage)
2549 {
2550 /* Check that we've got a free range. */
2551 idxRange += 1;
2552 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2553 { /* likely */ }
2554 else
2555 {
2556 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2557 return false;
2558 }
2559
2560 /* Check that we've got a free page slot. */
2561 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2562 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2563 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2564 pTb->aRanges[idxRange].idxPhysPage = 0;
2565 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2566 || pTb->aGCPhysPages[0] == GCPhysNew)
2567 {
2568 pTb->aGCPhysPages[0] = GCPhysNew;
2569 pTb->aRanges[idxRange].idxPhysPage = 1;
2570 }
2571 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2572 || pTb->aGCPhysPages[1] == GCPhysNew)
2573 {
2574 pTb->aGCPhysPages[1] = GCPhysNew;
2575 pTb->aRanges[idxRange].idxPhysPage = 2;
2576 }
2577 else
2578 {
2579 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2580 return false;
2581 }
2582
2583 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2584 {
2585 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2586 pCall->auParams[1] = idxRange;
2587 pCall->auParams[2] = 0;
2588
2589 /* Finish setting up the new range. */
2590 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2591 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2592 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2593 pTb->aRanges[idxRange].u2Unused = 0;
2594 pTb->cRanges++;
2595 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2596 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2597 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2598
2599 /* Determine which function we need to load & check. */
2600 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2601 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2602 : !fConsiderCsLimChecking
2603 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2604 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2605 }
2606 else
2607 {
2608 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2609 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2610 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2611 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2612
2613 /* We're good. Split the instruction over the old and new range table entries. */
2614 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2615
2616 pTb->aRanges[idxRange].offPhysPage = 0;
2617 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2618 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2619 pTb->aRanges[idxRange].u2Unused = 0;
2620 pTb->cRanges++;
2621 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2622 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2623 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
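            /* Example (illustrative): a 5 byte instruction starting 3 bytes before the
               page boundary has offCurInstrStart = -3, so cbStartPage = 3; the previous
               range gains those 3 opcode bytes and the new range starts at offPhysPage
               zero with the remaining cbInstr - cbStartPage = 2 bytes. */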
2624
2625 /* Determine which function we need to load & check. */
2626 if (pVCpu->iem.s.fTbCheckOpcodes)
2627 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2628 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2629 : !fConsiderCsLimChecking
2630 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2631 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2632 else
2633 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2634 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2635 : !fConsiderCsLimChecking
2636 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2637 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2638 }
2639 }
2640
2641 /*
2642 * Regular case: No new range required.
2643 */
2644 else
2645 {
2646 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2647 if (pVCpu->iem.s.fTbCheckOpcodes)
2648 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2649 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2650 : kIemThreadedFunc_BltIn_CheckOpcodes;
2651 else
2652 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2653
2654 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2655 pTb->cbOpcodes = offOpcode + cbInstr;
2656 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2657 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2658 }
2659
2660 /*
2661 * Commit the call.
2662 */
2663 pTb->Thrd.cCalls++;
2664
2665 /*
2666 * Clear state.
2667 */
2668 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2669 pVCpu->iem.s.fTbCrossedPage = false;
2670 pVCpu->iem.s.fTbCheckOpcodes = false;
2671
2672 /*
2673 * Copy opcode bytes.
2674 */
2675 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2676 pTb->cbOpcodes = offOpcode + cbInstr;
2677 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2678
2679 return true;
2680}
2681
2682
2683/**
2684 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2685 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2686 *
2687 * @returns true if anything is pending, false if not.
2688 * @param pVCpu The cross context virtual CPU structure of the calling
2689 * thread.
2690 */
2691DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2692{
2693 uint64_t fCpu = pVCpu->fLocalForcedActions;
2694 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2695#if 1
2696 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
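    /* I.e. only report something pending when an NMI or SMI is pending, or when
       an APIC/PIC interrupt is pending while IF is set and we're not in an
       interrupt shadow; anything else lets the compile loop continue. */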
2697 if (RT_LIKELY( !fCpu
2698 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2699 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2700 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2701 return false;
2702 return true;
2703#else
2704 return false;
2705#endif
2706
2707}
2708
2709
2710/**
2711 * Called by iemThreadedCompile when a block requires a mode check.
2712 *
2713 * @returns true if we should continue, false if we're out of call entries.
2714 * @param pVCpu The cross context virtual CPU structure of the calling
2715 * thread.
2716 * @param pTb The translation block being compiled.
2717 */
2718static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2719{
2720 /* Emit the call. */
2721 uint32_t const idxCall = pTb->Thrd.cCalls;
2722 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2723 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2724 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2725 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2726 pCall->idxInstr = pTb->cInstructions - 1;
2727 pCall->cbOpcode = 0;
2728 pCall->offOpcode = 0;
2729 pCall->uTbLookup = 0;
2730 pCall->fFlags = 0;
2731 pCall->auParams[0] = pVCpu->iem.s.fExec;
2732 pCall->auParams[1] = 0;
2733 pCall->auParams[2] = 0;
2734 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2735 return true;
2736}
2737
2738
2739/**
2740 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2741 * set.
2742 *
2743 * @returns true if we should continue, false if an IRQ is deliverable or a
2744 * relevant force flag is pending.
2745 * @param pVCpu The cross context virtual CPU structure of the calling
2746 * thread.
2747 * @param pTb The translation block being compiled.
2748 * @sa iemThreadedCompileCheckIrq
2749 */
2750bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2751{
2752 /*
2753 * Skip this if we've already emitted a call after the previous instruction
2754 * or if it's the first call, as we're always checking FFs between blocks.
2755 */
2756 uint32_t const idxCall = pTb->Thrd.cCalls;
2757 if ( idxCall > 0
2758 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2759 {
2760 /* Emit the call. */
2761 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2762 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2763 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2764 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2765 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2766 pCall->idxInstr = pTb->cInstructions;
2767 pCall->offOpcode = 0;
2768 pCall->cbOpcode = 0;
2769 pCall->uTbLookup = 0;
2770 pCall->fFlags = 0;
2771 pCall->auParams[0] = 0;
2772 pCall->auParams[1] = 0;
2773 pCall->auParams[2] = 0;
2774 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2775
2776 /* Reset the IRQ check value. */
2777 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2778
2779 /*
2780 * Check for deliverable IRQs and pending force flags.
2781 */
2782 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2783 }
2784 return true; /* continue */
2785}
2786
2787
2788/**
2789 * Emits an IRQ check call and checks for pending IRQs.
2790 *
2791 * @returns true if we should continue, false if an IRQ is deliverable or a
2792 * relevant force flag is pending.
2793 * @param pVCpu The cross context virtual CPU structure of the calling
2794 * thread.
2795 * @param pTb The translation block.
2796 * @sa iemThreadedCompileBeginEmitCallsComplications
2797 */
2798static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2799{
2800 /* Check again in a little bit, unless it is immediately following an STI
2801 in which case we *must* check immediately after the next instruction
2802 as well in case it's executed with interrupt inhibition. We could
2803 otherwise miss the interrupt window. See the irq2 wait2 variant in
2804 bs3-timers-1 which is doing sti + sti + cli. */
2805 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2806 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2807 else
2808 {
2809 pVCpu->iem.s.fTbCurInstrIsSti = false;
2810 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2811 }
2812 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2813
2814 /*
2815 * Emit the call.
2816 */
2817 uint32_t const idxCall = pTb->Thrd.cCalls;
2818 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2819 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2820 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2821 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2822 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2823 pCall->idxInstr = pTb->cInstructions;
2824 pCall->offOpcode = 0;
2825 pCall->cbOpcode = 0;
2826 pCall->uTbLookup = 0;
2827 pCall->fFlags = 0;
2828 pCall->auParams[0] = 0;
2829 pCall->auParams[1] = 0;
2830 pCall->auParams[2] = 0;
2831
2832 /*
2833 * Check for deliverable IRQs and pending force flags.
2834 */
2835 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2836}
2837
2838
2839/**
2840 * Compiles a new TB and executes it.
2841 *
2842 * We combine compilation and execution here as it makes it simpler code flow
2843 * in the main loop and it allows interpreting while compiling if we want to
2844 * explore that option.
2845 *
2846 * @returns Strict VBox status code.
2847 * @param pVM The cross context virtual machine structure.
2848 * @param pVCpu The cross context virtual CPU structure of the calling
2849 * thread.
2850 * @param GCPhysPc The physical address corresponding to the current
2851 * RIP+CS.BASE.
2852 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2853 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2854 */
2855static IEM_DECL_MSC_GUARD_IGNORE VBOXSTRICTRC
2856iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2857{
2858 IEMTLBTRACE_TB_COMPILE(pVCpu, GCPhysPc);
2859 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2860 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2861
2862 /*
2863 * Get the TB we use for the recompiling. This is a maxed-out TB, so
2864 * that we'll make a more efficient copy of it when we're done compiling.
2865 */
2866 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2867 if (pTb)
2868 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2869 else
2870 {
2871 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2872 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2873 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2874 }
2875 pTb->FlatPc = pVCpu->iem.s.uInstrBufPc | (GCPhysPc & GUEST_PAGE_OFFSET_MASK);
2876
2877 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2878 functions may get at it. */
2879 pVCpu->iem.s.pCurTbR3 = pTb;
2880
2881#if 0
2882 /* Make sure the CheckIrq condition matches the one in EM. */
2883 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2884 const uint32_t cZeroCalls = 1;
2885#else
2886 const uint32_t cZeroCalls = 0;
2887#endif
2888
2889 /*
2890 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2891 */
2892 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2893 iemThreadedCompileInitOpcodeFetching(pVCpu);
2894 VBOXSTRICTRC rcStrict;
2895 for (;;)
2896 {
2897 /* Process the next instruction. */
2898#ifdef LOG_ENABLED
2899 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2900 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2901 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2902 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2903#endif
2904 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2905 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2906
2907 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2908#if 0
2909 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2910 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2911 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2912#endif
2913 if ( rcStrict == VINF_SUCCESS
2914 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2915 && !pVCpu->iem.s.fEndTb)
2916 {
2917 Assert(pTb->Thrd.cCalls > cCallsPrev);
2918 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2919
2920 pVCpu->iem.s.cInstructions++;
2921
2922 /* Check for mode change _after_ certain CIMPL calls, so check that
2923 we continue executing with the same mode value. */
2924 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2925 { /* probable */ }
2926 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2927 { /* extremely likely */ }
2928 else
2929 break;
2930
2931#if defined(LOG_ENABLED) && 0 /* for debugging */
2932 //iemThreadedCompileEmitNop(pTb);
2933 iemThreadedCompileEmitLogCpuState(pTb);
2934#endif
2935 }
2936 else
2937 {
2938 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2939 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2940 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2941 rcStrict = VINF_SUCCESS;
2942
2943 if (pTb->Thrd.cCalls > cZeroCalls)
2944 {
2945 if (cCallsPrev != pTb->Thrd.cCalls)
2946 pVCpu->iem.s.cInstructions++;
2947 break;
2948 }
2949
2950 pVCpu->iem.s.pCurTbR3 = NULL;
2951 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2952 }
2953
2954 /* Check for IRQs? */
2955 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2956 pVCpu->iem.s.cInstrTillIrqCheck--;
2957 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2958 break;
2959
2960 /* Still space in the TB? */
2961 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2962 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2963 && pTb->cTbLookupEntries < 127)
2964 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2965 else
2966 {
2967 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2968 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2969 break;
2970 }
2971 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2972 }
2973
2974 /*
2975 * Reserve lookup space for the final call entry if necessary.
2976 */
2977 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2978 if (pTb->Thrd.cCalls > 1)
2979 {
2980 if (pFinalCall->uTbLookup == 0)
2981 {
2982 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2983 pTb->cTbLookupEntries += 1;
2984 }
2985 }
2986 else if (pFinalCall->uTbLookup != 0)
2987 {
2988 Assert(pTb->cTbLookupEntries > 1);
2989 pFinalCall->uTbLookup -= 1;
2990 pTb->cTbLookupEntries -= 1;
2991 }
2992
2993 /*
2994 * Duplicate the TB into a completed one and link it.
2995 */
2996 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2997 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2998
2999 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
3000
3001#ifdef IEM_COMPILE_ONLY_MODE
3002 /*
3003 * Execute the translation block.
3004 */
3005#endif
3006
3007 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3008}
3009
3010
3011
3012/*********************************************************************************************************************************
3013* Threaded Translation Block Saving and Restoring for Profiling the Native Recompiler *
3014*********************************************************************************************************************************/
3015#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3016# include <iprt/message.h>
3017
3018static const SSMFIELD g_aIemThreadedTbFields[] =
3019{
3020 SSMFIELD_ENTRY( IEMTB, cUsed),
3021 SSMFIELD_ENTRY( IEMTB, msLastUsed),
3022 SSMFIELD_ENTRY_GCPHYS(IEMTB, GCPhysPc),
3023 SSMFIELD_ENTRY( IEMTB, fFlags),
3024 SSMFIELD_ENTRY( IEMTB, x86.fAttr),
3025 SSMFIELD_ENTRY( IEMTB, cRanges),
3026 SSMFIELD_ENTRY( IEMTB, cInstructions),
3027 SSMFIELD_ENTRY( IEMTB, Thrd.cCalls),
3028 SSMFIELD_ENTRY( IEMTB, cTbLookupEntries),
3029 SSMFIELD_ENTRY( IEMTB, cbOpcodes),
3030 SSMFIELD_ENTRY( IEMTB, FlatPc),
3031 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[0]),
3032 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[1]),
3033 SSMFIELD_ENTRY_TERM()
3034};
3035
3036/**
3037 * Saves a threaded TB to a dedicated saved state file.
3038 */
3039static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb)
3040{
3041 /* Only VCPU #0 for now. */
3042 if (pVCpu->idCpu != 0)
3043 return;
3044
3045 /*
3046 * Get the SSM handle, lazily opening the output file.
3047 */
3048 PSSMHANDLE const pNil = (PSSMHANDLE)~(uintptr_t)0; Assert(!RT_VALID_PTR(pNil));
3049 PSSMHANDLE pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3050 if (pSSM && pSSM != pNil)
3051 { /* likely */ }
3052 else if (pSSM)
3053 return;
3054 else
3055 {
3056 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil;
3057 int rc = SSMR3Open("ThreadedTBsForRecompilerProfiling.sav", NULL, NULL, SSM_OPEN_F_FOR_WRITING, &pSSM);
3058 AssertLogRelRCReturnVoid(rc);
3059
3060 rc = SSMR3WriteFileHeader(pSSM, 1);
3061 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3062
3063 rc = SSMR3WriteUnitBegin(pSSM, "threaded-tbs", 1, 0);
3064 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3065 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pSSM;
3066 }
3067
3068 /*
3069 * Do the actual saving.
3070 */
3071 SSMR3PutU32(pSSM, 0); /* Indicates that another TB follows. */
3072
3073 /* The basic structure. */
3074 SSMR3PutStructEx(pSSM, pTb, sizeof(*pTb), 0 /*fFlags*/, g_aIemThreadedTbFields, NULL);
3075
3076 /* The ranges. */
3077 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3078 {
3079 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offOpcodes);
3080 SSMR3PutU16(pSSM, pTb->aRanges[iRange].cbOpcodes);
3081 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offPhysPage | (pTb->aRanges[iRange].idxPhysPage << 14));
3082 }
3083
3084 /* The opcodes. */
3085 SSMR3PutMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3086
3087 /* The threaded call table. */
3088 int rc = SSMR3PutMem(pSSM, pTb->Thrd.paCalls, sizeof(*pTb->Thrd.paCalls) * pTb->Thrd.cCalls);
3089 AssertLogRelMsgStmt(RT_SUCCESS(rc), ("rc=%Rrc\n", rc), pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil);
3090}
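/*
 * Note: An illustrative summary of the record layout produced above (derived
 * from the put/get code in this file; not an authoritative format spec).
 * Each TB record in the "threaded-tbs" unit consists of:
 *      u32                                 0 = another TB record follows
 *      IEMTB core fields                   per g_aIemThreadedTbFields (SSMR3PutStructEx)
 *      cRanges x (u16 offOpcodes, u16 cbOpcodes, u16 offPhysPage | idxPhysPage << 14)
 *      u8[cbOpcodes]                       the opcode bytes
 *      IEMTHRDEDCALLENTRY[Thrd.cCalls]     the threaded call table
 * The unit is terminated by a u32 UINT32_MAX marker, written by
 * iemThreadedSaveTbForProfilingCleanup() below.
 */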
3091
3092
3093/**
3094 * Called by IEMR3Term to finish any open profile files.
3095 *
3096 * @note This is not called on the EMT for @a pVCpu, but rather on the thread
3097 * driving the VM termination.
3098 */
3099DECLHIDDEN(void) iemThreadedSaveTbForProfilingCleanup(PVMCPU pVCpu)
3100{
3101 PSSMHANDLE const pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3102 pVCpu->iem.s.pSsmThreadedTbsForProfiling = NULL;
3103 if (RT_VALID_PTR(pSSM))
3104 {
3105 /* Indicate that this is the end. */
3106 SSMR3PutU32(pSSM, UINT32_MAX);
3107
3108 int rc = SSMR3WriteUnitComplete(pSSM);
3109 AssertLogRelRC(rc);
3110 rc = SSMR3WriteFileFooter(pSSM);
3111 AssertLogRelRC(rc);
3112 rc = SSMR3Close(pSSM);
3113 AssertLogRelRC(rc);
3114 }
3115}
3116
3117#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER && VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING */
3118
3119#ifdef IN_RING3
3120/**
3121 * API used to process what iemThreadedSaveTbForProfiling() saved.
3122 *
3123 * @note Do not mix build types or revisions. Local changes between saving the
3124 * TBs and calling this API may cause unexpected trouble.
3125 */
3126VMMR3DECL(int) IEMR3ThreadedProfileRecompilingSavedTbs(PVM pVM, const char *pszFilename, uint32_t cMinTbs)
3127{
3128# if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3129 PVMCPU const pVCpu = pVM->apCpusR3[0];
3130
3131 /* We need to keep an eye on the TB allocator. */
3132 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
3133
3134 /*
3135 * Load the TBs from the file.
3136 */
3137 PSSMHANDLE pSSM = NULL;
3138 int rc = SSMR3Open(pszFilename, NULL, NULL, 0, &pSSM);
3139 if (RT_SUCCESS(rc))
3140 {
3141 uint32_t cTbs = 0;
3142 PIEMTB pTbHead = NULL;
3143 PIEMTB *ppTbTail = &pTbHead;
3144 uint32_t uVersion;
3145 rc = SSMR3Seek(pSSM, "threaded-tbs", 0, &uVersion);
3146 if (RT_SUCCESS(rc))
3147 {
3148 for (;; cTbs++)
3149 {
3150 /* Check for the end tag. */
3151 uint32_t uTag = 0;
3152 rc = SSMR3GetU32(pSSM, &uTag);
3153 AssertRCBreak(rc);
3154 if (uTag == UINT32_MAX)
3155 break;
3156 AssertBreakStmt(uTag == 0, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3157
3158 /* Do we have room for another TB? */
3159 if (pTbAllocator->cInUseTbs + 2 >= pTbAllocator->cMaxTbs)
3160 {
3161 RTMsgInfo("Too many TBs to load, stopping loading early.\n");
3162 break;
3163 }
3164
3165 /* Allocate a new TB. */
3166 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
3167                AssertBreakStmt(pTb, rc = VERR_OUT_OF_RESOURCES);
3168
3169 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
3170 RT_ZERO(*pTb);
3171 pTb->idxAllocChunk = idxAllocChunk;
3172
3173 rc = SSMR3GetStructEx(pSSM, pTb, sizeof(*pTb), 0, g_aIemThreadedTbFields, NULL);
3174 if (RT_SUCCESS(rc))
3175 {
3176 AssertStmt(pTb->Thrd.cCalls > 0 && pTb->Thrd.cCalls <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3177 AssertStmt(pTb->cbOpcodes > 0 && pTb->cbOpcodes <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3178 AssertStmt(pTb->cRanges > 0 && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges), rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3179 AssertStmt(pTb->cTbLookupEntries > 0 && pTb->cTbLookupEntries <= 136, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3180
3181 if (RT_SUCCESS(rc))
3182 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3183 {
3184 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].offOpcodes);
3185 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].cbOpcodes);
3186 uint16_t uTmp = 0;
3187 rc = SSMR3GetU16(pSSM, &uTmp);
3188 AssertRCBreak(rc);
3189 pTb->aRanges[iRange].offPhysPage = uTmp & GUEST_PAGE_OFFSET_MASK;
3190 pTb->aRanges[iRange].idxPhysPage = uTmp >> 14;
3191
3192 AssertBreakStmt(pTb->aRanges[iRange].idxPhysPage <= RT_ELEMENTS(pTb->aGCPhysPages),
3193 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3194 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes < pTb->cbOpcodes,
3195 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3196 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes + pTb->aRanges[iRange].cbOpcodes <= pTb->cbOpcodes,
3197 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3198 }
3199
3200 if (RT_SUCCESS(rc))
3201 {
3202 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAllocZ(sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3203 if (pTb->Thrd.paCalls)
3204 {
3205 size_t const cbTbLookup = pTb->cTbLookupEntries * sizeof(PIEMTB);
3206 Assert(cbTbLookup > 0);
3207 size_t const cbOpcodes = pTb->cbOpcodes;
3208 Assert(cbOpcodes > 0);
3209 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
3210 uint8_t * const pbBoth = (uint8_t *)RTMemAllocZ(cbBoth);
3211 if (pbBoth)
3212 {
3213 pTb->pabOpcodes = &pbBoth[cbTbLookup];
3214 SSMR3GetMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3215 rc = SSMR3GetMem(pSSM, pTb->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3216 if (RT_SUCCESS(rc))
3217 {
3218 *ppTbTail = pTb;
3219 ppTbTail = &pTb->pNext;
3220 continue;
3221 }
3222 }
3223 else
3224 rc = VERR_NO_MEMORY;
3225 RTMemFree(pTb->Thrd.paCalls);
3226 }
3227 else
3228 rc = VERR_NO_MEMORY;
3229 }
3230 }
3231 iemTbAllocatorFree(pVCpu, pTb);
3232 break;
3233 }
3234 if (RT_FAILURE(rc))
3235 RTMsgError("Load error: %Rrc (cTbs=%u)", rc, cTbs);
3236 }
3237 else
3238 RTMsgError("SSMR3Seek failed on '%s': %Rrc", pszFilename, rc);
3239 SSMR3Close(pSSM);
3240 if (RT_SUCCESS(rc))
3241 {
3242 /*
3243 * Recompile the TBs.
3244 */
3245 if (pTbHead)
3246 {
3247 RTMsgInfo("Loaded %u TBs\n", cTbs);
3248 if (cTbs < cMinTbs)
3249 {
3250 RTMsgInfo("Duplicating TBs to reach %u TB target\n", cMinTbs);
3251 for (PIEMTB pTb = pTbHead;
3252 cTbs < cMinTbs && pTbAllocator->cInUseTbs + 2 <= pTbAllocator->cMaxTbs;
3253 pTb = pTb->pNext)
3254 {
3255 PIEMTB pTbCopy = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
3256 if (!pTbCopy)
3257 break;
3258 *ppTbTail = pTbCopy;
3259 ppTbTail = &pTbCopy->pNext;
3260 cTbs++;
3261 }
3262 }
3263
3264 PIEMTB pTbWarmup = iemThreadedTbDuplicate(pVM, pVCpu, pTbHead);
3265 if (pTbWarmup)
3266 {
3267 iemNativeRecompile(pVCpu, pTbWarmup);
3268 RTThreadSleep(512); /* to make the start visible in the profiler. */
3269 RTMsgInfo("Ready, set, go!\n");
3270
3271 if ((pTbWarmup->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3272 {
3273 uint32_t cFailed = 0;
3274 uint64_t const nsStart = RTTimeNanoTS();
3275 for (PIEMTB pTb = pTbHead; pTb; pTb = pTb->pNext)
3276 {
3277 iemNativeRecompile(pVCpu, pTb);
3278 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) != IEMTB_F_TYPE_NATIVE)
3279 cFailed++;
3280 }
3281 uint64_t const cNsElapsed = RTTimeNanoTS() - nsStart;
3282 RTMsgInfo("Recompiled %u TBs in %'RU64 ns - averaging %'RU64 ns/TB\n",
3283 cTbs, cNsElapsed, (cNsElapsed + cTbs - 1) / cTbs);
3284 if (cFailed)
3285 {
3286                            RTMsgError("Unfortunately %u TB(s) failed!", cFailed);
3287 rc = VERR_GENERAL_FAILURE;
3288 }
3289 RTThreadSleep(128); /* Another gap in the profiler timeline. */
3290 }
3291 else
3292 {
3293 RTMsgError("Failed to recompile the first TB!");
3294 rc = VERR_GENERAL_FAILURE;
3295 }
3296 }
3297 else
3298 rc = VERR_NO_MEMORY;
3299 }
3300 else
3301 {
3302 RTMsgError("'%s' contains no TBs!", pszFilename);
3303 rc = VERR_NO_DATA;
3304 }
3305 }
3306 }
3307 else
3308 RTMsgError("SSMR3Open failed on '%s': %Rrc", pszFilename, rc);
3309 return rc;
3310
3311# else
3312 RT_NOREF(pVM, pszFilename, cMinTbs);
3313 return VERR_NOT_IMPLEMENTED;
3314# endif
3315}
3316#endif /* IN_RING3 */
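/*
 * Note: A minimal usage sketch for the API above; the call site and the
 * cMinTbs value are hypothetical, while the file name matches the one used
 * by iemThreadedSaveTbForProfiling():
 *
 *      uint32_t const cMinTbs = 8192;      // arbitrary duplication target
 *      int rc = IEMR3ThreadedProfileRecompilingSavedTbs(pVM,
 *                                                       "ThreadedTBsForRecompilerProfiling.sav",
 *                                                       cMinTbs);
 *      if (RT_FAILURE(rc))
 *          RTMsgError("Profiling run failed: %Rrc", rc);
 */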
3317
3318
3319/*********************************************************************************************************************************
3320* Recompiled Execution Core *
3321*********************************************************************************************************************************/
3322
3323/** Default TB factor.
3324 * This is basically the number of nanoseconds we guess executing a TB takes
3325 * on average. We estimate it on the high side if we can.
3326 * @note Best if this is a power of two so it can be translated to a shift. */
3327#define IEM_TIMER_POLL_DEFAULT_FACTOR UINT32_C(64)
3328/** The minimum number of nanoseconds we can allow between timer pollings.
3329 * This must take the cost of TMTimerPollBoolWithNanoTS into account. We put that
3330 * cost at 104 ns now, thus this constant is at 256 ns. */
3331#define IEM_TIMER_POLL_MIN_NS UINT32_C(256)
3332/** The IEM_TIMER_POLL_MIN_NS value roughly translated to TBs, with some grains
3333 * of salt thrown in.
3334 * The idea is that we will be able to make progress with guest code execution
3335 * before polling timers and between running timers. */
3336#define IEM_TIMER_POLL_MIN_ITER UINT32_C(12)
3337/** The maximum number of nanoseconds we can allow between timer pollings.
3338 * This probably shouldn't be too high, as we don't have any timer
3339 * reprogramming feedback in the polling code. So, when a device reschedules a
3340 * timer for an earlier delivery, we won't know about it. */
3341#define IEM_TIMER_POLL_MAX_NS UINT32_C(8388608) /* 0x800000 ns = 8.4 ms */
3342/** The IEM_TIMER_POLL_MAX_NS value roughly translated to TBs, with some grains
3343 * of salt thrown in.
3344 * This helps control fluctuations in the NU benchmark. */
3345#define IEM_TIMER_POLL_MAX_ITER _512K
3346
3347#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3348/**
3349 * Calculates the number of TBs till the next timer polling using defaults.
3350 *
3351 * This is used when the previous run wasn't long enough to provide sufficient
3352 * data and when coming back from the HALT state and we haven't actually
3353 * executed anything for a while.
3354 */
3355DECL_FORCE_INLINE(uint32_t) iemPollTimersCalcDefaultCountdown(uint64_t cNsDelta) RT_NOEXCEPT
3356{
3357 if (cNsDelta >= IEM_TIMER_POLL_MAX_NS)
3358 return RT_MIN(IEM_TIMER_POLL_MAX_NS / IEM_TIMER_POLL_DEFAULT_FACTOR, IEM_TIMER_POLL_MAX_ITER);
3359
3360 cNsDelta = RT_BIT_64(ASMBitFirstSetU32(cNsDelta) - 1); /* round down to power of 2 */
3361 uint32_t const cRet = cNsDelta / IEM_TIMER_POLL_DEFAULT_FACTOR;
3362 if (cRet >= IEM_TIMER_POLL_MIN_ITER)
3363 {
3364 if (cRet <= IEM_TIMER_POLL_MAX_ITER)
3365 return cRet;
3366 return IEM_TIMER_POLL_MAX_ITER;
3367 }
3368 return IEM_TIMER_POLL_MIN_ITER;
3369}
3370#endif
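/*
 * Note: Two worked examples for iemPollTimersCalcDefaultCountdown() above;
 * the input values are made up, the arithmetic follows the constants:
 *      cNsDelta = 65536 ns (already a power of two): 65536 / 64 = 1024 TBs,
 *          which lies within [IEM_TIMER_POLL_MIN_ITER, IEM_TIMER_POLL_MAX_ITER]
 *          and is returned as-is.
 *      cNsDelta >= IEM_TIMER_POLL_MAX_NS: 8388608 / 64 = 131072 TBs, below
 *          IEM_TIMER_POLL_MAX_ITER (_512K), so 131072 is returned.
 */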
3371
3372
3373/**
3374 * Helper for polling timers.
3375 */
3376DECLHIDDEN(int) iemPollTimers(PVMCC pVM, PVMCPUCC pVCpu) RT_NOEXCEPT
3377{
3378 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPoll, a);
3379
3380 /*
3381 * Check for VM_FF_TM_VIRTUAL_SYNC and call TMR3VirtualSyncFF if set.
3382 * This is something all EMTs can do.
3383 */
3384 /* If the virtual sync FF is set, respond to it. */
3385 bool fRanTimers = VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC);
3386 if (!fRanTimers)
3387 { /* likely */ }
3388 else
3389 {
3390 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3391 TMR3VirtualSyncFF(pVM, pVCpu);
3392 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3393 }
3394
3395 /*
3396 * Poll timers.
3397 *
3398     * On the 10980xe the polling averages 314 ticks, with a min of 201, while
3399     * running a Norton Utilities DOS benchmark program. The TSC runs at 3 GHz,
3400 * translating that to 104 ns and 67 ns respectively. (An M2 booting win11
3401 * has an average of 2 ticks / 84 ns.)
3402 *
3403 * With the same setup the TMR3VirtualSyncFF and else branch here profiles
3404 * to 79751 ticks / 26583 ns on average, with a min of 1194 ticks / 398 ns.
3405 * (An M2 booting win11 has an average of 24 ticks / 1008 ns, with a min of
3406 * 8 ticks / 336 ns.)
3407 *
3408 * If we get a zero return value we run timers. Non-timer EMTs shouldn't
3409 * ever see a zero value here, so we just call TMR3TimerQueuesDo. However,
3410     * we do not re-run timers if we already called TMR3VirtualSyncFF above; we
3411 * try to make sure some code is executed first.
3412 */
3413 uint64_t nsNow = 0;
3414 uint64_t cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3415 if (cNsDelta >= 1) /* It is okay to run virtual sync timers a little early. */
3416 { /* likely */ }
3417 else if (!fRanTimers || VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
3418 {
3419 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3420 TMR3TimerQueuesDo(pVM);
3421 fRanTimers = true;
3422 nsNow = 0;
3423 cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3424 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3425 }
3426 else
3427 cNsDelta = 33;
3428
3429 /*
3430 * Calc interval and update the timestamps.
3431 */
3432 uint64_t const cNsSinceLast = nsNow - pVCpu->iem.s.nsRecompilerPollNow;
3433 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3434 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3435
3436 /*
3437     * Set the next polling countdown value.
3438 *
3439 * We take the previous value and adjust it according to the cNsSinceLast
3440 * value, if it's not within reason. This can't be too accurate since the
3441     * CheckIrq and intra-TB checks aren't evenly spaced, as they depend highly
3442 * on the guest code.
3443 */
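    /*
     * Note: An illustrative round of the adjustment below (made-up numbers,
     * consistent with the code): with a previous countdown of 1024 TBs and
     * cNsSinceLast = 262144 ns, uFactor = 262144 / 1024 = 256 ns/TB; with the
     * next deadline cNsDeltaAdj = 1048576 ns away, the new countdown becomes
     * 1048576 / 256 = 4096 TBs, finally clamped to the
     * [IEM_TIMER_POLL_MIN_ITER, IEM_TIMER_POLL_MAX_ITER] range.
     */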
3444#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3445 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3446 if (cNsDelta >= RT_NS_1SEC / 4)
3447 {
3448 /*
3449         * Non-timer EMTs should end up here with a fixed 500 ms delta; just return
3450         * the max and leave the polling overhead to the dedicated timer EMT.
3451 */
3452 AssertCompile(IEM_TIMER_POLL_MAX_ITER * IEM_TIMER_POLL_DEFAULT_FACTOR <= RT_NS_100MS);
3453 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3454 }
3455 else
3456 {
3457 /*
3458 * This is the timer EMT.
3459 */
3460 if (cNsDelta <= IEM_TIMER_POLL_MIN_NS)
3461 {
3462 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollTiny);
3463 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3464 }
3465 else
3466 {
3467 uint32_t const cNsDeltaAdj = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS : (uint32_t)cNsDelta;
3468 uint32_t const cNsDeltaSlack = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS / 2 : cNsDeltaAdj / 4;
3469 if ( cNsSinceLast < RT_MAX(IEM_TIMER_POLL_MIN_NS, 64)
3470 || cItersTillNextPoll < IEM_TIMER_POLL_MIN_ITER /* paranoia */)
3471 {
3472 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollDefaultCalc);
3473 cItersTillNextPoll = iemPollTimersCalcDefaultCountdown(cNsDeltaAdj);
3474 }
3475 else if ( cNsSinceLast >= cNsDeltaAdj + cNsDeltaSlack
3476 || cNsSinceLast <= cNsDeltaAdj - cNsDeltaSlack)
3477 {
3478 if (cNsSinceLast >= cItersTillNextPoll)
3479 {
3480 uint32_t uFactor = (uint32_t)(cNsSinceLast + cItersTillNextPoll - 1) / cItersTillNextPoll;
3481 cItersTillNextPoll = cNsDeltaAdj / uFactor;
3482 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorDivision, uFactor);
3483 }
3484 else
3485 {
3486 uint32_t uFactor = cItersTillNextPoll / (uint32_t)cNsSinceLast;
3487 cItersTillNextPoll = cNsDeltaAdj * uFactor;
3488 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorMultiplication, uFactor);
3489 }
3490
3491 if (cItersTillNextPoll >= IEM_TIMER_POLL_MIN_ITER)
3492 {
3493 if (cItersTillNextPoll <= IEM_TIMER_POLL_MAX_ITER)
3494 { /* likely */ }
3495 else
3496 {
3497 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollMax);
3498 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3499 }
3500 }
3501 else
3502 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3503 }
3504 else
3505 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollUnchanged);
3506 }
3507 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3508 }
3509#else
3510/** Poll timers every 400 us / 2500 Hz. (source: thin air) */
3511# define IEM_TIMER_POLL_IDEAL_NS (400U * RT_NS_1US)
3512 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3513 uint32_t const cNsIdealPollInterval = IEM_TIMER_POLL_IDEAL_NS;
3514 int64_t const nsFromIdeal = cNsSinceLast - cNsIdealPollInterval;
3515 if (nsFromIdeal < 0)
3516 {
3517 if ((uint64_t)-nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll < _64K)
3518 {
3519 cItersTillNextPoll += cItersTillNextPoll / 8;
3520 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3521 }
3522 }
3523 else
3524 {
3525 if ((uint64_t)nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll > 256)
3526 {
3527 cItersTillNextPoll -= cItersTillNextPoll / 8;
3528 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3529 }
3530 }
3531#endif
3532 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillNextPoll;
3533
3534 /*
3535 * Repeat the IRQ and FF checks.
3536 */
3537 if (cNsDelta > 0)
3538 {
3539 uint32_t fCpu = pVCpu->fLocalForcedActions;
3540 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3541 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3542 | VMCPU_FF_TLB_FLUSH
3543 | VMCPU_FF_UNHALT );
3544 if (RT_LIKELY( ( !fCpu
3545 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3546 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3547 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx)) ) )
3548 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
3549 {
3550 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3551 return VINF_SUCCESS;
3552 }
3553 }
3554 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3555 return VINF_IEM_REEXEC_BREAK_FF;
3556}
3557
3558
3559/** Helper for iemTbExec. */
3560DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
3561{
3562 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
3563 Assert(idx < pTb->cTbLookupEntries);
3564 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
3565}
3566
3567
3568/**
3569 * Executes a translation block.
3570 *
3571 * @returns Strict VBox status code.
3572 * @param pVCpu The cross context virtual CPU structure of the calling
3573 * thread.
3574 * @param pTb The translation block to execute.
3575 */
3576static IEM_DECL_MSC_GUARD_IGNORE VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
3577{
3578 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
3579
3580 /*
3581 * Set the current TB so CIMPL functions may get at it.
3582 */
3583 pVCpu->iem.s.pCurTbR3 = pTb;
3584 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
3585
3586 /*
3587 * Execute the block.
3588 */
3589#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3590 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
3591 {
3592 pVCpu->iem.s.cTbExecNative++;
3593 IEMTLBTRACE_TB_EXEC_N8VE(pVCpu, pTb);
3594# ifdef LOG_ENABLED
3595 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
3596# endif
3597
3598# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3599 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
3600# endif
3601# ifdef RT_ARCH_AMD64
3602 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
3603# else
3604 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
3605# endif
3606
3607# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3608 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3609# endif
3610# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3611 /* Restore FPCR/MXCSR if the TB modified it. */
3612 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3613 {
3614 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3615 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3616 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3617 }
3618# endif
3619# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3620 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
3621# endif
3622 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3623 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3624 { /* likely */ }
3625 else
3626 {
3627 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
3628 pVCpu->iem.s.pCurTbR3 = NULL;
3629
3630 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3631 only to break out of TB execution early. */
3632 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3633 {
3634 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
3635 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3636 }
3637
3638 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
3639 only to break out of TB execution early due to pending FFs. */
3640 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
3641 {
3642 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
3643 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3644 }
3645
3646            /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
3647               and be converted to VINF_SUCCESS or whatever is appropriate. */
3648 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
3649 {
3650 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
3651 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
3652 }
3653
3654 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
3655 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3656 }
3657 }
3658 else
3659#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
3660 {
3661 /*
3662 * The threaded execution loop.
3663 */
3664 pVCpu->iem.s.cTbExecThreaded++;
3665 IEMTLBTRACE_TB_EXEC_THRD(pVCpu, pTb);
3666#ifdef LOG_ENABLED
3667 uint64_t uRipPrev = UINT64_MAX;
3668#endif
3669 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
3670 uint32_t cCallsLeft = pTb->Thrd.cCalls;
3671 while (cCallsLeft-- > 0)
3672 {
3673#ifdef LOG_ENABLED
3674 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
3675 {
3676 uRipPrev = pVCpu->cpum.GstCtx.rip;
3677 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
3678 }
3679 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
3680 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
3681 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
3682#endif
3683#ifdef VBOX_WITH_STATISTICS
3684 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
3685 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
3686#endif
3687 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
3688 pCallEntry->auParams[0],
3689 pCallEntry->auParams[1],
3690 pCallEntry->auParams[2]);
3691 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3692 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3693 pCallEntry++;
3694 else if (rcStrict == VINF_IEM_REEXEC_JUMP)
3695 {
3696 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
3697 Assert(cCallsLeft == 0);
3698 uint32_t const idxTarget = (uint32_t)pCallEntry->auParams[0];
3699 cCallsLeft = pTb->Thrd.cCalls;
3700 AssertBreak(idxTarget < cCallsLeft - 1);
3701 cCallsLeft -= idxTarget;
3702 pCallEntry = &pTb->Thrd.paCalls[idxTarget];
3703 AssertBreak(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET);
3704 }
3705 else
3706 {
3707 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
3708 pVCpu->iem.s.pCurTbR3 = NULL;
3709 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
3710 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
3711
3712 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3713 only to break out of TB execution early. */
3714 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3715 {
3716#ifdef VBOX_WITH_STATISTICS
3717 if (pCallEntry->uTbLookup)
3718 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
3719 else
3720 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
3721#endif
3722 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3723 }
3724 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3725 }
3726 }
3727
3728 /* Update the lookup entry. */
3729 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
3730 }
3731
3732 pVCpu->iem.s.cInstructions += pTb->cInstructions;
3733 pVCpu->iem.s.pCurTbR3 = NULL;
3734 return VINF_SUCCESS;
3735}
3736
3737
3738/**
3739 * This is called when the PC doesn't match the current pbInstrBuf.
3740 *
3741 * Upon return, we're ready for opcode fetching. But please note that
3742 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
3743 * MMIO or unassigned).
3744 */
3745static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
3746{
3747 pVCpu->iem.s.pbInstrBuf = NULL;
3748 pVCpu->iem.s.offCurInstrStart = 0;
3749 pVCpu->iem.s.offInstrNextByte = 0;
3750 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
3751 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
3752}
3753
3754
3755/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
3756DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
3757{
3758 /*
3759 * Set uCurTbStartPc to RIP and calc the effective PC.
3760 */
3761 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
3762#if 0 /* unused */
3763 pVCpu->iem.s.uCurTbStartPc = uPc;
3764#endif
3765 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
3766 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
3767
3768 /*
3769 * Advance within the current buffer (PAGE) when possible.
3770 */
3771 if (pVCpu->iem.s.pbInstrBuf)
3772 {
3773 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
3774 if (off < pVCpu->iem.s.cbInstrBufTotal)
3775 {
3776 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3777 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3778 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3779 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3780 else
3781 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3782
3783 return pVCpu->iem.s.GCPhysInstrBuf + off;
3784 }
3785 }
3786 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3787}
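/*
 * Note: A worked example of the in-buffer fast path above (made-up values):
 * if the instruction buffer maps a full 4 KiB page (cbInstrBufTotal = 4096)
 * and the new effective PC lands 0x123 bytes into it, then off = 0x123,
 * offInstrNextByte/offCurInstrStart become 0x123, cbInstrBuf becomes
 * 0x123 + 15 = 0x132 (still within the buffer), and the function returns
 * GCPhysInstrBuf + 0x123 without refetching.
 */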
3788
3789
3790/**
3791 * Determines the extra IEMTB_F_XXX flags.
3792 *
3793 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3794 * IEMTB_F_CS_LIM_CHECKS (or zero).
3795 * @param pVCpu The cross context virtual CPU structure of the calling
3796 * thread.
3797 */
3798DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3799{
3800 uint32_t fRet = 0;
3801
3802 /*
3803 * Determine the inhibit bits.
3804 */
3805 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3806 { /* typical */ }
3807 else
3808 {
3809 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3810 fRet |= IEMTB_F_INHIBIT_SHADOW;
3811 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3812 fRet |= IEMTB_F_INHIBIT_NMI;
3813 }
3814
3815 /*
3816 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3817 * likely to go invalid before the end of the translation block.
3818 */
3819 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3820 return fRet;
3821
3822 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3823 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3824 return fRet;
3825 return fRet | IEMTB_F_CS_LIM_CHECKS;
3826}
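/*
 * Note: A worked example of the limit heuristic above (made-up values): with
 * a non-flat CS whose base is page aligned, cs.u32Limit = 0xffff and
 * eip = 0x1000, offFromLim = 0xefff which is >= 4096 + 16, so
 * IEMTB_F_CS_LIM_CHECKS is not needed - presumably because a TB's opcode
 * range is bounded by roughly the current page plus one maximum-length
 * instruction. With eip = 0xf800 instead, offFromLim = 0x7ff < 4112 and the
 * flag is returned.
 */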
3827
3828
3829VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu, bool fWasHalted)
3830{
3831 /*
3832     * See if there is an interrupt pending in TRPM and inject it if we can.
3833 */
3834 if (!TRPMHasTrap(pVCpu))
3835 { /* likely */ }
3836 else
3837 {
3838 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3839 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3840 { /*likely */ }
3841 else
3842 return rcStrict;
3843 }
3844
3845 /*
3846 * Init the execution environment.
3847 */
3848#if 1 /** @todo this seems like a good idea, however if we ever share memory
3849 * directly with other threads on the host, it isn't necessarily... */
3850 if (pVM->cCpus == 1)
3851 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3852 else
3853#endif
3854 iemInitExec(pVCpu, 0 /*fExecOpts*/);
3855
3856 if (RT_LIKELY(!fWasHalted && pVCpu->iem.s.msRecompilerPollNow != 0))
3857 { }
3858 else
3859 {
3860 /* Do polling after halt and the first time we get here. */
3861#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3862 uint64_t nsNow = 0;
3863 uint32_t const cItersTillPoll = iemPollTimersCalcDefaultCountdown(TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow));
3864 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillPoll;
3865 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillPoll;
3866#else
3867 uint64_t const nsNow = TMVirtualGetNoCheck(pVM);
3868#endif
3869 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3870 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3871 }
3872 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3873
3874 /*
3875 * Run-loop.
3876 *
3877 * If we're using setjmp/longjmp we combine all the catching here to avoid
3878 * having to call setjmp for each block we're executing.
3879 */
3880 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3881 for (;;)
3882 {
3883 VBOXSTRICTRC rcStrict;
3884 IEM_TRY_SETJMP(pVCpu, rcStrict)
3885 {
3886 for (;;)
3887 {
3888                /* Translate PC to physical address; we'll need this for both lookup and compilation. */
3889 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3890 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3891 {
3892 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3893 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3894 if (pTb)
3895 rcStrict = iemTbExec(pVCpu, pTb);
3896 else
3897 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3898 }
3899 else
3900 {
3901 /* This can only happen if the current PC cannot be translated into a
3902 host pointer, which means we're in MMIO or unmapped memory... */
3903#if defined(VBOX_STRICT) && defined(IN_RING3)
3904 rcStrict = DBGFSTOP(pVM);
3905 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3906 return rcStrict;
3907#endif
3908 rcStrict = IEMExecLots(pVCpu, 2048, 511, NULL);
3909 }
3910 if (rcStrict == VINF_SUCCESS)
3911 {
3912 Assert(pVCpu->iem.s.cActiveMappings == 0);
3913
3914 /* Note! This IRQ/FF check is repeated in iemPollTimers, iemThreadedFunc_BltIn_CheckIrq
3915 and emitted by iemNativeRecompFunc_BltIn_CheckIrq. */
3916 uint64_t fCpu = pVCpu->fLocalForcedActions;
3917 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3918 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3919 | VMCPU_FF_TLB_FLUSH
3920 | VMCPU_FF_UNHALT );
3921 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3922 if (RT_LIKELY( ( !fCpu
3923 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3924 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3925 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3926 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3927 {
3928 /* Once in a while we need to poll timers here. */
3929 if ((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
3930 { /* likely */ }
3931 else
3932 {
3933 int rc = iemPollTimers(pVM, pVCpu);
3934 if (rc != VINF_SUCCESS)
3935 return VINF_SUCCESS;
3936 }
3937 }
3938 else
3939 return VINF_SUCCESS;
3940 }
3941 else
3942 return rcStrict;
3943 }
3944 }
3945 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3946 {
3947 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3948 pVCpu->iem.s.cLongJumps++;
3949#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3950 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3951#endif
3952 if (pVCpu->iem.s.cActiveMappings > 0)
3953 iemMemRollback(pVCpu);
3954
3955#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3956 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3957 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3958 {
3959 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3960# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3961 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3962 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3963# endif
3964
3965#ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3966 /* Restore FPCR/MXCSR if the TB modified it. */
3967 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3968 {
3969 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3970 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3971 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3972 }
3973#endif
3974 }
3975#endif
3976
3977#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3978 /* If pTb isn't NULL we're in iemTbExec. */
3979 if (!pTb)
3980 {
3981 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3982 pTb = pVCpu->iem.s.pCurTbR3;
3983 if (pTb)
3984 {
3985 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3986 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3987 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3988 }
3989 }
3990#endif
3991 pVCpu->iem.s.pCurTbR3 = NULL;
3992 return rcStrict;
3993 }
3994 IEM_CATCH_LONGJMP_END(pVCpu);
3995 }
3996}
3997