source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@105712
1/* $Id: IEMAllThrdRecompiler.cpp 105712 2024-08-16 20:55:59Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused ones.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
117# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
118#endif
119
120
121/**
122 * Calculates the effective address of a ModR/M memory operand, extended version
123 * for use in the recompilers.
124 *
125 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
126 *
127 * May longjmp on internal error.
128 *
129 * @return The effective address.
130 * @param pVCpu The cross context virtual CPU structure of the calling thread.
131 * @param bRm The ModRM byte.
132 * @param cbImmAndRspOffset - First byte: The size of any immediate
133 * following the effective address opcode bytes
134 * (only for RIP relative addressing).
135 * - Second byte: RSP displacement (for POP [ESP]).
136 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
137 * SIB byte (bits 39:32).
138 *
139 * @note This must be defined in a source file with matching
140 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
141 * or implemented differently...
142 */
143RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
144{
145 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
146# define SET_SS_DEF() \
147 do \
148 { \
149 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
150 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
151 } while (0)
152
153 if (!IEM_IS_64BIT_CODE(pVCpu))
154 {
155/** @todo Check the effective address size crap! */
156 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
157 {
158 uint16_t u16EffAddr;
159
160 /* Handle the disp16 form with no registers first. */
161 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
162 {
163 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
164 *puInfo = u16EffAddr;
165 }
166 else
167 {
168 /* Get the displacement. */
169 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
170 {
171 case 0: u16EffAddr = 0; break;
172 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
173 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
174 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
175 }
176 *puInfo = u16EffAddr;
177
178 /* Add the base and index registers to the disp. */
179 switch (bRm & X86_MODRM_RM_MASK)
180 {
181 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
182 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
183 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
184 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
185 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
186 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
187 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
188 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
189 }
190 }
191
192 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
193 return u16EffAddr;
194 }
195
196 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
197 uint32_t u32EffAddr;
198 uint64_t uInfo;
199
200 /* Handle the disp32 form with no registers first. */
201 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
202 {
203 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
204 uInfo = u32EffAddr;
205 }
206 else
207 {
208 /* Get the register (or SIB) value. */
209 uInfo = 0;
210 switch ((bRm & X86_MODRM_RM_MASK))
211 {
212 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
213 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
214 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
215 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
216 case 4: /* SIB */
217 {
218 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
219 uInfo = (uint64_t)bSib << 32;
220
221 /* Get the index and scale it. */
222 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
223 {
224 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
225 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
226 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
227 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
228 case 4: u32EffAddr = 0; /*none */ break;
229 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
230 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
231 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
232 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
233 }
234 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
235
236 /* add base */
237 switch (bSib & X86_SIB_BASE_MASK)
238 {
239 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
240 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
241 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
242 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
243 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
244 case 5:
245 if ((bRm & X86_MODRM_MOD_MASK) != 0)
246 {
247 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
248 SET_SS_DEF();
249 }
250 else
251 {
252 uint32_t u32Disp;
253 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
254 u32EffAddr += u32Disp;
255 uInfo |= u32Disp;
256 }
257 break;
258 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
259 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
260 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
261 }
262 break;
263 }
264 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
265 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
266 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
267 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
268 }
269
270 /* Get and add the displacement. */
271 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
272 {
273 case 0:
274 break;
275 case 1:
276 {
277 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
278 u32EffAddr += i8Disp;
279 uInfo |= (uint32_t)(int32_t)i8Disp;
280 break;
281 }
282 case 2:
283 {
284 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
285 u32EffAddr += u32Disp;
286 uInfo |= u32Disp;
287 break;
288 }
289 default:
290 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
291 }
292 }
293
294 *puInfo = uInfo;
295 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
296 return u32EffAddr;
297 }
298
299 uint64_t u64EffAddr;
300 uint64_t uInfo;
301
302 /* Handle the rip+disp32 form with no registers first. */
303 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
304 {
305 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
306 uInfo = (uint32_t)u64EffAddr;
307 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
308 }
309 else
310 {
311 /* Get the register (or SIB) value. */
312 uInfo = 0;
313 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
314 {
315 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
316 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
317 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
318 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
319 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
320 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
321 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
322 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
323 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
324 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
325 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
326 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
327 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
328 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
329 /* SIB */
330 case 4:
331 case 12:
332 {
333 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
334 uInfo = (uint64_t)bSib << 32;
335
336 /* Get the index and scale it. */
337 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
338 {
339 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
340 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
341 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
342 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
343 case 4: u64EffAddr = 0; /*none */ break;
344 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
345 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
346 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
347 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
348 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
349 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
350 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
351 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
352 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
353 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
354 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
355 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
356 }
357 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
358
359 /* add base */
360 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
361 {
362 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
363 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
364 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
365 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
366 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
367 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
368 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
369 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
370 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
371 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
372 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
373 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
374 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
375 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
376 /* complicated encodings */
377 case 5:
378 case 13:
379 if ((bRm & X86_MODRM_MOD_MASK) != 0)
380 {
381 if (!pVCpu->iem.s.uRexB)
382 {
383 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
384 SET_SS_DEF();
385 }
386 else
387 u64EffAddr += pVCpu->cpum.GstCtx.r13;
388 }
389 else
390 {
391 uint32_t u32Disp;
392 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
393 u64EffAddr += (int32_t)u32Disp;
394 uInfo |= u32Disp;
395 }
396 break;
397 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
398 }
399 break;
400 }
401 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
402 }
403
404 /* Get and add the displacement. */
405 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
406 {
407 case 0:
408 break;
409 case 1:
410 {
411 int8_t i8Disp;
412 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
413 u64EffAddr += i8Disp;
414 uInfo |= (uint32_t)(int32_t)i8Disp;
415 break;
416 }
417 case 2:
418 {
419 uint32_t u32Disp;
420 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
421 u64EffAddr += (int32_t)u32Disp;
422 uInfo |= u32Disp;
423 break;
424 }
425 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
426 }
427
428 }
429
430 *puInfo = uInfo;
431 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
432 {
433 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
434 return u64EffAddr;
435 }
436 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
437 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
438 return u64EffAddr & UINT32_MAX;
439}
440
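/* Illustrative sketch, not part of the build: how a caller of
   iemOpHlpCalcRmEffAddrJmpEx above would pack cbImmAndRspOffset and unpack
   *puInfo, per the doxygen comment (the values here are made up):

        uint32_t const cbImm          = 4;                      // size of an imm32 following the effective address bytes
        uint32_t const offRspPop      = 4;                      // extra RSP/ESP displacement, e.g. for POP [ESP]
        uint32_t const cbImmAndRspOff = cbImm | (offRspPop << 8);
        uint64_t       uInfo          = 0;
        RTGCPTR  const GCPtrEff       = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, cbImmAndRspOff, &uInfo);
        uint32_t const u32Disp        = (uint32_t)uInfo;        // displacement (bits 31:0)
        uint8_t  const bSib           = (uint8_t)(uInfo >> 32); // SIB byte (bits 39:32), when present
*/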
441
442/*********************************************************************************************************************************
443* Translation Block Cache. *
444*********************************************************************************************************************************/
445
446/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
447static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
448{
449 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
450 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
451 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
452 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
453 if (cMsSinceUse1 != cMsSinceUse2)
454 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
455 if (pTb1->cUsed != pTb2->cUsed)
456 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
457 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
458 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
459 return 0;
460}
461
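/* Illustrative sketch, not part of the build: with msRecompilerPollNow passed as
   pvUser, the comparator above orders the most recently used TBs first and only
   falls back on the use count and native-vs-threaded type to break ties.
   Assuming msNow = 1000:

        pTb1: msLastUsed = 999, cUsed = 3    ->  cMsSinceUse1 = 1
        pTb2: msLastUsed = 992, cUsed = 250  ->  cMsSinceUse2 = 8

   pTb1 sorts before pTb2, so iemTbCacheAddWithPruning below keeps it when it
   drops the older half of an overflowing collision list. */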
462#ifdef VBOX_STRICT
463/**
464 * Assertion helper that checks a collisions list count.
465 */
466static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
467{
468 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
469 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
470 while (pTb)
471 {
472 pTb = pTb->pNext;
473 cLeft--;
474 }
475 AssertMsg(cLeft == 0,
476 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
477 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
478}
479#endif
480
481
482DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
483{
484 STAM_PROFILE_START(&pTbCache->StatPrune, a);
485
486 /*
487 * First convert the collision list to an array.
488 */
489 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
490 uintptr_t cInserted = 0;
491 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
492
493 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
494
495 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
496 {
497 apSortedTbs[cInserted++] = pTbCollision;
498 pTbCollision = pTbCollision->pNext;
499 }
500
501 /* Free any excess (impossible). */
502 if (RT_LIKELY(!pTbCollision))
503 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
504 else
505 do
506 {
507 PIEMTB pTbToFree = pTbCollision;
508 pTbCollision = pTbToFree->pNext;
509 iemTbAllocatorFree(pVCpu, pTbToFree);
510 } while (pTbCollision);
511
512 /*
513 * Sort it by most recently used and usage count.
514 */
515 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
516
517 /* We keep half the list for now. Perhaps a bit aggressive... */
518 uintptr_t const cKeep = cInserted / 2;
519
520 /* First free up the TBs we don't wish to keep (before creating the new
521 list because otherwise the free code will scan the list for each one
522 without ever finding it). */
523 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
524 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
525
526 /* Then chain the new TB together with the existing ones we want to keep
527 and insert this list into the hash table. */
528 pTbCollision = pTb;
529 for (uintptr_t idx = 0; idx < cKeep; idx++)
530 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
531 pTbCollision->pNext = NULL;
532
533 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
534#ifdef VBOX_STRICT
535 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
536#endif
537
538 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
539}
540
541
542static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
543{
544 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
545 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
546 if (!pTbOldHead)
547 {
548 pTb->pNext = NULL;
549 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
550 }
551 else
552 {
553 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
554 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
555 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
556 {
557 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
558 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
559#ifdef VBOX_STRICT
560 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
561#endif
562 }
563 else
564 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
565 }
566}
567
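/* Illustrative sketch, not part of the build: each apHash[] entry is a composite
   value built with IEMTBCACHE_PTR_MAKE(pTb, cCount), so a single entry carries
   both the head of the collision list and its length (capped at
   IEMTBCACHE_PTR_MAX_COUNT, at which point iemTbCacheAddWithPruning takes over):

        pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1);
        PIEMTB   const pHead  = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
        uint32_t const cChain = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
*/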
568
569/**
570 * Unlinks @a pTb from the hash table if found in it.
571 *
572 * @returns true if unlinked, false if not present.
573 * @param pTbCache The hash table.
574 * @param pTb The TB to remove.
575 */
576static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
577{
578 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
579 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
580 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
581
582 /*
583 * At the head of the collision list?
584 */
585 if (pTbHash == pTb)
586 {
587 if (!pTb->pNext)
588 pTbCache->apHash[idxHash] = NULL;
589 else
590 {
591 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
592 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
593#ifdef VBOX_STRICT
594 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
595#endif
596 }
597 return true;
598 }
599
600 /*
601 * Search the collision list.
602 */
603 PIEMTB const pTbHead = pTbHash;
604 while (pTbHash)
605 {
606 PIEMTB const pNextTb = pTbHash->pNext;
607 if (pNextTb == pTb)
608 {
609 pTbHash->pNext = pTb->pNext;
610 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
611#ifdef VBOX_STRICT
612 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
613#endif
614 return true;
615 }
616 pTbHash = pNextTb;
617 }
618 return false;
619}
620
621
622/**
623 * Looks up a TB for the given PC and flags in the cache.
624 *
625 * @returns Pointer to TB on success, NULL if not found.
626 * @param pVCpu The cross context virtual CPU structure of the
627 * calling thread.
628 * @param pTbCache The translation block cache.
629 * @param GCPhysPc The PC to look up a TB for.
630 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
631 * the lookup.
632 * @thread EMT(pVCpu)
633 */
634static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
635 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
636{
637 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
638
639 /*
640 * First consult the lookup table entry.
641 */
642 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
643 PIEMTB pTb = *ppTbLookup;
644 if (pTb)
645 {
646 if (pTb->GCPhysPc == GCPhysPc)
647 {
648 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
649 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
650 {
651 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
652 {
653 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
654 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
655 pTb->cUsed++;
656#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
657 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
658 {
659 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
660 return pTb;
661 }
662 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
663 return iemNativeRecompile(pVCpu, pTb);
664#else
665 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
666 return pTb;
667#endif
668 }
669 }
670 }
671 }
672
673 /*
674 * Then consult the hash table.
675 */
676 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
677#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
678 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
679#endif
680 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
681 while (pTb)
682 {
683 if (pTb->GCPhysPc == GCPhysPc)
684 {
685 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
686 {
687 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
688 {
689 STAM_COUNTER_INC(&pTbCache->cLookupHits);
690 AssertMsg(cLeft > 0, ("%d\n", cLeft));
691
692 *ppTbLookup = pTb;
693 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
694 pTb->cUsed++;
695#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
696 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
697 {
698 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
699 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
700 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
701 return pTb;
702 }
703 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
704 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
705 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
706 return iemNativeRecompile(pVCpu, pTb);
707#else
708 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
709 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
710 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
711 return pTb;
712#endif
713 }
714 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
715 }
716 else
717 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
718 }
719 else
720 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
721
722 pTb = pTb->pNext;
723#ifdef VBOX_STRICT
724 cLeft--;
725#endif
726 }
727 AssertMsg(cLeft == 0, ("%d\n", cLeft));
728 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
729 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
730 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
731 return pTb;
732}
733
734
735/*********************************************************************************************************************************
736* Translation Block Allocator. *
737*********************************************************************************************************************************/
738/*
739 * Translation block allocation management.
740 */
741
742#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
743# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
744 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
745# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
746 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
747# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
748 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
749#else
750# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
751 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
752# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
753 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
754# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
755 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
756#endif
757/** Makes a TB index from a chunk index and TB index within that chunk. */
758#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
759 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
760
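/* Illustrative sketch, not part of the build: the flat TB index is simply
   idxChunk * cTbsPerChunk + idxInChunk; the macros above merely switch to a
   shift/mask implementation when the TB size is a power of two.  Assuming
   cTbsPerChunk = 8192 (a made-up value):

        uint32_t const idxTb      = IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 5);                          // 2*8192 + 5 = 16389
        uint32_t const idxChunk   = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);                     // -> 2
        uint32_t const idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);  // -> 5
*/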
761
762/**
763 * Initializes the TB allocator and cache for an EMT.
764 *
765 * @returns VBox status code.
766 * @param pVM The VM handle.
767 * @param cInitialTbs The initial number of translation blocks to
768 * preallocate.
769 * @param cMaxTbs The max number of translation blocks allowed.
770 * @param cbInitialExec The initial size of the executable memory allocator.
771 * @param cbMaxExec The max size of the executable memory allocator.
772 * @param cbChunkExec The chunk size for executable memory allocator. Zero
773 * or UINT32_MAX for automatically determining this.
774 * @thread EMT
775 */
776DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
777 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
778{
779 PVMCPUCC pVCpu = VMMGetCpu(pVM);
780 Assert(!pVCpu->iem.s.pTbCacheR3);
781 Assert(!pVCpu->iem.s.pTbAllocatorR3);
782
783 /*
784 * Calculate the chunk size of the TB allocator.
785 * The minimum chunk size is 2MiB.
786 */
787 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
788 uint32_t cbPerChunk = _2M;
789 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
790#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
791 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
792 uint8_t cChunkShift = 21 - cTbShift;
793 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
794#endif
795 for (;;)
796 {
797 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
798 break;
799 cbPerChunk *= 2;
800 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
801#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
802 cChunkShift += 1;
803#endif
804 }
805
806 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
807 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
808 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
809
810 cMaxTbs = cMaxChunks * cTbsPerChunk;
811
812 /*
813 * Allocate and initialize it.
814 */
815 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
816 if (!pTbAllocator)
817 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
818 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
819 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
820 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
821 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
822 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
823 pTbAllocator->cbPerChunk = cbPerChunk;
824 pTbAllocator->cMaxTbs = cMaxTbs;
825 pTbAllocator->pTbsFreeHead = NULL;
826#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
827 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
828 pTbAllocator->cChunkShift = cChunkShift;
829 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
830#endif
831
832 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
833
834 /*
835 * Allocate the initial chunks.
836 */
837 for (uint32_t idxChunk = 0; ; idxChunk++)
838 {
839 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
840 if (!paTbs)
841 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
842 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
843 cbPerChunk, idxChunk, pVCpu->idCpu);
844
845 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
846 {
847 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
848 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
849 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
850 }
851 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
852 pTbAllocator->cTotalTbs += cTbsPerChunk;
853
854 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
855 break;
856 }
857
858 /*
859 * Calculate the size of the hash table. We double the max TB count and
860 * round it up to the nearest power of two.
861 */
862 uint32_t cCacheEntries = cMaxTbs * 2;
863 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
864 {
865 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
866 cCacheEntries = RT_BIT_32(iBitTop);
867 Assert(cCacheEntries >= cMaxTbs * 2);
868 }
869
870 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
871 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
872 if (!pTbCache)
873 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
874 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
875 cbTbCache, cCacheEntries, pVCpu->idCpu);
876
877 /*
878 * Initialize it (assumes zeroed by the allocator).
879 */
880 pTbCache->uMagic = IEMTBCACHE_MAGIC;
881 pTbCache->cHash = cCacheEntries;
882 pTbCache->uHashMask = cCacheEntries - 1;
883 Assert(pTbCache->cHash > pTbCache->uHashMask);
884 pVCpu->iem.s.pTbCacheR3 = pTbCache;
885
886 /*
887 * Initialize the native executable memory allocator.
888 */
889#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
890 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
891 AssertLogRelRCReturn(rc, rc);
892#else
893 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
894#endif
895
896 return VINF_SUCCESS;
897}
898
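/* Illustrative sketch, not part of the build: a hypothetical iemTbInit call with
   made-up sizes, just to show how the parameters relate.  Passing 0 (or
   UINT32_MAX) for cbChunkExec lets the executable memory allocator pick its own
   chunk size, as noted in the doxygen comment above:

        int rc = iemTbInit(pVM, 32768 /*cInitialTbs*/, 262144 /*cMaxTbs*/,
                           _64M /*cbInitialExec*/, _512M /*cbMaxExec*/, 0 /*cbChunkExec*/);
        AssertRCReturn(rc, rc);
*/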
899
900/**
901 * Inner free worker.
902 */
903static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
904 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
905{
906 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
907 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
908 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
909#ifdef VBOX_STRICT
910 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
911 Assert(pTbOther != pTb);
912#endif
913
914 /*
915 * Unlink the TB from the hash table.
916 */
917 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
918
919 /*
920 * Free the TB itself.
921 */
922 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
923 {
924 case IEMTB_F_TYPE_THREADED:
925 pTbAllocator->cThreadedTbs -= 1;
926 RTMemFree(pTb->Thrd.paCalls);
927 break;
928#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
929 case IEMTB_F_TYPE_NATIVE:
930 pTbAllocator->cNativeTbs -= 1;
931 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
932 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
933 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
934 break;
935#endif
936 default:
937 AssertFailed();
938 }
939
940 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
941
942 pTb->pNext = pTbAllocator->pTbsFreeHead;
943 pTbAllocator->pTbsFreeHead = pTb;
944 pTb->fFlags = 0;
945 pTb->GCPhysPc = UINT64_MAX;
946 pTb->Gen.uPtr = 0;
947 pTb->Gen.uData = 0;
948 pTb->cTbLookupEntries = 0;
949 pTb->cbOpcodes = 0;
950 pTb->pabOpcodes = NULL;
951
952 Assert(pTbAllocator->cInUseTbs > 0);
953
954 pTbAllocator->cInUseTbs -= 1;
955 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
956}
957
958
959/**
960 * Frees the given TB.
961 *
962 * @param pVCpu The cross context virtual CPU structure of the calling
963 * thread.
964 * @param pTb The translation block to free.
965 * @thread EMT(pVCpu)
966 */
967DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
968{
969 /*
970 * Validate state.
971 */
972 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
973 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
974 uint8_t const idxChunk = pTb->idxAllocChunk;
975 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
976 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
977 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
978
979 /*
980 * Invalidate the TB lookup pointer and call the inner worker.
981 */
982 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
983 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
984}
985
986
987/**
988 * Schedules a TB for freeing when it's no longer being executed and/or part of
989 * the caller's call stack.
990 *
991 * The TB will be removed from the translation block cache, though, so it isn't
992 * possible to execute it again and the IEMTB::pNext member can be used to link
993 * it together with other TBs awaiting freeing.
994 *
995 * @param pVCpu The cross context virtual CPU structure of the calling
996 * thread.
997 * @param pTb The translation block to schedule for freeing.
998 */
999static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1000{
1001 /*
1002 * Validate state.
1003 */
1004 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1005 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1006 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1007 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1008 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1009 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1010#ifdef VBOX_STRICT
1011 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1012 Assert(pTbOther != pTb);
1013#endif
1014
1015 /*
1016 * Remove it from the cache and prepend it to the allocator's todo list.
1017 *
1018 * Note! It could still be in various lookup tables, so we trash the GCPhys
1019 * and CS attribs to ensure it won't be reused.
1020 */
1021 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1022 pTb->GCPhysPc = NIL_RTGCPHYS;
1023 pTb->x86.fAttr = UINT16_MAX;
1024
1025 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1026 pTbAllocator->pDelayedFreeHead = pTb;
1027}
1028
1029
1030/**
1031 * Processes the delayed frees.
1032 *
1033 * This is called by the allocator function as well as the native recompile
1034 * function before making any TB or executable memory allocations respectively.
1035 */
1036void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1037{
1038 /** @todo r=bird: these have already been removed from the cache,
1039 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1040 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1041 pTbAllocator->pDelayedFreeHead = NULL;
1042 while (pTb)
1043 {
1044 PIEMTB const pTbNext = pTb->pNext;
1045 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1046 iemTbAllocatorFree(pVCpu, pTb);
1047 pTb = pTbNext;
1048 }
1049}
1050
1051
1052#if 0
1053/**
1054 * Frees all TBs.
1055 */
1056static int iemTbAllocatorFreeAll(PVMCPUCC pVCpu)
1057{
1058 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1059 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1060 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1061
1062 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1063
1064 uint32_t idxChunk = pTbAllocator->cAllocatedChunks;
1065 while (idxChunk-- > 0)
1066 {
1067 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1068 uint32_t idxTb = pTbAllocator->cTbsPerChunk;
1069 while (idxTb-- > 0)
1070 {
1071 PIEMTB const pTb = &paTbs[idxTb];
1072 if (pTb->fFlags)
1073 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxTb);
1074 }
1075 }
1076
1077 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1078
1079# if 1
1080 /* Reset the free list. */
1081 pTbAllocator->pTbsFreeHead = NULL;
1082 idxChunk = pTbAllocator->cAllocatedChunks;
1083 while (idxChunk-- > 0)
1084 {
1085 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1086 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1087 RT_BZERO(paTbs, sizeof(paTbs[0]) * cTbsPerChunk);
1088 for (uint32_t idxTb = 0; idxTb < cTbsPerChunk; idxTb++)
1089 {
1090 paTbs[idxTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1091 paTbs[idxTb].pNext = pTbAllocator->pTbsFreeHead;
1092 pTbAllocator->pTbsFreeHead = &paTbs[idxTb];
1093 }
1094 }
1095# endif
1096
1097# if 1
1098 /* Completely reset the TB cache. */
1099 RT_BZERO(pVCpu->iem.s.pTbCacheR3->apHash, sizeof(pVCpu->iem.s.pTbCacheR3->apHash[0]) * pVCpu->iem.s.pTbCacheR3->cHash);
1100# endif
1101
1102 return VINF_SUCCESS;
1103}
1104#endif
1105
1106
1107/**
1108 * Grow the translation block allocator with another chunk.
1109 */
1110static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1111{
1112 /*
1113 * Validate state.
1114 */
1115 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1116 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1117 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1118 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1119 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1120
1121 /*
1122 * Allocate a new chunk and add it to the allocator.
1123 */
1124 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1125 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1126 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1127
1128 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1129 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1130 {
1131 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1132 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1133 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1134 }
1135 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1136 pTbAllocator->cTotalTbs += cTbsPerChunk;
1137
1138 return VINF_SUCCESS;
1139}
1140
1141
1142/**
1143 * Allocates a TB from an allocator with a free block.
1144 *
1145 * This is common code to both the fast and slow allocator code paths.
1146 */
1147DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1148{
1149 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1150 Assert(pTbAllocator->pTbsFreeHead);
1151
1152 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1153 pTbAllocator->pTbsFreeHead = pTb->pNext;
1154 pTbAllocator->cInUseTbs += 1;
1155 if (fThreaded)
1156 pTbAllocator->cThreadedTbs += 1;
1157 else
1158 pTbAllocator->cNativeTbs += 1;
1159 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1160 return pTb;
1161}
1162
1163
1164/**
1165 * Slow path for iemTbAllocatorAlloc.
1166 */
1167static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1168{
1169 /*
1170 * With some luck we can add another chunk.
1171 */
1172 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1173 {
1174 int rc = iemTbAllocatorGrow(pVCpu);
1175 if (RT_SUCCESS(rc))
1176 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1177 }
1178
1179 /*
1180 * We have to prune stuff. Sigh.
1181 *
1182 * This requires scanning for older TBs and kicking them out. Not sure how to
1183 * best do this as we don't want to maintain any list of TBs ordered by last
1184 * usage time. But one reasonably simple approach would be that each time we
1185 * get here we continue a sequential scan of the allocation chunks,
1186 * considering just a smallish number of TBs and freeing a fixed portion of
1187 * them. Say, we consider the next 128 TBs, freeing the least recently used
1188 * one out of each group of 4 TBs, resulting in 32 freed TBs.
1189 */
1190 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1191 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1192 uint32_t const cTbsToPrune = 128;
1193 uint32_t const cTbsPerGroup = 4;
1194 uint32_t cFreedTbs = 0;
1195#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1196 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1197#else
1198 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1199#endif
1200 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1201 idxTbPruneFrom = 0;
1202 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1203 {
1204 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1205 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1206 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1207 uint32_t cMsAge = msNow - pTb->msLastUsed;
1208 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1209
1210 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1211 {
1212#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1213 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1214 { /* likely */ }
1215 else
1216 {
1217 idxInChunk2 = 0;
1218 idxChunk2 += 1;
1219 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1220 idxChunk2 = 0;
1221 }
1222#endif
1223 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1224 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1225 if ( cMsAge2 > cMsAge
1226 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1227 {
1228 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1229 pTb = pTb2;
1230 idxChunk = idxChunk2;
1231 idxInChunk = idxInChunk2;
1232 cMsAge = cMsAge2;
1233 }
1234 }
1235
1236 /* Free the TB. */
1237 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1238 cFreedTbs++; /* paranoia */
1239 }
1240 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1241 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1242
1243 /* Flush the TB lookup entry pointer. */
1244 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1245
1246 /*
1247 * Allocate a TB from the ones we've pruned.
1248 */
1249 if (cFreedTbs)
1250 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1251 return NULL;
1252}
1253
1254
1255/**
1256 * Allocate a translation block.
1257 *
1258 * @returns Pointer to block on success, NULL if we're out and unable to
1259 * free up an existing one (very unlikely once implemented).
1260 * @param pVCpu The cross context virtual CPU structure of the calling
1261 * thread.
1262 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1263 * For statistics.
1264 */
1265DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1266{
1267 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1268 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1269
1270 /* Free any pending TBs before we proceed. */
1271 if (!pTbAllocator->pDelayedFreeHead)
1272 { /* probably likely */ }
1273 else
1274 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1275
1276 /* If the allocator is full, take the slow code path. */
1277 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1278 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1279 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1280}
1281
1282
1283/**
1284 * This is called when we're out of space for native TBs.
1285 *
1286 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1287 * The difference is that we only prune native TBs and will only free any if
1288 * there are at least two in a group. The conditions under which we're called are
1289 * different - there will probably be free TBs in the table when we're called.
1290 * Therefore we increase the group size and max scan length, though we'll stop
1291 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1292 * up at least 8 TBs.
1293 */
1294void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1295{
1296 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1297 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1298
1299 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1300
1301 /*
1302 * Flush the delayed free list before we start freeing TBs indiscriminately.
1303 */
1304 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1305
1306 /*
1307 * Scan and free TBs.
1308 */
1309 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1310 uint32_t const cTbsToPrune = 128 * 8;
1311 uint32_t const cTbsPerGroup = 4 * 4;
1312 uint32_t cFreedTbs = 0;
1313 uint32_t cMaxInstrs = 0;
1314 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1315 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1316 {
1317 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1318 idxTbPruneFrom = 0;
1319 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1320 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1321 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1322 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1323 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1324
1325 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1326 {
1327 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1328 { /* likely */ }
1329 else
1330 {
1331 idxInChunk2 = 0;
1332 idxChunk2 += 1;
1333 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1334 idxChunk2 = 0;
1335 }
1336 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1337 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1338 {
1339 cNativeTbs += 1;
1340 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1341 if ( cMsAge2 > cMsAge
1342 || ( cMsAge2 == cMsAge
1343 && ( pTb2->cUsed < pTb->cUsed
1344 || ( pTb2->cUsed == pTb->cUsed
1345 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1346 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1347 {
1348 pTb = pTb2;
1349 idxChunk = idxChunk2;
1350 idxInChunk = idxInChunk2;
1351 cMsAge = cMsAge2;
1352 }
1353 }
1354 }
1355
1356 /* Free the TB if we found at least two native ones in this group. */
1357 if (cNativeTbs >= 2)
1358 {
1359 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1360 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1361 cFreedTbs++;
1362 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1363 break;
1364 }
1365 }
1366 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1367
1368 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1369}
1370
1371
1372/*********************************************************************************************************************************
1373* Threaded Recompiler Core *
1374*********************************************************************************************************************************/
1375/**
1376 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1377 * @returns pszBuf.
1378 * @param fFlags The flags.
1379 * @param pszBuf The output buffer.
1380 * @param cbBuf The output buffer size. At least 32 bytes.
1381 */
1382DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1383{
1384 Assert(cbBuf >= 32);
1385 static RTSTRTUPLE const s_aModes[] =
1386 {
1387 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1388 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1389 /* [02] = */ { RT_STR_TUPLE("!2!") },
1390 /* [03] = */ { RT_STR_TUPLE("!3!") },
1391 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1392 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1393 /* [06] = */ { RT_STR_TUPLE("!6!") },
1394 /* [07] = */ { RT_STR_TUPLE("!7!") },
1395 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1396 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1397 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1398 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1399 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1400 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1401 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1402 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1403 /* [10] = */ { RT_STR_TUPLE("!10!") },
1404 /* [11] = */ { RT_STR_TUPLE("!11!") },
1405 /* [12] = */ { RT_STR_TUPLE("!12!") },
1406 /* [13] = */ { RT_STR_TUPLE("!13!") },
1407 /* [14] = */ { RT_STR_TUPLE("!14!") },
1408 /* [15] = */ { RT_STR_TUPLE("!15!") },
1409 /* [16] = */ { RT_STR_TUPLE("!16!") },
1410 /* [17] = */ { RT_STR_TUPLE("!17!") },
1411 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1412 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1413 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1414 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1415 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1416 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1417 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1418 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1419 };
1420 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1421 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1422 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1423
1424 pszBuf[off++] = ' ';
1425 pszBuf[off++] = 'C';
1426 pszBuf[off++] = 'P';
1427 pszBuf[off++] = 'L';
1428 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1429 Assert(off < 32);
1430
1431 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1432
1433 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1434 {
1435 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1436 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1437 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1438 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1439 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1440 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1441 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1442 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1443 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1444 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1445 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1446 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1447 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1448 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1449 };
1450 if (fFlags)
1451 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1452 if (s_aFlags[i].fFlag & fFlags)
1453 {
1454 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1455 pszBuf[off++] = ' ';
1456 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1457 off += s_aFlags[i].cchName;
1458 fFlags &= ~s_aFlags[i].fFlag;
1459 if (!fFlags)
1460 break;
1461 }
1462 pszBuf[off] = '\0';
1463
1464 return pszBuf;
1465}
1466
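/* Illustrative sketch, not part of the build: for a 32-bit flat, CPL 0 threaded
   TB with CS limit checks enabled, iemTbFlagsToString above would produce
   something along the lines of:

        "32BIT_FLAT CPL0 CS_LIM_CHECKS TYPE_THREADED"
*/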
1467
1468/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1469static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1470{
1471 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1472 pDis->cbCachedInstr += cbMaxRead;
1473 RT_NOREF(cbMinRead);
1474 return VERR_NO_DATA;
1475}
1476
1477
1478/**
1479 * Worker for iemThreadedDisassembleTb.
1480 */
1481static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1482 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1483{
1484 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1485 {
1486 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1487 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1488 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1489 {
1490 PIEMTB pLookupTb = papTbLookup[iLookup];
1491 if (pLookupTb)
1492 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1493 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1494 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1495 : "invalid");
1496 else
1497 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1498 }
1499 pHlp->pfnPrintf(pHlp, "\n");
1500 }
1501 else
1502 {
1503 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1504 idxFirst, cEntries, pTb->cTbLookupEntries);
1505 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1506 }
1507}
1508
1509
1510DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1511{
1512 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1513
1514 char szDisBuf[512];
1515
1516 /*
1517 * Print TB info.
1518 */
1519 pHlp->pfnPrintf(pHlp,
1520 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1521 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1522 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1523 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1524
1525 /*
1526 * This disassembly is driven by the debug info which follows the native
1527 * code and indicates when it starts with the next guest instructions,
1528 * where labels are and such things.
1529 */
1530 DISSTATE Dis;
1531 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1532 uint32_t const cCalls = pTb->Thrd.cCalls;
1533 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1534 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1535 : DISCPUMODE_64BIT;
1536 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1537 uint8_t idxRange = UINT8_MAX;
1538 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1539 uint32_t offRange = 0;
1540 uint32_t offOpcodes = 0;
1541 uint32_t const cbOpcodes = pTb->cbOpcodes;
1542 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1543 bool fTbLookupSeen0 = false;
1544
1545 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1546 {
1547 /*
1548 * New opcode range?
1549 */
1550 if ( idxRange == UINT8_MAX
1551 || idxRange >= cRanges
1552 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1553 {
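            /* idxPhysPage 0 refers to the page GCPhysPc is on; non-zero values
               index (minus one) into pTb->aGCPhysPages. */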
1554 idxRange += 1;
1555 if (idxRange < cRanges)
1556 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1557 else
1558 continue;
1559 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1560 + (pTb->aRanges[idxRange].idxPhysPage == 0
1561 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1562 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1563 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1564 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1565 pTb->aRanges[idxRange].idxPhysPage);
1566 GCPhysPc += offRange;
1567 }
1568
1569 /*
1570 * Disassemble another guest instruction?
1571 */
1572 if ( paCalls[iCall].offOpcode != offOpcodes
1573 && paCalls[iCall].cbOpcode > 0
1574 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1575 {
1576 offOpcodes = paCalls[iCall].offOpcode;
1577 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1578 uint32_t cbInstr = 1;
1579 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1580 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1581 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1582 if (RT_SUCCESS(rc))
1583 {
1584 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1585 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1586 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1587 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1588 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1589 }
1590 else
1591 {
1592 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1593 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1594 cbInstr = paCalls[iCall].cbOpcode;
1595 }
1596 GCPhysPc += cbInstr;
1597 offRange += cbInstr;
1598 }
1599
1600 /*
1601 * Dump call details.
1602 */
1603 pHlp->pfnPrintf(pHlp,
1604 " Call #%u to %s (%u args)\n",
1605 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1606 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1607 if (paCalls[iCall].uTbLookup != 0)
1608 {
1609 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1610 fTbLookupSeen0 = idxFirst == 0;
1611 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1612 }
1613
1614 /*
1615 * Snoop fExec.
1616 */
1617 switch (paCalls[iCall].enmFunction)
1618 {
1619 default:
1620 break;
1621 case kIemThreadedFunc_BltIn_CheckMode:
1622 fExec = paCalls[iCall].auParams[0];
1623 break;
1624 }
1625 }
1626
1627 if (!fTbLookupSeen0)
1628 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1629}
1630
1631
1632
1633/**
1634 * Allocate a translation block for threaded recompilation.
1635 *
1636 * This is allocated with a maxed-out call table and storage for opcode bytes,
1637 * because it's only supposed to be called once per EMT to allocate the TB
1638 * pointed to by IEMCPU::pThrdCompileTbR3.
1639 *
1640 * @returns Pointer to the translation block on success, NULL on failure.
1641 * @param pVM The cross context virtual machine structure.
1642 * @param pVCpu The cross context virtual CPU structure of the calling
1643 * thread.
1644 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1645 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1646 */
1647static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1648{
1649 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1650 if (pTb)
1651 {
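        /* Maxed-out sizing: 256 call entries and room for 16 opcode bytes per
           call (the longest x86 instruction is 15 bytes). */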
1652 unsigned const cCalls = 256;
1653 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1654 if (pTb->Thrd.paCalls)
1655 {
1656 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1657 if (pTb->pabOpcodes)
1658 {
1659 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1660 pTb->Thrd.cAllocated = cCalls;
1661 pTb->Thrd.cCalls = 0;
1662 pTb->cbOpcodes = 0;
1663 pTb->pNext = NULL;
1664 pTb->cUsed = 0;
1665 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1666 pTb->idxAllocChunk = UINT8_MAX;
1667 pTb->GCPhysPc = GCPhysPc;
1668 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1669 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1670 pTb->cInstructions = 0;
1671 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1672
1673 /* Init the first opcode range. */
1674 pTb->cRanges = 1;
1675 pTb->aRanges[0].cbOpcodes = 0;
1676 pTb->aRanges[0].offOpcodes = 0;
1677 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1678 pTb->aRanges[0].u2Unused = 0;
1679 pTb->aRanges[0].idxPhysPage = 0;
1680 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1681 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1682
1683 return pTb;
1684 }
1685 RTMemFree(pTb->Thrd.paCalls);
1686 }
1687 RTMemFree(pTb);
1688 }
1689 RT_NOREF(pVM);
1690 return NULL;
1691}
1692
1693
1694/**
1695 * Called on the TB that is dedicated for recompilation before it's reused.
1696 *
1697 * @param pVCpu The cross context virtual CPU structure of the calling
1698 * thread.
1699 * @param pTb The translation block to reuse.
1700 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1701 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1702 */
1703static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1704{
1705 pTb->GCPhysPc = GCPhysPc;
1706 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1707 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1708 pTb->Thrd.cCalls = 0;
1709 pTb->cbOpcodes = 0;
1710 pTb->cInstructions = 0;
1711 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1712
1713 /* Init the first opcode range. */
1714 pTb->cRanges = 1;
1715 pTb->aRanges[0].cbOpcodes = 0;
1716 pTb->aRanges[0].offOpcodes = 0;
1717 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1718 pTb->aRanges[0].u2Unused = 0;
1719 pTb->aRanges[0].idxPhysPage = 0;
1720 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1721 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1722}
1723
1724
1725/**
1726 * Used to duplicate a threaded translation block after recompilation is done.
1727 *
1728 * @returns Pointer to the translation block on success, NULL on failure.
1729 * @param pVM The cross context virtual machine structure.
1730 * @param pVCpu The cross context virtual CPU structure of the calling
1731 * thread.
1732 * @param pTbSrc The TB to duplicate.
1733 */
1734static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1735{
1736 /*
1737 * Just using the heap for now. Will make this more efficient and
1738 * complicated later, don't worry. :-)
1739 */
1740 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1741 if (pTb)
1742 {
1743 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1744 memcpy(pTb, pTbSrc, sizeof(*pTb));
1745 pTb->idxAllocChunk = idxAllocChunk;
1746
1747 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1748 Assert(cCalls > 0);
1749 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1750 if (pTb->Thrd.paCalls)
1751 {
1752 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1753 Assert(cbTbLookup > 0);
1754 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1755 Assert(cbOpcodes > 0);
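            /* The TB lookup table and the opcode bytes share a single
               allocation: the (zeroed) lookup pointers come first, followed by
               a copy of the opcode bytes. */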
1756 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1757 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1758 if (pbBoth)
1759 {
1760 RT_BZERO(pbBoth, cbTbLookup);
1761 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1762 pTb->Thrd.cAllocated = cCalls;
1763 pTb->pNext = NULL;
1764 pTb->cUsed = 0;
1765 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1766 pTb->fFlags = pTbSrc->fFlags;
1767
1768 return pTb;
1769 }
1770 RTMemFree(pTb->Thrd.paCalls);
1771 }
1772 iemTbAllocatorFree(pVCpu, pTb);
1773 }
1774 RT_NOREF(pVM);
1775 return NULL;
1776
1777}
1778
1779
1780/**
1781 * Adds the given TB to the hash table.
1782 *
1783 * @param pVCpu The cross context virtual CPU structure of the calling
1784 * thread.
1785 * @param pTbCache The cache to add it to.
1786 * @param pTb The translation block to add.
1787 */
1788static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1789{
1790 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1791
1792 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1793 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1794 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1795 if (LogIs12Enabled())
1796 {
1797 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1798 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1799 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1800 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1801 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1802 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1803 pTb->aRanges[idxRange].idxPhysPage == 0
1804 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1805 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1806 }
1807}
1808
1809
1810/**
1811 * Called by opcode verifier functions when they detect a problem.
1812 */
1813void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1814{
1815 /* We cannot free the current TB (indicated by fSafeToFree) because:
1816 - A threaded TB will have its current call entry accessed
1817 to update pVCpu->iem.s.cInstructions.
1818 - A native TB will have code left to execute. */
1819 if (fSafeToFree)
1820 iemTbAllocatorFree(pVCpu, pTb);
1821 else
1822 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1823}
1824
1825
1826/*
1827 * Real code.
1828 */
1829
1830#ifdef LOG_ENABLED
1831/**
1832 * Logs the current instruction.
1833 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1834 * @param pszFunction The IEM function doing the execution.
1835 * @param idxInstr The instruction number in the block.
1836 */
1837static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1838{
1839# ifdef IN_RING3
1840 if (LogIs2Enabled())
1841 {
1842 char szInstr[256];
1843 uint32_t cbInstr = 0;
1844 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1845 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1846 szInstr, sizeof(szInstr), &cbInstr);
1847
1848 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1849 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1850 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1851 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1852 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1853 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1854 " %s\n"
1855 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1856 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1857 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1858 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1859 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1860 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1861 szInstr));
1862
1863 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1864 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1865 }
1866 else
1867# endif
1868 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1869 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1870}
1871#endif /* LOG_ENABLED */
1872
1873
1874#if 0
1875static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1876{
1877 RT_NOREF(pVM, pVCpu);
1878 return rcStrict;
1879}
1880#endif
1881
1882
1883/**
1884 * Initializes the decoder state when compiling TBs.
1885 *
1886 * This presumes that fExec has already been initialized.
1887 *
1888 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1889 * to apply fixes to them as well.
1890 *
1891 * @param pVCpu The cross context virtual CPU structure of the calling
1892 * thread.
1893 * @param fReInit Clear for the first call for a TB, set for subsequent
1894 * calls from inside the compile loop where we can skip a
1895 * couple of things.
1896 * @param fExtraFlags The extra translation block flags when @a fReInit is
1897 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1898 * checked.
1899 */
1900DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1901{
1902 /* ASSUMES: That iemInitExec was already called and that anyone changing
1903 CPU state affecting the fExec bits since then will have updated fExec! */
1904 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1905 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1906
1907 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1908
1909 /* Decoder state: */
1910 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1911 pVCpu->iem.s.enmEffAddrMode = enmMode;
1912 if (enmMode != IEMMODE_64BIT)
1913 {
1914 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1915 pVCpu->iem.s.enmEffOpSize = enmMode;
1916 }
1917 else
1918 {
1919 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1920 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1921 }
1922 pVCpu->iem.s.fPrefixes = 0;
1923 pVCpu->iem.s.uRexReg = 0;
1924 pVCpu->iem.s.uRexB = 0;
1925 pVCpu->iem.s.uRexIndex = 0;
1926 pVCpu->iem.s.idxPrefix = 0;
1927 pVCpu->iem.s.uVex3rdReg = 0;
1928 pVCpu->iem.s.uVexLength = 0;
1929 pVCpu->iem.s.fEvexStuff = 0;
1930 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1931 pVCpu->iem.s.offModRm = 0;
1932 pVCpu->iem.s.iNextMapping = 0;
1933
1934 if (!fReInit)
1935 {
1936 pVCpu->iem.s.cActiveMappings = 0;
1937 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1938 pVCpu->iem.s.fEndTb = false;
1939 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes for before executing the first instruction) */
1940 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1941 pVCpu->iem.s.fTbCrossedPage = false;
1942 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1943 pVCpu->iem.s.idxLastCheckIrqCallNo = UINT16_MAX;
1944 pVCpu->iem.s.fTbCurInstrIsSti = false;
1945 /* Force RF clearing and TF checking on first instruction in the block
1946 as we don't really know what came before and should assume the worst: */
1947 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1948 }
1949 else
1950 {
1951 Assert(pVCpu->iem.s.cActiveMappings == 0);
1952 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1953 Assert(pVCpu->iem.s.fEndTb == false);
1954 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1955 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1956 }
1957 pVCpu->iem.s.fTbCurInstr = 0;
1958
1959#ifdef DBGFTRACE_ENABLED
1960 switch (IEM_GET_CPU_MODE(pVCpu))
1961 {
1962 case IEMMODE_64BIT:
1963 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1964 break;
1965 case IEMMODE_32BIT:
1966 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1967 break;
1968 case IEMMODE_16BIT:
1969 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1970 break;
1971 }
1972#endif
1973}
1974
1975
1976/**
1977 * Initializes the opcode fetcher when starting the compilation.
1978 *
1979 * @param pVCpu The cross context virtual CPU structure of the calling
1980 * thread.
1981 */
1982DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1983{
1984 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1985#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1986 pVCpu->iem.s.offOpcode = 0;
1987#else
1988 RT_NOREF(pVCpu);
1989#endif
1990}
1991
1992
1993/**
1994 * Re-initializes the opcode fetcher between instructions while compiling.
1995 *
1996 * @param pVCpu The cross context virtual CPU structure of the calling
1997 * thread.
1998 */
1999DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
2000{
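    /* If the current RIP (plus CS base) still falls inside the prefetched
       instruction buffer, just adjust the offsets; otherwise clear the buffer
       state so the next opcode fetch reloads it. */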
2001 if (pVCpu->iem.s.pbInstrBuf)
2002 {
2003 uint64_t off = pVCpu->cpum.GstCtx.rip;
2004 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2005 off += pVCpu->cpum.GstCtx.cs.u64Base;
2006 off -= pVCpu->iem.s.uInstrBufPc;
2007 if (off < pVCpu->iem.s.cbInstrBufTotal)
2008 {
2009 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2010 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2011 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2012 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2013 else
2014 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2015 }
2016 else
2017 {
2018 pVCpu->iem.s.pbInstrBuf = NULL;
2019 pVCpu->iem.s.offInstrNextByte = 0;
2020 pVCpu->iem.s.offCurInstrStart = 0;
2021 pVCpu->iem.s.cbInstrBuf = 0;
2022 pVCpu->iem.s.cbInstrBufTotal = 0;
2023 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2024 }
2025 }
2026 else
2027 {
2028 pVCpu->iem.s.offInstrNextByte = 0;
2029 pVCpu->iem.s.offCurInstrStart = 0;
2030 pVCpu->iem.s.cbInstrBuf = 0;
2031 pVCpu->iem.s.cbInstrBufTotal = 0;
2032#ifdef VBOX_STRICT
2033 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2034#endif
2035 }
2036#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2037 pVCpu->iem.s.offOpcode = 0;
2038#endif
2039}
2040
2041#ifdef LOG_ENABLED
2042
2043/**
2044 * Inserts a NOP call.
2045 *
2046 * This is for debugging.
2047 *
2048 * @returns true on success, false if we're out of call entries.
2049 * @param pTb The translation block being compiled.
2050 */
2051bool iemThreadedCompileEmitNop(PIEMTB pTb)
2052{
2053 /* Emit the call. */
2054 uint32_t const idxCall = pTb->Thrd.cCalls;
2055 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2056 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2057 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2058 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2059 pCall->idxInstr = pTb->cInstructions - 1;
2060 pCall->cbOpcode = 0;
2061 pCall->offOpcode = 0;
2062 pCall->uTbLookup = 0;
2063 pCall->fFlags = 0;
2064 pCall->auParams[0] = 0;
2065 pCall->auParams[1] = 0;
2066 pCall->auParams[2] = 0;
2067 return true;
2068}
2069
2070
2071/**
2072 * Called by iemThreadedCompile if cpu state logging is desired.
2073 *
2074 * @returns true on success, false if we're out of call entries.
2075 * @param pTb The translation block being compiled.
2076 */
2077bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2078{
2079 /* Emit the call. */
2080 uint32_t const idxCall = pTb->Thrd.cCalls;
2081 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2082 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2083 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2084 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2085 pCall->idxInstr = pTb->cInstructions - 1;
2086 pCall->cbOpcode = 0;
2087 pCall->offOpcode = 0;
2088 pCall->uTbLookup = 0;
2089 pCall->fFlags = 0;
2090 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2091 pCall->auParams[1] = 0;
2092 pCall->auParams[2] = 0;
2093 return true;
2094}
2095
2096#endif /* LOG_ENABLED */
2097
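/**
 * Copies the first @a cbInstr opcode bytes from the decoder's opcode buffer
 * (pVCpu->iem.s.abOpcode) to @a pbDst.
 *
 * Unrolled as a fall-through switch rather than calling memcpy.
 */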
2098DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2099{
2100 switch (cbInstr)
2101 {
2102 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2103 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2104 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2105 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2106 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2107 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2108 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2109 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2110 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2111 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2112 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2113 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2114 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2115 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2116 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2117 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2118 }
2119}
2120
2121
2122#ifdef IEM_WITH_INTRA_TB_JUMPS
2123/**
2124 * Emits the necessary tail calls for a full TB loop-jump.
2125 */
2126static bool iemThreadedCompileFullTbJump(PVMCPUCC pVCpu, PIEMTB pTb)
2127{
2128 /*
2129 * We need a timer and maybe IRQ check before jumping, so make sure
2130 * we've got sufficient call entries left before emitting anything.
2131 */
2132 uint32_t idxCall = pTb->Thrd.cCalls;
2133 if (idxCall + 1U <= pTb->Thrd.cAllocated)
2134 {
2135 /*
2136 * We're good, emit the calls.
2137 */
2138 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2139 pTb->Thrd.cCalls = (uint16_t)(idxCall + 2);
2140
2141 /* Always check timers as we risk getting stuck in a loop otherwise. We
2142 combine it with an IRQ check if that's not performed in the TB already. */
2143 pCall->enmFunction = pVCpu->iem.s.idxLastCheckIrqCallNo < idxCall
2144 ? kIemThreadedFunc_BltIn_CheckTimers
2145 : kIemThreadedFunc_BltIn_CheckTimersAndIrq;
2146 pCall->idxInstr = 0;
2147 pCall->offOpcode = 0;
2148 pCall->cbOpcode = 0;
2149 pCall->uTbLookup = 0;
2150 pCall->fFlags = 0;
2151 pCall->auParams[0] = 0;
2152 pCall->auParams[1] = 0;
2153 pCall->auParams[2] = 0;
2154 pCall++;
2155
2156        /* The jump to callentry[0]. */
2157 pCall->enmFunction = kIemThreadedFunc_BltIn_Jump;
2158 pCall->idxInstr = 0;
2159 pCall->offOpcode = 0;
2160 pCall->cbOpcode = 0;
2161 pCall->uTbLookup = 0;
2162 pCall->fFlags = 0;
2163 pCall->auParams[0] = 0; /* jump target is call zero */
2164 pCall->auParams[1] = 0;
2165 pCall->auParams[2] = 0;
2166
2167 /* Mark callentry #0 as a jump target. */
2168 pTb->Thrd.paCalls[0].fFlags |= IEMTHREADEDCALLENTRY_F_JUMP_TARGET;
2169 }
2170
2171 return false;
2172}
2173#endif /* IEM_WITH_INTRA_TB_JUMPS */
2174
2175
2176/**
2177 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2178 *
2179 * - CS LIM check required.
2180 * - Must recheck opcode bytes.
2181 * - Previous instruction branched.
2182 * - TLB load detected, probably due to page crossing.
2183 *
2184 * @returns true if everything went well, false if we're out of space in the TB
2185 *          (e.g. opcode ranges) or we need to start doing CS.LIM checks.
2186 * @param pVCpu The cross context virtual CPU structure of the calling
2187 * thread.
2188 * @param pTb The translation block being compiled.
2189 */
2190bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2191{
2192 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2193 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2194#if 0
2195 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2196 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2197#endif
2198
2199 /*
2200 * If we're not in 64-bit mode and not already checking CS.LIM we need to
2201 * see if it's needed to start checking.
2202 */
2203 bool fConsiderCsLimChecking;
2204 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2205 if ( fMode == IEM_F_MODE_X86_64BIT
2206 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2207 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2208 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2209 fConsiderCsLimChecking = false; /* already enabled or not needed */
2210 else
2211 {
2212 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2213 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2214 fConsiderCsLimChecking = true; /* likely */
2215 else
2216 {
2217 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2218 return false;
2219 }
2220 }
2221
2222 /*
2223     * Prepare the call now, even before we know if we can accept the instruction in this TB.
2224     * This allows us to amend parameters w/o making every case suffer.
2225 */
2226 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2227 uint16_t const offOpcode = pTb->cbOpcodes;
2228 uint8_t idxRange = pTb->cRanges - 1;
2229
2230 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2231 pCall->idxInstr = pTb->cInstructions;
2232 pCall->cbOpcode = cbInstr;
2233 pCall->offOpcode = offOpcode;
2234 pCall->uTbLookup = 0;
2235 pCall->fFlags = 0;
2236 pCall->auParams[0] = (uint32_t)cbInstr
2237 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2238 /* The upper dword is sometimes used for cbStartPage. */;
2239 pCall->auParams[1] = idxRange;
2240 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2241
2242/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2243 * gotten onto. If we do, stop */
2244
2245 /*
2246 * Case 1: We've branched (RIP changed).
2247 *
2248 * Loop check: If the new PC (GCPhysPC) is within a opcode range of this
2249     *      Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2250 * made sense to unroll it, the guest code compiler should've
2251 * done it already.
2252 *
2253 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2254 * Req: 1 extra range, no extra phys.
2255 *
2256     * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2257 * necessary (fTbCrossedPage is true).
2258 * Req: 1 extra range, probably 1 extra phys page entry.
2259 *
2260 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2261 * but in addition we cross into the following page and require
2262 * another TLB load.
2263 * Req: 2 extra ranges, probably 2 extra phys page entries.
2264 *
2265 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2266 * the following page (thus fTbCrossedPage is true).
2267 * Req: 2 extra ranges, probably 1 extra phys page entry.
2268 *
2269     * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
2270     *       it may trigger "spuriously" from the CPU point of view because of
2271     *       physical page changes that'll invalidate the physical TLB and trigger a
2272     *       call to the function. In theory this shouldn't be a big deal, just a
2273     *       bit of performance loss as we'll pick the LoadingTlb variants.
2274 *
2275 * Note! We do not currently optimize branching to the next instruction (sorry
2276 * 32-bit PIC code). We could maybe do that in the branching code that
2277 * sets (or not) fTbBranched.
2278 */
2279 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2280 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2281 * code. This'll require filtering out far jmps and calls, as they
2282 * load CS which should technically be considered indirect since the
2283 * GDT/LDT entry's base address can be modified independently from
2284 * the code. */
2285 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2286 {
2287 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2288 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2289 {
2290 /* 1a + 1b - instruction fully within the branched to page. */
2291 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2292 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2293
2294 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2295 {
2296 /* Check that we've got a free range. */
2297 idxRange += 1;
2298 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2299 { /* likely */ }
2300 else
2301 {
2302 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2303 return false;
2304 }
2305 pCall->auParams[1] = idxRange;
2306 pCall->auParams[2] = 0;
2307
2308 /* Check that we've got a free page slot. */
2309 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2310 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2311 uint8_t idxPhysPage;
2312 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2313 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2314 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2315 {
2316 pTb->aGCPhysPages[0] = GCPhysNew;
2317 pTb->aRanges[idxRange].idxPhysPage = 1;
2318 idxPhysPage = UINT8_MAX;
2319 }
2320 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2321 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2322 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2323 {
2324 pTb->aGCPhysPages[1] = GCPhysNew;
2325 pTb->aRanges[idxRange].idxPhysPage = 2;
2326 idxPhysPage = UINT8_MAX;
2327 }
2328 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2329 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2330 else
2331 {
2332                    Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2333 return false;
2334 }
2335
2336 /* Loop check: We weave the loop check in here to optimize the lookup. */
2337 if (idxPhysPage != UINT8_MAX)
2338 {
2339 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2340 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2341 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2342 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2343 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2344 {
2345 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2346#ifdef IEM_WITH_INTRA_TB_JUMPS
2347 /* If we're looping back to the start of the TB and the mode is still the same,
2348 we could emit a jump optimization. For now we don't do page transitions
2349 as that implies TLB loading and such. */
2350 if ( idxLoopRange == 0
2351 && offPhysPc == pTb->aRanges[0].offPhysPage
2352 && (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2353 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS)
2354 && (pVCpu->iem.s.fTbBranched & ( IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR
2355 | IEMBRANCHED_F_STACK | IEMBRANCHED_F_RELATIVE))
2356 == IEMBRANCHED_F_RELATIVE)
2357 {
2358 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected);
2359 return iemThreadedCompileFullTbJump(pVCpu, pTb);
2360 }
2361#endif
2362 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2363 return false;
2364 }
2365 }
2366
2367 /* Finish setting up the new range. */
2368 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2369 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2370 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2371 pTb->aRanges[idxRange].u2Unused = 0;
2372 pTb->cRanges++;
2373 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2374 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2375 pTb->aRanges[idxRange].offOpcodes));
2376 }
2377 else
2378 {
2379 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2380 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2381 }
2382
2383            /* Determine which function we need to load & check.
2384 Note! For jumps to a new page, we'll set both fTbBranched and
2385 fTbCrossedPage to avoid unnecessary TLB work for intra
2386 page branching */
2387 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2388 || pVCpu->iem.s.fTbCrossedPage)
2389 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2390 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2391 : !fConsiderCsLimChecking
2392 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2393 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2394 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2395 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2396 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2397 : !fConsiderCsLimChecking
2398 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2399 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2400 else
2401 {
2402 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2403 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2404 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2405 : !fConsiderCsLimChecking
2406 ? kIemThreadedFunc_BltIn_CheckOpcodes
2407 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2408 }
2409 }
2410 else
2411 {
2412 /* 1c + 1d - instruction crosses pages. */
2413 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2414 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2415
2416 /* Lazy bird: Check that this isn't case 1c, since we've already
2417               loaded the first physical address. End the TB and
2418 make it a case 2b instead.
2419
2420 Hmm. Too much bother to detect, so just do the same
2421 with case 1d as well. */
2422#if 0 /** @todo get back to this later when we've got the actual branch code in
2423 * place. */
2424 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2425
2426 /* Check that we've got two free ranges. */
2427 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2428 { /* likely */ }
2429 else
2430 return false;
2431 idxRange += 1;
2432 pCall->auParams[1] = idxRange;
2433 pCall->auParams[2] = 0;
2434
2435 /* ... */
2436
2437#else
2438 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2439 return false;
2440#endif
2441 }
2442 }
2443
2444 /*
2445 * Case 2: Page crossing.
2446 *
2447 * Sub-case 2a: The instruction starts on the first byte in the next page.
2448 *
2449 * Sub-case 2b: The instruction has opcode bytes in both the current and
2450 * following page.
2451 *
2452 * Both cases requires a new range table entry and probably a new physical
2453 * page entry. The difference is in which functions to emit and whether to
2454 * add bytes to the current range.
2455 */
2456 else if (pVCpu->iem.s.fTbCrossedPage)
2457 {
2458 /* Check that we've got a free range. */
2459 idxRange += 1;
2460 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2461 { /* likely */ }
2462 else
2463 {
2464 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2465 return false;
2466 }
2467
2468 /* Check that we've got a free page slot. */
2469 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2470 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2471 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2472 pTb->aRanges[idxRange].idxPhysPage = 0;
2473 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2474 || pTb->aGCPhysPages[0] == GCPhysNew)
2475 {
2476 pTb->aGCPhysPages[0] = GCPhysNew;
2477 pTb->aRanges[idxRange].idxPhysPage = 1;
2478 }
2479 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2480 || pTb->aGCPhysPages[1] == GCPhysNew)
2481 {
2482 pTb->aGCPhysPages[1] = GCPhysNew;
2483 pTb->aRanges[idxRange].idxPhysPage = 2;
2484 }
2485 else
2486 {
2487            Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2488 return false;
2489 }
2490
2491 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2492 {
2493 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2494 pCall->auParams[1] = idxRange;
2495 pCall->auParams[2] = 0;
2496
2497 /* Finish setting up the new range. */
2498 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2499 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2500 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2501 pTb->aRanges[idxRange].u2Unused = 0;
2502 pTb->cRanges++;
2503 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2504 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2505 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2506
2507            /* Determine which function we need to load & check. */
2508 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2509 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2510 : !fConsiderCsLimChecking
2511 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2512 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2513 }
2514 else
2515 {
2516 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2517 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2518 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2519 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2520
2521            /* We're good. Split the instruction over the old and new range table entries. */
2522 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2523
2524 pTb->aRanges[idxRange].offPhysPage = 0;
2525 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2526 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2527 pTb->aRanges[idxRange].u2Unused = 0;
2528 pTb->cRanges++;
2529 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2530 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2531 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2532
2533            /* Determine which function we need to load & check. */
2534 if (pVCpu->iem.s.fTbCheckOpcodes)
2535 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2536 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2537 : !fConsiderCsLimChecking
2538 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2539 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2540 else
2541 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2542 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2543 : !fConsiderCsLimChecking
2544 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2545 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2546 }
2547 }
2548
2549 /*
2550 * Regular case: No new range required.
2551 */
2552 else
2553 {
2554 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2555 if (pVCpu->iem.s.fTbCheckOpcodes)
2556 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2557 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2558 : kIemThreadedFunc_BltIn_CheckOpcodes;
2559 else
2560 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2561
2562 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2563 pTb->cbOpcodes = offOpcode + cbInstr;
2564 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2565 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2566 }
2567
2568 /*
2569 * Commit the call.
2570 */
2571 pTb->Thrd.cCalls++;
2572
2573 /*
2574 * Clear state.
2575 */
2576 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2577 pVCpu->iem.s.fTbCrossedPage = false;
2578 pVCpu->iem.s.fTbCheckOpcodes = false;
2579
2580 /*
2581 * Copy opcode bytes.
2582 */
2583 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2584 pTb->cbOpcodes = offOpcode + cbInstr;
2585 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2586
2587 return true;
2588}
2589
2590
2591/**
2592 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2593 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2594 *
2595 * @returns true if anything is pending, false if not.
2596 * @param pVCpu The cross context virtual CPU structure of the calling
2597 * thread.
2598 */
2599DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2600{
2601 uint64_t fCpu = pVCpu->fLocalForcedActions;
2602 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2603#if 1
2604 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
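    /* Return false (nothing deliverable) when nothing is pending, or when only
       maskable PIC/APIC interrupts are pending but they cannot be delivered
       right now (IF clear or interrupt shadow). */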
2605 if (RT_LIKELY( !fCpu
2606 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2607 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2608 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2609 return false;
2610 return true;
2611#else
2612 return false;
2613#endif
2614
2615}
2616
2617
2618/**
2619 * Called by iemThreadedCompile when a block requires a mode check.
2620 *
2621 * @returns true if we should continue, false if we're out of call entries.
2622 * @param pVCpu The cross context virtual CPU structure of the calling
2623 * thread.
2624 * @param pTb The translation block being compiled.
2625 */
2626static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2627{
2628 /* Emit the call. */
2629 uint32_t const idxCall = pTb->Thrd.cCalls;
2630 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2631 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2632 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2633 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2634 pCall->idxInstr = pTb->cInstructions - 1;
2635 pCall->cbOpcode = 0;
2636 pCall->offOpcode = 0;
2637 pCall->uTbLookup = 0;
2638 pCall->fFlags = 0;
2639 pCall->auParams[0] = pVCpu->iem.s.fExec;
2640 pCall->auParams[1] = 0;
2641 pCall->auParams[2] = 0;
2642 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2643 return true;
2644}
2645
2646
2647/**
2648 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2649 * set.
2650 *
2651 * @returns true if we should continue, false if an IRQ is deliverable or a
2652 * relevant force flag is pending.
2653 * @param pVCpu The cross context virtual CPU structure of the calling
2654 * thread.
2655 * @param pTb The translation block being compiled.
2656 * @sa iemThreadedCompileCheckIrq
2657 */
2658bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2659{
2660 /*
2661     * Skip this if we've already emitted a call after the previous instruction
2662 * or if it's the first call, as we're always checking FFs between blocks.
2663 */
2664 uint32_t const idxCall = pTb->Thrd.cCalls;
2665 if ( idxCall > 0
2666 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2667 {
2668 /* Emit the call. */
2669 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2670 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2671 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2672 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2673 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2674 pCall->idxInstr = pTb->cInstructions;
2675 pCall->offOpcode = 0;
2676 pCall->cbOpcode = 0;
2677 pCall->uTbLookup = 0;
2678 pCall->fFlags = 0;
2679 pCall->auParams[0] = 0;
2680 pCall->auParams[1] = 0;
2681 pCall->auParams[2] = 0;
2682 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2683
2684 /* Reset the IRQ check value. */
2685 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2686
2687 /*
2688 * Check for deliverable IRQs and pending force flags.
2689 */
2690 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2691 }
2692 return true; /* continue */
2693}
2694
2695
2696/**
2697 * Emits an IRQ check call and checks for pending IRQs.
2698 *
2699 * @returns true if we should continue, false if an IRQ is deliverable or a
2700 * relevant force flag is pending.
2701 * @param pVCpu The cross context virtual CPU structure of the calling
2702 * thread.
2703 * @param   pTb     The translation block.
2704 * @sa iemThreadedCompileBeginEmitCallsComplications
2705 */
2706static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2707{
2708 /* Check again in a little bit, unless it is immediately following an STI
2709 in which case we *must* check immediately after the next instruction
2710 as well in case it's executed with interrupt inhibition. We could
2711       otherwise miss the interrupt window. See the irq2 wait2 variant in
2712 bs3-timers-1 which is doing sti + sti + cli. */
2713 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2714 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2715 else
2716 {
2717 pVCpu->iem.s.fTbCurInstrIsSti = false;
2718 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2719 }
2720 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2721
2722 /*
2723 * Emit the call.
2724 */
2725 uint32_t const idxCall = pTb->Thrd.cCalls;
2726 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2727 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2728 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2729 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2730 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2731 pCall->idxInstr = pTb->cInstructions;
2732 pCall->offOpcode = 0;
2733 pCall->cbOpcode = 0;
2734 pCall->uTbLookup = 0;
2735 pCall->fFlags = 0;
2736 pCall->auParams[0] = 0;
2737 pCall->auParams[1] = 0;
2738 pCall->auParams[2] = 0;
2739
2740 /*
2741 * Check for deliverable IRQs and pending force flags.
2742 */
2743 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2744}
2745
2746
2747/**
2748 * Compiles a new TB and executes it.
2749 *
2750 * We combine compilation and execution here as it makes for simpler code flow
2751 * in the main loop and it allows interpreting while compiling if we want to
2752 * explore that option.
2753 *
2754 * @returns Strict VBox status code.
2755 * @param pVM The cross context virtual machine structure.
2756 * @param pVCpu The cross context virtual CPU structure of the calling
2757 * thread.
2758 * @param GCPhysPc The physical address corresponding to the current
2759 * RIP+CS.BASE.
2760 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2761 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2762 */
2763static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2764{
2765 IEMTLBTRACE_TB_COMPILE(pVCpu, GCPhysPc);
2766 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2767 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2768
2769 /*
2770 * Get the TB we use for the recompiling. This is a maxed-out TB so
2771     * that we'll make a more efficient copy of when we're done compiling.
2772 */
2773 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2774 if (pTb)
2775 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2776 else
2777 {
2778 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2779 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2780 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2781 }
2782
2783 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2784 functions may get at it. */
2785 pVCpu->iem.s.pCurTbR3 = pTb;
2786
2787#if 0
2788 /* Make sure the CheckIrq condition matches the one in EM. */
2789 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2790 const uint32_t cZeroCalls = 1;
2791#else
2792 const uint32_t cZeroCalls = 0;
2793#endif
2794
2795 /*
2796     * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2797 */
2798 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2799 iemThreadedCompileInitOpcodeFetching(pVCpu);
2800 VBOXSTRICTRC rcStrict;
2801 for (;;)
2802 {
2803 /* Process the next instruction. */
2804#ifdef LOG_ENABLED
2805 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2806 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2807 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2808 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2809#endif
2810 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2811 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2812
2813 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2814#if 0
2815 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2816 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2817 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2818#endif
2819 if ( rcStrict == VINF_SUCCESS
2820 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2821 && !pVCpu->iem.s.fEndTb)
2822 {
2823 Assert(pTb->Thrd.cCalls > cCallsPrev);
2824 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
2825
2826 pVCpu->iem.s.cInstructions++;
2827
2828 /* Check for mode change _after_ certain CIMPL calls, so check that
2829 we continue executing with the same mode value. */
2830 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2831 { /* probable */ }
2832 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2833 { /* extremely likely */ }
2834 else
2835 break;
2836
2837#if defined(LOG_ENABLED) && 0 /* for debugging */
2838 //iemThreadedCompileEmitNop(pTb);
2839 iemThreadedCompileEmitLogCpuState(pTb);
2840#endif
2841 }
2842 else
2843 {
2844 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2845 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2846 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2847 rcStrict = VINF_SUCCESS;
2848
2849 if (pTb->Thrd.cCalls > cZeroCalls)
2850 {
2851 if (cCallsPrev != pTb->Thrd.cCalls)
2852 pVCpu->iem.s.cInstructions++;
2853 break;
2854 }
2855
2856 pVCpu->iem.s.pCurTbR3 = NULL;
2857 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2858 }
2859
2860 /* Check for IRQs? */
2861 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2862 pVCpu->iem.s.cInstrTillIrqCheck--;
2863 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2864 break;
2865
2866 /* Still space in the TB? */
2867 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2868 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2869 && pTb->cTbLookupEntries < 127)
2870 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2871 else
2872 {
2873 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2874 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2875 break;
2876 }
2877 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2878 }
2879
2880 /*
2881 * Reserve lookup space for the final call entry if necessary.
2882 */
2883 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2884 if (pTb->Thrd.cCalls > 1)
2885 {
2886 if (pFinalCall->uTbLookup == 0)
2887 {
2888 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2889 pTb->cTbLookupEntries += 1;
2890 }
2891 }
2892 else if (pFinalCall->uTbLookup != 0)
2893 {
2894 Assert(pTb->cTbLookupEntries > 1);
2895 pFinalCall->uTbLookup -= 1;
2896 pTb->cTbLookupEntries -= 1;
2897 }
2898
2899 /*
2900 * Duplicate the TB into a completed one and link it.
2901 */
2902 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2903 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2904
2905 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2906
2907#ifdef IEM_COMPILE_ONLY_MODE
2908 /*
2909 * Execute the translation block.
2910 */
2911#endif
2912
2913 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2914}
2915
2916
2917
2918/*********************************************************************************************************************************
2919* Recompiled Execution Core *
2920*********************************************************************************************************************************/
2921
2922/** Default TB factor.
2923 * This is basically the number of nanoseconds we guess executing a TB takes
2924 * on average. We estimate it high if we can.
2925 * @note Best if this is a power of two so it can be translated to a shift. */
2926#define IEM_TIMER_POLL_DEFAULT_FACTOR UINT32_C(64)
2927/** The minimum number of nanoseconds we can allow between timer pollings.
2928 * This must take the cost of TMTimerPollBoolWithNanoTS into mind. We put that
2929 * cost at 104 ns now, thus this constant is at 256 ns. */
2930#define IEM_TIMER_POLL_MIN_NS UINT32_C(256)
2931/** The IEM_TIMER_POLL_MIN_NS value roughly translated to TBs, with some grains
2932 * of salt thrown in.
2933 * The idea is that we will be able to make progress with guest code execution
2934 * before polling timers and between running timers. */
2935#define IEM_TIMER_POLL_MIN_ITER UINT32_C(12)
2936/** The maximum number of nanoseconds we can allow between timer pollings.
2937 * This probably shouldn't be too high, as we don't have any timer
2938 * reprogramming feedback in the polling code. So, when a device reschedules a
2939 * timer for an earlier delivery, we won't know about it. */
2940#define IEM_TIMER_POLL_MAX_NS UINT32_C(8388608) /* 0x800000 ns = 8.4 ms */
2941/** The IEM_TIMER_POLL_MAX_NS value roughly translated to TBs, with some grains
2942 * of salt thrown in.
2943 * This helps control fluctuations in the NU benchmark. */
2944#define IEM_TIMER_POLL_MAX_ITER _512K
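/* Example: with the defaults above, a 4096 ns poll deadline translates to
   4096 / 64 = 64 TBs between polls, within the [12, 512K] iteration bounds. */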
2945
2946#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
2947/**
2948 * Calculates the number of TBs till the next timer polling using defaults.
2949 *
2950 * This is used when the previous run wasn't long enough to provide sufficient
2951 * data and when coming back from the HALT state and we haven't actually
2952 * executed anything for a while.
2953 */
2954DECL_FORCE_INLINE(uint32_t) iemPollTimersCalcDefaultCountdown(uint64_t cNsDelta) RT_NOEXCEPT
2955{
2956 if (cNsDelta >= IEM_TIMER_POLL_MAX_NS)
2957 return RT_MIN(IEM_TIMER_POLL_MAX_NS / IEM_TIMER_POLL_DEFAULT_FACTOR, IEM_TIMER_POLL_MAX_ITER);
2958
2959 cNsDelta = RT_BIT_64(ASMBitFirstSetU32(cNsDelta) - 1); /* round down to power of 2 */
2960 uint32_t const cRet = cNsDelta / IEM_TIMER_POLL_DEFAULT_FACTOR;
2961 if (cRet >= IEM_TIMER_POLL_MIN_ITER)
2962 {
2963 if (cRet <= IEM_TIMER_POLL_MAX_ITER)
2964 return cRet;
2965 return IEM_TIMER_POLL_MAX_ITER;
2966 }
2967 return IEM_TIMER_POLL_MIN_ITER;
2968}
2969#endif
2970
2971
2972/**
2973 * Helper for polling timers.
2974 */
2975DECLHIDDEN(int) iemPollTimers(PVMCC pVM, PVMCPUCC pVCpu) RT_NOEXCEPT
2976{
2977 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPoll, a);
2978
2979 /*
2980 * Check for VM_FF_TM_VIRTUAL_SYNC and call TMR3VirtualSyncFF if set.
2981 * This is something all EMTs can do.
2982 */
2983 /* If the virtual sync FF is set, respond to it. */
2984 bool fRanTimers = VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC);
2985 if (!fRanTimers)
2986 { /* likely */ }
2987 else
2988 {
2989 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
2990 TMR3VirtualSyncFF(pVM, pVCpu);
2991 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
2992 }
2993
2994 /*
2995 * Poll timers.
2996 *
2997     * On the 10980xe the polling averages 314 ticks, with a min of 201, while
2998 * running a norton utilities DOS benchmark program. TSC runs at 3GHz,
2999 * translating that to 104 ns and 67 ns respectively. (An M2 booting win11
3000 * has an average of 2 ticks / 84 ns.)
3001 *
3002 * With the same setup the TMR3VirtualSyncFF and else branch here profiles
3003 * to 79751 ticks / 26583 ns on average, with a min of 1194 ticks / 398 ns.
3004 * (An M2 booting win11 has an average of 24 ticks / 1008 ns, with a min of
3005 * 8 ticks / 336 ns.)
3006 *
3007 * If we get a zero return value we run timers. Non-timer EMTs shouldn't
3008 * ever see a zero value here, so we just call TMR3TimerQueuesDo. However,
3009 * we do not re-run timers if we already called TMR3VirtualSyncFF above, we
3010 * try to make sure some code is executed first.
3011 */
3012 uint64_t nsNow = 0;
3013 uint64_t cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3014 if (cNsDelta >= 1) /* It is okay to run virtual sync timers a little early. */
3015 { /* likely */ }
3016 else if (!fRanTimers || VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
3017 {
3018 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3019 TMR3TimerQueuesDo(pVM);
3020 fRanTimers = true;
3021 nsNow = 0;
3022 cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3023 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3024 }
3025 else
3026 cNsDelta = 33;
3027
3028 /*
3029 * Calc interval and update the timestamps.
3030 */
3031 uint64_t const cNsSinceLast = nsNow - pVCpu->iem.s.nsRecompilerPollNow;
3032 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3033 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3034
3035 /*
3036 * Set the next polling count down value.
3037 *
3038 * We take the previous value and adjust it according to the cNsSinceLast
3039 * value if that isn't within reason. This can't be very accurate since the
3040 * CheckIrq and intra-TB checks aren't evenly spaced; they depend highly
3041 * on the guest code.
3042 */
3043#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3044 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3045 if (cNsDelta >= RT_NS_1SEC / 4)
3046 {
3047 /*
3048 * Non-timer EMTs should end up here with a fixed 500ms delta; just return
3049 * the max and leave the polling overhead to the dedicated timer EMT.
3050 */
3051 AssertCompile(IEM_TIMER_POLL_MAX_ITER * IEM_TIMER_POLL_DEFAULT_FACTOR <= RT_NS_100MS);
3052 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3053 }
3054 else
3055 {
3056 /*
3057 * This is the timer EMT.
3058 */
3059 if (cNsDelta <= IEM_TIMER_POLL_MIN_NS)
3060 {
3061 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollTiny);
3062 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3063 }
3064 else
3065 {
3066 uint32_t const cNsDeltaAdj = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS : (uint32_t)cNsDelta;
3067 uint32_t const cNsDeltaSlack = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS / 2 : cNsDeltaAdj / 4;
3068 if ( cNsSinceLast < RT_MAX(IEM_TIMER_POLL_MIN_NS, 64)
3069 || cItersTillNextPoll < IEM_TIMER_POLL_MIN_ITER /* paranoia */)
3070 {
3071 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollDefaultCalc);
3072 cItersTillNextPoll = iemPollTimersCalcDefaultCountdown(cNsDeltaAdj);
3073 }
3074 else if ( cNsSinceLast >= cNsDeltaAdj + cNsDeltaSlack
3075 || cNsSinceLast <= cNsDeltaAdj - cNsDeltaSlack)
3076 {
3077 if (cNsSinceLast >= cItersTillNextPoll)
3078 {
3079 uint32_t uFactor = (uint32_t)(cNsSinceLast + cItersTillNextPoll - 1) / cItersTillNextPoll;
3080 cItersTillNextPoll = cNsDeltaAdj / uFactor;
3081 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorDivision, uFactor);
3082 }
3083 else
3084 {
3085 uint32_t uFactor = cItersTillNextPoll / (uint32_t)cNsSinceLast;
3086 cItersTillNextPoll = cNsDeltaAdj * uFactor;
3087 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorMultiplication, uFactor);
3088 }
3089
3090 if (cItersTillNextPoll >= IEM_TIMER_POLL_MIN_ITER)
3091 {
3092 if (cItersTillNextPoll <= IEM_TIMER_POLL_MAX_ITER)
3093 { /* likely */ }
3094 else
3095 {
3096 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollMax);
3097 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3098 }
3099 }
3100 else
3101 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3102 }
3103 else
3104 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollUnchanged);
3105 }
3106 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3107 }
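    /* Worked example of the adjustment above (all numbers illustrative): say
       cNsDeltaAdj = 400000 ns, the previous countdown was 8192 iterations and
       the measured cNsSinceLast is 1638400 ns.  That is well outside the
       +/- 100000 ns slack band, and since 1638400 >= 8192 the division path is
       taken: uFactor = 1638400 / 8192 = 200 ns per iteration, giving a new
       countdown of 400000 / 200 = 2000 iterations (subject to the min/max
       clamping that follows). */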
3108#else
3109/** Poll timers every 400 us / 2500 Hz. (source: thin air) */
3110# define IEM_TIMER_POLL_IDEAL_NS (400U * RT_NS_1US)
3111 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3112 uint32_t const cNsIdealPollInterval = IEM_TIMER_POLL_IDEAL_NS;
3113 int64_t const nsFromIdeal = cNsSinceLast - cNsIdealPollInterval;
3114 if (nsFromIdeal < 0)
3115 {
3116 if ((uint64_t)-nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll < _64K)
3117 {
3118 cItersTillNextPoll += cItersTillNextPoll / 8;
3119 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3120 }
3121 }
3122 else
3123 {
3124 if ((uint64_t)nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll > 256)
3125 {
3126 cItersTillNextPoll -= cItersTillNextPoll / 8;
3127 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3128 }
3129 }
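    /* Example: with the 400 us ideal interval the tolerance is 400/8 = 50 us.
       If the previous poll turns out to have been 500 us ago we were 100 us
       late, so the countdown shrinks by 1/8 (down to a floor of 256); if it
       was only 300 us ago we were 100 us early and the countdown grows by 1/8
       (up to a cap of _64K). */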
3130#endif
3131 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillNextPoll;
3132
3133 /*
3134 * Repeat the IRQ and FF checks.
3135 */
3136 if (cNsDelta > 0)
3137 {
3138 uint32_t fCpu = pVCpu->fLocalForcedActions;
3139 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3140 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3141 | VMCPU_FF_TLB_FLUSH
3142 | VMCPU_FF_UNHALT );
3143 if (RT_LIKELY( ( !fCpu
3144 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3145 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3146 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx)) ) )
3147 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
3148 {
3149 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3150 return VINF_SUCCESS;
3151 }
3152 }
3153 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3154 return VINF_IEM_REEXEC_BREAK_FF;
3155}
3156
3157
3158/** Helper for iemTbExec. */
3159DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
3160{
3161 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
3162 Assert(idx < pTb->cTbLookupEntries);
3163 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
3164}
3165
3166
3167/**
3168 * Executes a translation block.
3169 *
3170 * @returns Strict VBox status code.
3171 * @param pVCpu The cross context virtual CPU structure of the calling
3172 * thread.
3173 * @param pTb The translation block to execute.
3174 */
3175static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
3176{
3177 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
3178
3179 /*
3180 * Set the current TB so CIMPL functions may get at it.
3181 */
3182 pVCpu->iem.s.pCurTbR3 = pTb;
3183 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
3184
3185 /*
3186 * Execute the block.
3187 */
3188#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3189 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
3190 {
3191 pVCpu->iem.s.cTbExecNative++;
3192 IEMTLBTRACE_TB_EXEC_N8VE(pVCpu, pTb);
3193# ifdef LOG_ENABLED
3194 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
3195# endif
3196
3197# ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
3198# ifdef RT_ARCH_AMD64
3199 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
3200# else
3201 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
3202# endif
3203# else
3204# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3205 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
3206# endif
3207# ifdef RT_ARCH_AMD64
3208 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
3209# else
3210 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
3211# endif
3212# endif
3213
3214# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3215 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3216# endif
3217# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3218 /* Restore FPCR/MXCSR if the TB modified it. */
3219 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3220 {
3221 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3222 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3223 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3224 }
3225# endif
3226# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3227 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
3228# endif
3229 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3230 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3231 { /* likely */ }
3232 else
3233 {
3234 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
3235 pVCpu->iem.s.pCurTbR3 = NULL;
3236
3237 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3238 only to break out of TB execution early. */
3239 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3240 {
3241 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
3242 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3243 }
3244
3245 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
3246 only to break out of TB execution early due to pending FFs. */
3247 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
3248 {
3249 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
3250 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3251 }
3252
3253 /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
3254 and be converted to VINF_SUCCESS or whatever is appropriate. */
3255 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
3256 {
3257 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
3258 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
3259 }
3260
3261 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
3262 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3263 }
3264 }
3265 else
3266#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
3267 {
3268 /*
3269 * The threaded execution loop.
3270 */
3271 pVCpu->iem.s.cTbExecThreaded++;
3272 IEMTLBTRACE_TB_EXEC_THRD(pVCpu, pTb);
3273#ifdef LOG_ENABLED
3274 uint64_t uRipPrev = UINT64_MAX;
3275#endif
3276 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
3277 uint32_t cCallsLeft = pTb->Thrd.cCalls;
3278 while (cCallsLeft-- > 0)
3279 {
3280#ifdef LOG_ENABLED
3281 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
3282 {
3283 uRipPrev = pVCpu->cpum.GstCtx.rip;
3284 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
3285 }
3286 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
3287 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
3288 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
3289#endif
3290#ifdef VBOX_WITH_STATISTICS
3291 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
3292 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
3293#endif
3294 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
3295 pCallEntry->auParams[0],
3296 pCallEntry->auParams[1],
3297 pCallEntry->auParams[2]);
3298 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3299 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3300 pCallEntry++;
3301 else if (rcStrict == VINF_IEM_REEXEC_JUMP)
3302 {
3303 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
3304 Assert(cCallsLeft == 0);
3305 uint32_t const idxTarget = (uint32_t)pCallEntry->auParams[0];
3306 cCallsLeft = pTb->Thrd.cCalls;
3307 AssertBreak(idxTarget < cCallsLeft - 1);
3308 cCallsLeft -= idxTarget;
3309 pCallEntry = &pTb->Thrd.paCalls[idxTarget];
3310 AssertBreak(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET);
3311 }
3312 else
3313 {
3314 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
3315 pVCpu->iem.s.pCurTbR3 = NULL;
3316 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
3317 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
3318
3319 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3320 only to break out of TB execution early. */
3321 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3322 {
3323#ifdef VBOX_WITH_STATISTICS
3324 if (pCallEntry->uTbLookup)
3325 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
3326 else
3327 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
3328#endif
3329 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3330 }
3331 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3332 }
3333 }
3334
3335 /* Update the lookup entry. */
3336 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
3337 }
3338
3339 pVCpu->iem.s.cInstructions += pTb->cInstructions;
3340 pVCpu->iem.s.pCurTbR3 = NULL;
3341 return VINF_SUCCESS;
3342}
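/*
 * Minimal, self-contained sketch of the threaded dispatch pattern used in
 * iemTbExec above: a pre-decoded array of call entries, each holding a table
 * index and three parameters, executed through a table of function pointers
 * until one of them returns a non-zero status.  All of the names below
 * (SKETCHCALLENTRY, sketchRunCalls, ...) are invented for the example and are
 * not part of IEM.
 */
#if 0 /* example only, not built */
typedef int FNSKETCHCALL(void *pvCtx, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2);

typedef struct SKETCHCALLENTRY
{
    uint16_t idxFn;         /* index into the dispatch table (cf. enmFunction) */
    uint64_t auParams[3];   /* parameters decoded at compile time (cf. auParams) */
} SKETCHCALLENTRY;

static int sketchRunCalls(void *pvCtx, FNSKETCHCALL * const *papfnTable,
                          SKETCHCALLENTRY const *paCalls, uint32_t cCalls)
{
    while (cCalls-- > 0)
    {
        int const rc = papfnTable[paCalls->idxFn](pvCtx, paCalls->auParams[0],
                                                  paCalls->auParams[1], paCalls->auParams[2]);
        if (rc != 0)    /* any non-success status breaks out of the block early */
            return rc;
        paCalls++;
    }
    return 0;
}
#endif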
3343
3344
3345/**
3346 * This is called when the PC doesn't match the current pbInstrBuf.
3347 *
3348 * Upon return, we're ready for opcode fetching. But please note that
3349 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
3350 * MMIO or unassigned).
3351 */
3352static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
3353{
3354 pVCpu->iem.s.pbInstrBuf = NULL;
3355 pVCpu->iem.s.offCurInstrStart = 0;
3356 pVCpu->iem.s.offInstrNextByte = 0;
3357 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
3358 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
3359}
3360
3361
3362/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
3363DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
3364{
3365 /*
3366 * Set uCurTbStartPc to RIP and calc the effective PC.
3367 */
3368 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
3369#if 0 /* unused */
3370 pVCpu->iem.s.uCurTbStartPc = uPc;
3371#endif
3372 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
3373 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
3374
3375 /*
3376 * Advance within the current buffer (PAGE) when possible.
3377 */
3378 if (pVCpu->iem.s.pbInstrBuf)
3379 {
3380 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
3381 if (off < pVCpu->iem.s.cbInstrBufTotal)
3382 {
3383 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3384 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3385 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3386 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3387 else
3388 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3389
3390 return pVCpu->iem.s.GCPhysInstrBuf + off;
3391 }
3392 }
3393 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3394}
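/* Worked example for iemGetPcWithPhysAndCode (numbers illustrative): with a
   flat code segment (cs.u64Base = 0), uInstrBufPc = 0x1000, cbInstrBufTotal =
   0x1000 (one guest page mapped) and rip = 0x1ff8, off = 0xff8 falls inside
   the buffer and the fast path is taken; off + 15 = 0x1007 exceeds the buffer,
   so cbInstrBuf is clamped to 0x1000 and GCPhysInstrBuf + 0xff8 is returned
   without refetching. */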
3395
3396
3397/**
3398 * Determines the extra IEMTB_F_XXX flags.
3399 *
3400 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3401 * IEMTB_F_CS_LIM_CHECKS (or zero).
3402 * @param pVCpu The cross context virtual CPU structure of the calling
3403 * thread.
3404 */
3405DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3406{
3407 uint32_t fRet = 0;
3408
3409 /*
3410 * Determine the inhibit bits.
3411 */
3412 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3413 { /* typical */ }
3414 else
3415 {
3416 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3417 fRet |= IEMTB_F_INHIBIT_SHADOW;
3418 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3419 fRet |= IEMTB_F_INHIBIT_NMI;
3420 }
3421
3422 /*
3423 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3424 * likely to go invalid before the end of the translation block.
3425 */
3426 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3427 return fRet;
3428
3429 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3430 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3431 return fRet;
3432 return fRet | IEMTB_F_CS_LIM_CHECKS;
3433}
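/* Worked example for the CS limit check above (numbers illustrative): with
   cs.u64Base = 0, eip = 0xf000 and cs.u32Limit = 0xffff, offFromLim is 0xfff,
   which is less than the 4096 + 16 byte slack, so IEMTB_F_CS_LIM_CHECKS is
   added; with a limit of 0xfffff the slack is ample and the flag is omitted. */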
3434
3435
3436VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu, bool fWasHalted)
3437{
3438 /*
3439 * See if there is an interrupt pending in TRPM, inject it if we can.
3440 */
3441 if (!TRPMHasTrap(pVCpu))
3442 { /* likely */ }
3443 else
3444 {
3445 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3446 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3447 { /* likely */ }
3448 else
3449 return rcStrict;
3450 }
3451
3452 /*
3453 * Init the execution environment.
3454 */
3455#if 1 /** @todo this seems like a good idea; however, if we ever share memory
3456 * directly with other threads on the host, it isn't necessarily... */
3457 if (pVM->cCpus == 1)
3458 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3459 else
3460#endif
3461 iemInitExec(pVCpu, 0 /*fExecOpts*/);
3462
3463 if (RT_LIKELY(!fWasHalted && pVCpu->iem.s.msRecompilerPollNow != 0))
3464 { }
3465 else
3466 {
3467 /* Do polling after halt and the first time we get here. */
3468#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3469 uint64_t nsNow = 0;
3470 uint32_t const cItersTillPoll = iemPollTimersCalcDefaultCountdown(TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow));
3471 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillPoll;
3472 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillPoll;
3473#else
3474 uint64_t const nsNow = TMVirtualGetNoCheck(pVM);
3475#endif
3476 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3477 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3478 }
3479 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3480
3481 /*
3482 * Run-loop.
3483 *
3484 * If we're using setjmp/longjmp we combine all the catching here to avoid
3485 * having to call setjmp for each block we're executing.
3486 */
3487 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3488 for (;;)
3489 {
3490 VBOXSTRICTRC rcStrict;
3491 IEM_TRY_SETJMP(pVCpu, rcStrict)
3492 {
3493 for (;;)
3494 {
3495 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3496 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3497 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3498 {
3499 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3500 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3501 if (pTb)
3502 rcStrict = iemTbExec(pVCpu, pTb);
3503 else
3504 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3505 }
3506 else
3507 {
3508 /* This can only happen if the current PC cannot be translated into a
3509 host pointer, which means we're in MMIO or unmapped memory... */
3510#if defined(VBOX_STRICT) && defined(IN_RING3)
3511 rcStrict = DBGFSTOP(pVM);
3512 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3513 return rcStrict;
3514#endif
3515 rcStrict = IEMExecLots(pVCpu, 2048, 511, NULL);
3516 }
3517 if (rcStrict == VINF_SUCCESS)
3518 {
3519 Assert(pVCpu->iem.s.cActiveMappings == 0);
3520
3521 /* Note! This IRQ/FF check is repeated in iemPollTimers, iemThreadedFunc_BltIn_CheckIrq
3522 and emitted by iemNativeRecompFunc_BltIn_CheckIrq. */
3523 uint64_t fCpu = pVCpu->fLocalForcedActions;
3524 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3525 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3526 | VMCPU_FF_TLB_FLUSH
3527 | VMCPU_FF_UNHALT );
3528 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3529 if (RT_LIKELY( ( !fCpu
3530 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3531 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3532 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3533 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3534 {
3535 /* Once in a while we need to poll timers here. */
3536 if ((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
3537 { /* likely */ }
3538 else
3539 {
3540 int rc = iemPollTimers(pVM, pVCpu);
3541 if (rc != VINF_SUCCESS)
3542 return VINF_SUCCESS;
3543 }
3544 }
3545 else
3546 return VINF_SUCCESS;
3547 }
3548 else
3549 return rcStrict;
3550 }
3551 }
3552 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3553 {
3554 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3555 pVCpu->iem.s.cLongJumps++;
3556#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3557 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3558#endif
3559 if (pVCpu->iem.s.cActiveMappings > 0)
3560 iemMemRollback(pVCpu);
3561
3562#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3563 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3564 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3565 {
3566 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3567# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3568 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3569 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3570# endif
3571
3572#ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3573 /* Restore FPCR/MXCSR if the TB modified it. */
3574 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3575 {
3576 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3577 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3578 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3579 }
3580#endif
3581 }
3582#endif
3583
3584#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3585 /* If pTb isn't NULL we're in iemTbExec. */
3586 if (!pTb)
3587 {
3588 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3589 pTb = pVCpu->iem.s.pCurTbR3;
3590 if (pTb)
3591 {
3592 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3593 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3594 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3595 }
3596 }
3597#endif
3598 pVCpu->iem.s.pCurTbR3 = NULL;
3599 return rcStrict;
3600 }
3601 IEM_CATCH_LONGJMP_END(pVCpu);
3602 }
3603}
3604