VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@ 105844

Last change on this file since 105844 was 105805, checked in by vboxsync, 3 months ago

VMM/IEM: End TB if we get back to the first instruction again via an indirect route, e.g. jumping to a point before the TB starts, and optimize this using the loop-jump (todo 14). bugref:10720 bugref:10656

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 146.4 KB
1/* $Id: IEMAllThrdRecompiler.cpp 105805 2024-08-21 23:52:56Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
117# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
118#endif
119
120
121/**
122 * Calculates the effective address of a ModR/M memory operand, extended version
123 * for use in the recompilers.
124 *
125 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
126 *
127 * May longjmp on internal error.
128 *
129 * @returns The effective address.
130 * @param pVCpu The cross context virtual CPU structure of the calling thread.
131 * @param bRm The ModRM byte.
132 * @param cbImmAndRspOffset - First byte: The size of any immediate
133 * following the effective address opcode bytes
134 * (only for RIP relative addressing).
135 * - Second byte: RSP displacement (for POP [ESP]).
136 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
137 * SIB byte (bits 39:32).
138 *
139 * @note This must be defined in a source file with matching
140 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
141 * or implemented differently...
142 */
143RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
144{
145 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
146# define SET_SS_DEF() \
147 do \
148 { \
149 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
150 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
151 } while (0)
152
153 if (!IEM_IS_64BIT_CODE(pVCpu))
154 {
155/** @todo Check the effective address size crap! */
156 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
157 {
158 uint16_t u16EffAddr;
159
160 /* Handle the disp16 form with no registers first. */
161 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
162 {
163 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
164 *puInfo = u16EffAddr;
165 }
166 else
167 {
168 /* Get the displacement. */
169 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
170 {
171 case 0: u16EffAddr = 0; break;
172 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
173 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
174 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
175 }
176 *puInfo = u16EffAddr;
177
178 /* Add the base and index registers to the disp. */
179 switch (bRm & X86_MODRM_RM_MASK)
180 {
181 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
182 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
183 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
184 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
185 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
186 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
187 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
188 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
189 }
190 }
191
192 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
193 return u16EffAddr;
194 }
195
196 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
197 uint32_t u32EffAddr;
198 uint64_t uInfo;
199
200 /* Handle the disp32 form with no registers first. */
201 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
202 {
203 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
204 uInfo = u32EffAddr;
205 }
206 else
207 {
208 /* Get the register (or SIB) value. */
209 uInfo = 0;
210 switch ((bRm & X86_MODRM_RM_MASK))
211 {
212 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
213 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
214 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
215 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
216 case 4: /* SIB */
217 {
218 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
219 uInfo = (uint64_t)bSib << 32;
220
221 /* Get the index and scale it. */
222 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
223 {
224 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
225 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
226 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
227 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
228 case 4: u32EffAddr = 0; /*none */ break;
229 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
230 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
231 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
232 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
233 }
234 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
235
236 /* add base */
237 switch (bSib & X86_SIB_BASE_MASK)
238 {
239 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
240 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
241 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
242 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
243 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
244 case 5:
245 if ((bRm & X86_MODRM_MOD_MASK) != 0)
246 {
247 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
248 SET_SS_DEF();
249 }
250 else
251 {
252 uint32_t u32Disp;
253 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
254 u32EffAddr += u32Disp;
255 uInfo |= u32Disp;
256 }
257 break;
258 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
259 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
260 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
261 }
262 break;
263 }
264 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
265 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
266 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
267 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
268 }
269
270 /* Get and add the displacement. */
271 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
272 {
273 case 0:
274 break;
275 case 1:
276 {
277 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
278 u32EffAddr += i8Disp;
279 uInfo |= (uint32_t)(int32_t)i8Disp;
280 break;
281 }
282 case 2:
283 {
284 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
285 u32EffAddr += u32Disp;
286 uInfo |= u32Disp;
287 break;
288 }
289 default:
290 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
291 }
292 }
293
294 *puInfo = uInfo;
295 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
296 return u32EffAddr;
297 }
298
299 uint64_t u64EffAddr;
300 uint64_t uInfo;
301
302 /* Handle the rip+disp32 form with no registers first. */
303 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
304 {
305 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
306 uInfo = (uint32_t)u64EffAddr;
307 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
308 }
309 else
310 {
311 /* Get the register (or SIB) value. */
312 uInfo = 0;
313 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
314 {
315 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
316 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
317 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
318 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
319 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
320 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
321 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
322 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
323 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
324 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
325 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
326 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
327 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
328 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
329 /* SIB */
330 case 4:
331 case 12:
332 {
333 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
334 uInfo = (uint64_t)bSib << 32;
335
336 /* Get the index and scale it. */
337 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
338 {
339 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
340 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
341 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
342 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
343 case 4: u64EffAddr = 0; /*none */ break;
344 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
345 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
346 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
347 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
348 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
349 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
350 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
351 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
352 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
353 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
354 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
355 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
356 }
357 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
358
359 /* add base */
360 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
361 {
362 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
363 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
364 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
365 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
366 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
367 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
368 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
369 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
370 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
371 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
372 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
373 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
374 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
375 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
376 /* complicated encodings */
377 case 5:
378 case 13:
379 if ((bRm & X86_MODRM_MOD_MASK) != 0)
380 {
381 if (!pVCpu->iem.s.uRexB)
382 {
383 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
384 SET_SS_DEF();
385 }
386 else
387 u64EffAddr += pVCpu->cpum.GstCtx.r13;
388 }
389 else
390 {
391 uint32_t u32Disp;
392 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
393 u64EffAddr += (int32_t)u32Disp;
394 uInfo |= u32Disp;
395 }
396 break;
397 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
398 }
399 break;
400 }
401 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
402 }
403
404 /* Get and add the displacement. */
405 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
406 {
407 case 0:
408 break;
409 case 1:
410 {
411 int8_t i8Disp;
412 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
413 u64EffAddr += i8Disp;
414 uInfo |= (uint32_t)(int32_t)i8Disp;
415 break;
416 }
417 case 2:
418 {
419 uint32_t u32Disp;
420 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
421 u64EffAddr += (int32_t)u32Disp;
422 uInfo |= u32Disp;
423 break;
424 }
425 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
426 }
427
428 }
429
430 *puInfo = uInfo;
431 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
432 {
433 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
434 return u64EffAddr;
435 }
436 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
437 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
438 return u64EffAddr & UINT32_MAX;
439}
440
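/*
 * Illustrative sketch (not part of the original source): how a caller might
 * unpack the value stored via puInfo above, following the doc comment's
 * packing, i.e. 32-bit displacement in bits 31:0, raw SIB byte in bits 39:32.
 * The helper name is hypothetical.
 *
 *     static void iemExampleLogEffAddrInfo(uint64_t uInfo)
 *     {
 *         uint32_t const u32Disp = (uint32_t)uInfo;         // bits 31:0
 *         uint8_t  const bSib    = (uint8_t)(uInfo >> 32);  // bits 39:32
 *         Log5(("disp=%#RX32 sib=%#04x\n", u32Disp, bSib));
 *     }
 */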
441
442/*********************************************************************************************************************************
443* Translation Block Cache. *
444*********************************************************************************************************************************/
445
446/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
447static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
448{
449 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
450 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
451 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
452 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
453 if (cMsSinceUse1 != cMsSinceUse2)
454 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
455 if (pTb1->cUsed != pTb2->cUsed)
456 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
457 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
458 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
459 return 0;
460}
461
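/*
 * Illustrative worked example (not part of the original source): the
 * comparator above sorts a collision list so the most desirable TBs come
 * first: most recently used, then most frequently used, then native before
 * threaded.  E.g. with msRecompilerPollNow=1000 passed as pvUser:
 *     TB A: msLastUsed=990, cUsed=3,  threaded
 *     TB B: msLastUsed=990, cUsed=7,  native
 *     TB C: msLastUsed=100, cUsed=50, native
 * sorts as B, A, C; the pruning code below then keeps the first half.
 */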
462#ifdef VBOX_STRICT
463/**
464 * Assertion helper that checks a collisions list count.
465 */
466static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
467{
468 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
469 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
470 while (pTb)
471 {
472 pTb = pTb->pNext;
473 cLeft--;
474 }
475 AssertMsg(cLeft == 0,
476 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
477 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
478}
479#endif
480
481
482DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
483{
484 STAM_PROFILE_START(&pTbCache->StatPrune, a);
485
486 /*
487 * First convert the collision list to an array.
488 */
489 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
490 uintptr_t cInserted = 0;
491 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
492
493 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
494
495 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
496 {
497 apSortedTbs[cInserted++] = pTbCollision;
498 pTbCollision = pTbCollision->pNext;
499 }
500
501 /* Free any excess (impossible). */
502 if (RT_LIKELY(!pTbCollision))
503 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
504 else
505 do
506 {
507 PIEMTB pTbToFree = pTbCollision;
508 pTbCollision = pTbToFree->pNext;
509 iemTbAllocatorFree(pVCpu, pTbToFree);
510 } while (pTbCollision);
511
512 /*
513 * Sort it by most recently used and usage count.
514 */
515 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
516
517 /* We keep half the list for now. Perhaps a bit aggressive... */
518 uintptr_t const cKeep = cInserted / 2;
519
520 /* First free up the TBs we don't wish to keep (before creating the new
521 list because otherwise the free code will scan the list for each one
522 without ever finding it). */
523 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
524 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
525
526 /* Then chain the new TB together with those of the existing ones we'd
527 like to keep, and insert this list into the hash table. */
528 pTbCollision = pTb;
529 for (uintptr_t idx = 0; idx < cKeep; idx++)
530 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
531 pTbCollision->pNext = NULL;
532
533 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
534#ifdef VBOX_STRICT
535 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
536#endif
537
538 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
539}
540
541
542static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
543{
544 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
545 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
546 if (!pTbOldHead)
547 {
548 pTb->pNext = NULL;
549 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
550 }
551 else
552 {
553 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
554 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
555 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
556 {
557 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
558 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
559#ifdef VBOX_STRICT
560 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
561#endif
562 }
563 else
564 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
565 }
566}
567
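/*
 * Illustrative note (not part of the original source): judging from the
 * AssertCompile on sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK in iemTbInit
 * below, each apHash entry appears to pack the collision chain length into
 * the low alignment bits of the head TB pointer.  Conceptually:
 *
 *     apHash[idxHash] = (PIEMTB)((uintptr_t)pTbHead | cChainLength);
 *     pTbHead      = (PIEMTB)((uintptr_t)apHash[idxHash] & ~IEMTBCACHE_PTR_COUNT_MASK);
 *     cChainLength = (uintptr_t)apHash[idxHash] &  IEMTBCACHE_PTR_COUNT_MASK;
 *
 * This is only a sketch of what IEMTBCACHE_PTR_MAKE/GET_TB/GET_COUNT seem to
 * do; the actual macro definitions live in a header not shown here.
 */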
568
569/**
570 * Unlinks @a pTb from the hash table if found in it.
571 *
572 * @returns true if unlinked, false if not present.
573 * @param pTbCache The hash table.
574 * @param pTb The TB to remove.
575 */
576static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
577{
578 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
579 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
580 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
581
582 /*
583 * At the head of the collision list?
584 */
585 if (pTbHash == pTb)
586 {
587 if (!pTb->pNext)
588 pTbCache->apHash[idxHash] = NULL;
589 else
590 {
591 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
592 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
593#ifdef VBOX_STRICT
594 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
595#endif
596 }
597 return true;
598 }
599
600 /*
601 * Search the collision list.
602 */
603 PIEMTB const pTbHead = pTbHash;
604 while (pTbHash)
605 {
606 PIEMTB const pNextTb = pTbHash->pNext;
607 if (pNextTb == pTb)
608 {
609 pTbHash->pNext = pTb->pNext;
610 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
611#ifdef VBOX_STRICT
612 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
613#endif
614 return true;
615 }
616 pTbHash = pNextTb;
617 }
618 return false;
619}
620
621
622/**
623 * Looks up a TB for the given PC and flags in the cache.
624 *
625 * @returns Pointer to TB on success, NULL if not found.
626 * @param pVCpu The cross context virtual CPU structure of the
627 * calling thread.
628 * @param pTbCache The translation block cache.
629 * @param GCPhysPc The PC to look up a TB for.
630 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
631 * the lookup.
632 * @thread EMT(pVCpu)
633 */
634static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
635 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
636{
637 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
638
639 /*
640 * First consult the lookup table entry.
641 */
642 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
643 PIEMTB pTb = *ppTbLookup;
644 if (pTb)
645 {
646 if (pTb->GCPhysPc == GCPhysPc)
647 {
648 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
649 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
650 {
651 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
652 {
653 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
654 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
655 pTb->cUsed++;
656#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
657 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
658 {
659 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
660 return pTb;
661 }
662 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
663 return iemNativeRecompile(pVCpu, pTb);
664#else
665 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
666 return pTb;
667#endif
668 }
669 }
670 }
671 }
672
673 /*
674 * Then consult the hash table.
675 */
676 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
677#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
678 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
679#endif
680 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
681 while (pTb)
682 {
683 if (pTb->GCPhysPc == GCPhysPc)
684 {
685 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
686 {
687 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
688 {
689 STAM_COUNTER_INC(&pTbCache->cLookupHits);
690 AssertMsg(cLeft > 0, ("%d\n", cLeft));
691
692 *ppTbLookup = pTb;
693 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
694 pTb->cUsed++;
695#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
696 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
697 {
698 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
699 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
700 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
701 return pTb;
702 }
703 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
704 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
705 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
706 return iemNativeRecompile(pVCpu, pTb);
707#else
708 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
709 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
710 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
711 return pTb;
712#endif
713 }
714 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
715 }
716 else
717 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
718 }
719 else
720 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
721
722 pTb = pTb->pNext;
723#ifdef VBOX_STRICT
724 cLeft--;
725#endif
726 }
727 AssertMsg(cLeft == 0, ("%d\n", cLeft));
728 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
729 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
730 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
731 return pTb;
732}
733
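/*
 * Illustrative outline (not part of the original source) of the two-tier
 * lookup performed above: the per-call TB lookup table entry is tried first
 * (a single cached pointer), and only on a miss is the hash table chain
 * walked, keyed on ((fExec | fExtraFlags) & IEMTB_F_KEY_MASK) and GCPhysPc:
 *
 *     pTb = *pVCpu->iem.s.ppTbLookupEntryR3;          // hot path: one pointer
 *     if (!pTb || pTb->GCPhysPc != GCPhysPc || keys/CS attribs differ)       // pseudo-code
 *         pTb = walk chain at IEMTBCACHE_HASH(pTbCache, fFlags, GCPhysPc);   // pseudo-code
 */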
734
735/*********************************************************************************************************************************
736* Translation Block Allocator.
737*********************************************************************************************************************************/
738/*
739 * Translation block allocation management.
740 */
741
742#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
743# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
744 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
745# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
746 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
747# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
748 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
749#else
750# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
751 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
752# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
753 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
754# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
755 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
756#endif
757/** Makes a TB index from a chunk index and TB index within that chunk. */
758#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
759 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
760
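/*
 * Illustrative worked example (not part of the original source), assuming a
 * hypothetical power-of-two configuration with 4096 TBs per chunk (so
 * cChunkShift = 12 and fChunkMask = 0xfff): global TB index 0x2345 maps to
 * chunk 2, entry 0x345 within that chunk, and
 * IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 0x345) yields 0x2345 again.  The real
 * per-chunk count depends on sizeof(IEMTB) and the chunk size computed in
 * iemTbInit below.
 */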
761
762/**
763 * Initializes the TB allocator and cache for an EMT.
764 *
765 * @returns VBox status code.
766 * @param pVM The VM handle.
767 * @param cInitialTbs The initial number of translation blocks to
768 * preallocate.
769 * @param cMaxTbs The max number of translation blocks allowed.
770 * @param cbInitialExec The initial size of the executable memory allocator.
771 * @param cbMaxExec The max size of the executable memory allocator.
772 * @param cbChunkExec The chunk size for executable memory allocator. Zero
773 * or UINT32_MAX for automatically determining this.
774 * @thread EMT
775 */
776DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
777 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
778{
779 PVMCPUCC pVCpu = VMMGetCpu(pVM);
780 Assert(!pVCpu->iem.s.pTbCacheR3);
781 Assert(!pVCpu->iem.s.pTbAllocatorR3);
782
783 /*
784 * Calculate the chunk size of the TB allocator.
785 * The minimum chunk size is 2MiB.
786 */
787 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
788 uint32_t cbPerChunk = _2M;
789 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
790#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
791 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
792 uint8_t cChunkShift = 21 - cTbShift;
793 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
794#endif
795 for (;;)
796 {
797 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
798 break;
799 cbPerChunk *= 2;
800 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
801#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
802 cChunkShift += 1;
803#endif
804 }
805
806 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
807 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
808 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
809
810 cMaxTbs = cMaxChunks * cTbsPerChunk;
811
812 /*
813 * Allocate and initialize it.
814 */
815 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
816 if (!pTbAllocator)
817 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
818 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
819 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
820 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
821 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
822 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
823 pTbAllocator->cbPerChunk = cbPerChunk;
824 pTbAllocator->cMaxTbs = cMaxTbs;
825 pTbAllocator->pTbsFreeHead = NULL;
826#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
827 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
828 pTbAllocator->cChunkShift = cChunkShift;
829 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
830#endif
831
832 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
833
834 /*
835 * Allocate the initial chunks.
836 */
837 for (uint32_t idxChunk = 0; ; idxChunk++)
838 {
839 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
840 if (!paTbs)
841 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
842 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
843 cbPerChunk, idxChunk, pVCpu->idCpu);
844
845 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
846 {
847 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
848 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
849 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
850 }
851 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
852 pTbAllocator->cTotalTbs += cTbsPerChunk;
853
854 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
855 break;
856 }
857
858 /*
859 * Calculate the size of the hash table. We double the max TB count and
860 * round it up to the nearest power of two.
861 */
862 uint32_t cCacheEntries = cMaxTbs * 2;
863 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
864 {
865 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
866 cCacheEntries = RT_BIT_32(iBitTop);
867 Assert(cCacheEntries >= cMaxTbs * 2);
868 }
869
870 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
871 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
872 if (!pTbCache)
873 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
874 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
875 cbTbCache, cCacheEntries, pVCpu->idCpu);
876
877 /*
878 * Initialize it (assumes zeroed by the allocator).
879 */
880 pTbCache->uMagic = IEMTBCACHE_MAGIC;
881 pTbCache->cHash = cCacheEntries;
882 pTbCache->uHashMask = cCacheEntries - 1;
883 Assert(pTbCache->cHash > pTbCache->uHashMask);
884 pVCpu->iem.s.pTbCacheR3 = pTbCache;
885
886 /*
887 * Initialize the native executable memory allocator.
888 */
889#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
890 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
891 AssertLogRelRCReturn(rc, rc);
892#else
893 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
894#endif
895
896 return VINF_SUCCESS;
897}
898
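/*
 * Illustrative worked example (not part of the original source) of the chunk
 * sizing in iemTbInit, using hypothetical numbers: if sizeof(IEMTB) were 512
 * bytes, a 2 MiB chunk would hold 4096 TBs; the chunk size is only doubled
 * when cMaxTbs cannot be covered by the fixed number of chunk slots in
 * IEMTBALLOCATOR::aChunks.  Requesting cMaxTbs=10000 would then give
 * cMaxChunks = (10000 + 4095) / 4096 = 3, with cMaxTbs rounded up to
 * 3 * 4096 = 12288.
 */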
899
900/**
901 * Inner free worker.
902 */
903static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
904 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
905{
906 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
907 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
908 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
909#ifdef VBOX_STRICT
910 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
911 Assert(pTbOther != pTb);
912#endif
913
914 /*
915 * Unlink the TB from the hash table.
916 */
917 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
918
919 /*
920 * Free the TB itself.
921 */
922 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
923 {
924 case IEMTB_F_TYPE_THREADED:
925 pTbAllocator->cThreadedTbs -= 1;
926 RTMemFree(pTb->Thrd.paCalls);
927 break;
928#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
929 case IEMTB_F_TYPE_NATIVE:
930 pTbAllocator->cNativeTbs -= 1;
931 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
932 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
933 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
934 break;
935#endif
936 default:
937 AssertFailed();
938 }
939
940 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
941
942 pTb->pNext = pTbAllocator->pTbsFreeHead;
943 pTbAllocator->pTbsFreeHead = pTb;
944 pTb->fFlags = 0;
945 pTb->GCPhysPc = UINT64_MAX;
946 pTb->Gen.uPtr = 0;
947 pTb->Gen.uData = 0;
948 pTb->cTbLookupEntries = 0;
949 pTb->cbOpcodes = 0;
950 pTb->pabOpcodes = NULL;
951
952 Assert(pTbAllocator->cInUseTbs > 0);
953
954 pTbAllocator->cInUseTbs -= 1;
955 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
956}
957
958
959/**
960 * Frees the given TB.
961 *
962 * @param pVCpu The cross context virtual CPU structure of the calling
963 * thread.
964 * @param pTb The translation block to free.
965 * @thread EMT(pVCpu)
966 */
967DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
968{
969 /*
970 * Validate state.
971 */
972 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
973 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
974 uint8_t const idxChunk = pTb->idxAllocChunk;
975 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
976 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
977 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
978
979 /*
980 * Invalidate the TB lookup pointer and call the inner worker.
981 */
982 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
983 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
984}
985
986
987/**
988 * Schedules a TB for freeing when it's no longer being executed and/or part of
989 * the caller's call stack.
990 *
991 * The TB will be removed from the translation block cache, though, so it isn't
992 * possible to execute it again and the IEMTB::pNext member can be used to link
993 * it together with other TBs awaiting freeing.
994 *
995 * @param pVCpu The cross context virtual CPU structure of the calling
996 * thread.
997 * @param pTb The translation block to schedule for freeing.
998 */
999static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1000{
1001 /*
1002 * Validate state.
1003 */
1004 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1005 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1006 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1007 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1008 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1009 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1010#ifdef VBOX_STRICT
1011 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1012 Assert(pTbOther != pTb);
1013#endif
1014
1015 /*
1016 * Remove it from the cache and prepend it to the allocator's todo list.
1017 *
1018 * Note! It could still be in various lookup tables, so we trash the GCPhys
1019 * and CS attribs to ensure it won't be reused.
1020 */
1021 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1022 pTb->GCPhysPc = NIL_RTGCPHYS;
1023 pTb->x86.fAttr = UINT16_MAX;
1024
1025 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1026 pTbAllocator->pDelayedFreeHead = pTb;
1027}
1028
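/*
 * Illustrative note (not part of the original source): TBs parked on
 * pDelayedFreeHead above are reclaimed lazily.  As documented on
 * iemTbAllocatorProcessDelayedFrees below, the allocator and the native
 * recompiler drain the list before allocating, along the lines of:
 *
 *     if (pTbAllocator->pDelayedFreeHead)
 *         iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
 */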
1029
1030/**
1031 * Processes the delayed frees.
1032 *
1033 * This is called by the allocator function as well as the native recompile
1034 * function before making any TB or executable memory allocations respectively.
1035 */
1036void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1037{
1038 /** @todo r=bird: these have already been removed from the cache,
1039 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1040 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1041 pTbAllocator->pDelayedFreeHead = NULL;
1042 while (pTb)
1043 {
1044 PIEMTB const pTbNext = pTb->pNext;
1045 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1046 iemTbAllocatorFree(pVCpu, pTb);
1047 pTb = pTbNext;
1048 }
1049}
1050
1051
1052#if 0
1053/**
1054 * Frees all TBs.
1055 */
1056static int iemTbAllocatorFreeAll(PVMCPUCC pVCpu)
1057{
1058 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1059 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1060 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1061
1062 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1063
1064 uint32_t idxChunk = pTbAllocator->cAllocatedChunks;
1065 while (idxChunk-- > 0)
1066 {
1067 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1068 uint32_t idxTb = pTbAllocator->cTbsPerChunk;
1069 while (idxTb-- > 0)
1070 {
1071 PIEMTB const pTb = &paTbs[idxTb];
1072 if (pTb->fFlags)
1073 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxTb);
1074 }
1075 }
1076
1077 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1078
1079# if 1
1080 /* Reset the free list. */
1081 pTbAllocator->pTbsFreeHead = NULL;
1082 idxChunk = pTbAllocator->cAllocatedChunks;
1083 while (idxChunk-- > 0)
1084 {
1085 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1086 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1087 RT_BZERO(paTbs, sizeof(paTbs[0]) * cTbsPerChunk);
1088 for (uint32_t idxTb = 0; idxTb < cTbsPerChunk; idxTb++)
1089 {
1090 paTbs[idxTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1091 paTbs[idxTb].pNext = pTbAllocator->pTbsFreeHead;
1092 pTbAllocator->pTbsFreeHead = &paTbs[idxTb];
1093 }
1094 }
1095# endif
1096
1097# if 1
1098 /* Completely reset the TB cache. */
1099 RT_BZERO(pVCpu->iem.s.pTbCacheR3->apHash, sizeof(pVCpu->iem.s.pTbCacheR3->apHash[0]) * pVCpu->iem.s.pTbCacheR3->cHash);
1100# endif
1101
1102 return VINF_SUCCESS;
1103}
1104#endif
1105
1106
1107/**
1108 * Grow the translation block allocator with another chunk.
1109 */
1110static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1111{
1112 /*
1113 * Validate state.
1114 */
1115 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1116 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1117 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1118 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1119 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1120
1121 /*
1122 * Allocate a new chunk and add it to the allocator.
1123 */
1124 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1125 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1126 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1127
1128 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1129 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1130 {
1131 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1132 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1133 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1134 }
1135 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1136 pTbAllocator->cTotalTbs += cTbsPerChunk;
1137
1138 return VINF_SUCCESS;
1139}
1140
1141
1142/**
1143 * Allocates a TB from an allocator that has a free block.
1144 *
1145 * This is common code to both the fast and slow allocator code paths.
1146 */
1147DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1148{
1149 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1150 Assert(pTbAllocator->pTbsFreeHead);
1151
1152 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1153 pTbAllocator->pTbsFreeHead = pTb->pNext;
1154 pTbAllocator->cInUseTbs += 1;
1155 if (fThreaded)
1156 pTbAllocator->cThreadedTbs += 1;
1157 else
1158 pTbAllocator->cNativeTbs += 1;
1159 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1160 return pTb;
1161}
1162
1163
1164/**
1165 * Slow path for iemTbAllocatorAlloc.
1166 */
1167static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1168{
1169 /*
1170 * With some luck we can add another chunk.
1171 */
1172 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1173 {
1174 int rc = iemTbAllocatorGrow(pVCpu);
1175 if (RT_SUCCESS(rc))
1176 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1177 }
1178
1179 /*
1180 * We have to prune stuff. Sigh.
1181 *
1182 * This requires scanning for older TBs and kicking them out. Not sure how to
1183 * best do this as we don't want to maintain any list of TBs ordered by last
1184 * usage time. But one reasonably simple approach would be that each time we
1185 * get here we continue a sequential scan of the allocation chunks,
1186 * considering just a smallish number of TBs and freeing a fixed portion of
1187 * them. Say, we consider the next 128 TBs, freeing the least recently used
1188 * out of groups of 4 TBs, resulting in 32 free TBs.
1189 */
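 /* Illustrative note (not part of the original source): with the constants
    below (cTbsToPrune=128, cTbsPerGroup=4) each trip through this slow path
    examines 128 consecutive TB slots as 32 groups of 4 and frees the oldest
    (on a tie, the least used) TB of each group, so roughly 32 TBs go back on
    the free list before the allocation is retried. */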
1190 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1191 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1192 uint32_t const cTbsToPrune = 128;
1193 uint32_t const cTbsPerGroup = 4;
1194 uint32_t cFreedTbs = 0;
1195#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1196 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1197#else
1198 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1199#endif
1200 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1201 idxTbPruneFrom = 0;
1202 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1203 {
1204 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1205 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1206 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1207 uint32_t cMsAge = msNow - pTb->msLastUsed;
1208 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1209
1210 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1211 {
1212#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1213 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1214 { /* likely */ }
1215 else
1216 {
1217 idxInChunk2 = 0;
1218 idxChunk2 += 1;
1219 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1220 idxChunk2 = 0;
1221 }
1222#endif
1223 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1224 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1225 if ( cMsAge2 > cMsAge
1226 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1227 {
1228 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1229 pTb = pTb2;
1230 idxChunk = idxChunk2;
1231 idxInChunk = idxInChunk2;
1232 cMsAge = cMsAge2;
1233 }
1234 }
1235
1236 /* Free the TB. */
1237 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1238 cFreedTbs++; /* paranoia */
1239 }
1240 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1241 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1242
1243 /* Flush the TB lookup entry pointer. */
1244 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1245
1246 /*
1247 * Allocate a TB from the ones we've pruned.
1248 */
1249 if (cFreedTbs)
1250 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1251 return NULL;
1252}
1253
1254
1255/**
1256 * Allocate a translation block.
1257 *
1258 * @returns Pointer to block on success, NULL if we're out and unable to
1259 * free up an existing one (very unlikely once implemented).
1260 * @param pVCpu The cross context virtual CPU structure of the calling
1261 * thread.
1262 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1263 * For statistics.
1264 */
1265DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1266{
1267 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1268 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1269
1270 /* Free any pending TBs before we proceed. */
1271 if (!pTbAllocator->pDelayedFreeHead)
1272 { /* probably likely */ }
1273 else
1274 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1275
1276 /* If the allocator is full, take the slow code path. */
1277 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1278 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1279 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1280}
1281
1282
1283/**
1284 * This is called when we're out of space for native TBs.
1285 *
1286 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1287 * The difference is that we only prune native TBs and will only free any if
1288 * there are at least two in a group. The conditions under which we're called are
1289 * different - there will probably be free TBs in the table when we're called.
1290 * Therefore we increase the group size and max scan length, though we'll stop
1291 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1292 * up at least 8 TBs.
1293 */
1294void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1295{
1296 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1297 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1298
1299 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1300
1301 /*
1302 * Flush the delayed free list before we start freeing TBs indiscriminately.
1303 */
1304 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1305
1306 /*
1307 * Scan and free TBs.
1308 */
1309 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1310 uint32_t const cTbsToPrune = 128 * 8;
1311 uint32_t const cTbsPerGroup = 4 * 4;
1312 uint32_t cFreedTbs = 0;
1313 uint32_t cMaxInstrs = 0;
1314 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1315 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1316 {
1317 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1318 idxTbPruneFrom = 0;
1319 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1320 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1321 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1322 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1323 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1324
1325 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1326 {
1327 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1328 { /* likely */ }
1329 else
1330 {
1331 idxInChunk2 = 0;
1332 idxChunk2 += 1;
1333 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1334 idxChunk2 = 0;
1335 }
1336 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1337 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1338 {
1339 cNativeTbs += 1;
1340 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1341 if ( cMsAge2 > cMsAge
1342 || ( cMsAge2 == cMsAge
1343 && ( pTb2->cUsed < pTb->cUsed
1344 || ( pTb2->cUsed == pTb->cUsed
1345 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1346 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1347 {
1348 pTb = pTb2;
1349 idxChunk = idxChunk2;
1350 idxInChunk = idxInChunk2;
1351 cMsAge = cMsAge2;
1352 }
1353 }
1354 }
1355
1356 /* Free the TB if we found at least two native ones in this group. */
1357 if (cNativeTbs >= 2)
1358 {
1359 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1360 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1361 cFreedTbs++;
1362 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1363 break;
1364 }
1365 }
1366 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1367
1368 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1369}
1370
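/*
 * Illustrative note (not part of the original source): with the constants
 * used above (cTbsToPrune = 128 * 8 = 1024, cTbsPerGroup = 4 * 4 = 16) up to
 * 64 groups are scanned per call.  A group only gives up a TB when it holds
 * at least two native TBs, and the scan stops early once at least 8 TBs have
 * been freed and the largest of them had cNeededInstrs instructions or more.
 */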
1371
1372/*********************************************************************************************************************************
1373* Threaded Recompiler Core *
1374*********************************************************************************************************************************/
1375/**
1376 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1377 * @returns pszBuf.
1378 * @param fFlags The flags.
1379 * @param pszBuf The output buffer.
1380 * @param cbBuf The output buffer size. At least 32 bytes.
1381 */
1382DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1383{
1384 Assert(cbBuf >= 32);
1385 static RTSTRTUPLE const s_aModes[] =
1386 {
1387 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1388 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1389 /* [02] = */ { RT_STR_TUPLE("!2!") },
1390 /* [03] = */ { RT_STR_TUPLE("!3!") },
1391 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1392 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1393 /* [06] = */ { RT_STR_TUPLE("!6!") },
1394 /* [07] = */ { RT_STR_TUPLE("!7!") },
1395 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1396 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1397 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1398 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1399 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1400 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1401 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1402 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1403 /* [10] = */ { RT_STR_TUPLE("!10!") },
1404 /* [11] = */ { RT_STR_TUPLE("!11!") },
1405 /* [12] = */ { RT_STR_TUPLE("!12!") },
1406 /* [13] = */ { RT_STR_TUPLE("!13!") },
1407 /* [14] = */ { RT_STR_TUPLE("!14!") },
1408 /* [15] = */ { RT_STR_TUPLE("!15!") },
1409 /* [16] = */ { RT_STR_TUPLE("!16!") },
1410 /* [17] = */ { RT_STR_TUPLE("!17!") },
1411 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1412 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1413 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1414 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1415 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1416 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1417 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1418 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1419 };
1420 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1421 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1422 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1423
1424 pszBuf[off++] = ' ';
1425 pszBuf[off++] = 'C';
1426 pszBuf[off++] = 'P';
1427 pszBuf[off++] = 'L';
1428 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1429 Assert(off < 32);
1430
1431 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1432
1433 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1434 {
1435 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1436 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1437 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1438 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1439 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1440 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1441 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1442 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1443 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1444 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1445 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1446 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1447 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1448 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1449 };
1450 if (fFlags)
1451 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1452 if (s_aFlags[i].fFlag & fFlags)
1453 {
1454 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1455 pszBuf[off++] = ' ';
1456 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1457 off += s_aFlags[i].cchName;
1458 fFlags &= ~s_aFlags[i].fFlag;
1459 if (!fFlags)
1460 break;
1461 }
1462 pszBuf[off] = '\0';
1463
1464 return pszBuf;
1465}
1466
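/*
 * Illustrative usage (not part of the original source); per the doc comment
 * above the buffer must be at least 32 bytes:
 *
 *     char szBuf[64];
 *     Log2(("TB flags: %s\n", iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf))));
 */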
1467
1468/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1469static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1470{
1471 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1472 pDis->cbCachedInstr += cbMaxRead;
1473 RT_NOREF(cbMinRead);
1474 return VERR_NO_DATA;
1475}
1476
1477
1478/**
1479 * Worker for iemThreadedDisassembleTb.
1480 */
1481static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1482 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1483{
1484 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1485 {
1486 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1487 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1488 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1489 {
1490 PIEMTB pLookupTb = papTbLookup[iLookup];
1491 if (pLookupTb)
1492 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1493 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1494 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1495 : "invalid");
1496 else
1497 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1498 }
1499 pHlp->pfnPrintf(pHlp, "\n");
1500 }
1501 else
1502 {
1503 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1504 idxFirst, cEntries, pTb->cTbLookupEntries);
1505 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1506 }
1507}
1508
1509
1510DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1511{
1512 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1513
1514 char szDisBuf[512];
1515
1516 /*
1517 * Print TB info.
1518 */
1519 pHlp->pfnPrintf(pHlp,
1520 "pTb=%p: GCPhysPc=%RGp (%RGv) cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1521 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1522 pTb, pTb->GCPhysPc, pTb->FlatPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1523 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1524
1525 /*
1526 * This disassembly is driven by the debug info which follows the native
1527 * code and indicates when it starts with the next guest instructions,
1528 * where labels are and such things.
1529 */
1530 DISSTATE Dis;
1531 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1532 uint32_t const cCalls = pTb->Thrd.cCalls;
1533 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1534 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1535 : DISCPUMODE_64BIT;
1536 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1537 uint8_t idxRange = UINT8_MAX;
1538 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1539 uint32_t offRange = 0;
1540 uint32_t offOpcodes = 0;
1541 uint32_t const cbOpcodes = pTb->cbOpcodes;
1542 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1543 bool fTbLookupSeen0 = false;
1544
1545 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1546 {
1547 /*
1548 * New opcode range?
1549 */
1550 if ( idxRange == UINT8_MAX
1551 || idxRange >= cRanges
1552 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1553 {
1554 idxRange += 1;
1555 if (idxRange < cRanges)
1556 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1557 else
1558 continue;
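            /* Translate the new range into a physical PC: idxPhysPage 0 refers to the
               TB's own starting page, higher values index into aGCPhysPages. */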
1559 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1560 + (pTb->aRanges[idxRange].idxPhysPage == 0
1561 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1562 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1563 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1564 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1565 pTb->aRanges[idxRange].idxPhysPage);
1566 GCPhysPc += offRange;
1567 }
1568
1569 /*
1570 * Disassemble another guest instruction?
1571 */
1572 if ( paCalls[iCall].offOpcode != offOpcodes
1573 && paCalls[iCall].cbOpcode > 0
1574 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1575 {
1576 offOpcodes = paCalls[iCall].offOpcode;
1577 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1578 uint32_t cbInstr = 1;
1579 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1580 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1581 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1582 if (RT_SUCCESS(rc))
1583 {
1584 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1585 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1586 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1587 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1588 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1589 }
1590 else
1591 {
1592 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1593 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1594 cbInstr = paCalls[iCall].cbOpcode;
1595 }
1596 GCPhysPc += cbInstr;
1597 offRange += cbInstr;
1598 }
1599
1600 /*
1601 * Dump call details.
1602 */
1603 pHlp->pfnPrintf(pHlp,
1604 " Call #%u to %s (%u args)\n",
1605 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1606 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1607 if (paCalls[iCall].uTbLookup != 0)
1608 {
1609 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1610 fTbLookupSeen0 = idxFirst == 0;
1611 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1612 }
1613
1614 /*
1615 * Snoop fExec.
1616 */
1617 switch (paCalls[iCall].enmFunction)
1618 {
1619 default:
1620 break;
1621 case kIemThreadedFunc_BltIn_CheckMode:
1622 fExec = paCalls[iCall].auParams[0];
1623 break;
1624 }
1625 }
1626
1627 if (!fTbLookupSeen0)
1628 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1629}
1630
1631
1632
1633/**
1634 * Allocate a translation block for threaded recompilation.
1635 *
1636 * This is allocated with a maxed-out call table and storage for opcode bytes,
1637 * because it's only supposed to be called once per EMT to allocate the TB
1638 * pointed to by IEMCPU::pThrdCompileTbR3.
1639 *
1640 * @returns Pointer to the translation block on success, NULL on failure.
1641 * @param pVM The cross context virtual machine structure.
1642 * @param pVCpu The cross context virtual CPU structure of the calling
1643 * thread.
1644 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1645 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1646 */
1647static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1648{
1649 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1650 if (pTb)
1651 {
1652 unsigned const cCalls = 256;
1653 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1654 if (pTb->Thrd.paCalls)
1655 {
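            /* 16 opcode bytes per call entry; ample headroom given the 15 byte
               maximum x86 instruction length. */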
1656 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1657 if (pTb->pabOpcodes)
1658 {
1659 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1660 pTb->Thrd.cAllocated = cCalls;
1661 pTb->Thrd.cCalls = 0;
1662 pTb->cbOpcodes = 0;
1663 pTb->pNext = NULL;
1664 pTb->cUsed = 0;
1665 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1666 pTb->idxAllocChunk = UINT8_MAX;
1667 pTb->GCPhysPc = GCPhysPc;
1668 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1669 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1670 pTb->cInstructions = 0;
1671 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1672
1673 /* Init the first opcode range. */
1674 pTb->cRanges = 1;
1675 pTb->aRanges[0].cbOpcodes = 0;
1676 pTb->aRanges[0].offOpcodes = 0;
1677 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1678 pTb->aRanges[0].u2Unused = 0;
1679 pTb->aRanges[0].idxPhysPage = 0;
1680 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1681 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1682
1683 return pTb;
1684 }
1685 RTMemFree(pTb->Thrd.paCalls);
1686 }
1687 RTMemFree(pTb);
1688 }
1689 RT_NOREF(pVM);
1690 return NULL;
1691}
1692
1693
1694/**
1695 * Called on the TB that is dedicated for recompilation before it's reused.
1696 *
1697 * @param pVCpu The cross context virtual CPU structure of the calling
1698 * thread.
1699 * @param pTb The translation block to reuse.
1700 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1701 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1702 */
1703static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1704{
1705 pTb->GCPhysPc = GCPhysPc;
1706 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1707 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1708 pTb->Thrd.cCalls = 0;
1709 pTb->cbOpcodes = 0;
1710 pTb->cInstructions = 0;
1711 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1712
1713 /* Init the first opcode range. */
1714 pTb->cRanges = 1;
1715 pTb->aRanges[0].cbOpcodes = 0;
1716 pTb->aRanges[0].offOpcodes = 0;
1717 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1718 pTb->aRanges[0].u2Unused = 0;
1719 pTb->aRanges[0].idxPhysPage = 0;
1720 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1721 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1722}
1723
1724
1725/**
1726 * Used to duplicate a threaded translation block after recompilation is done.
1727 *
1728 * @returns Pointer to the translation block on success, NULL on failure.
1729 * @param pVM The cross context virtual machine structure.
1730 * @param pVCpu The cross context virtual CPU structure of the calling
1731 * thread.
1732 * @param pTbSrc The TB to duplicate.
1733 */
1734static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1735{
1736 /*
1737 * Just using the heap for now. Will make this more efficient and
1738 * complicated later, don't worry. :-)
1739 */
1740 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1741 if (pTb)
1742 {
1743 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1744 memcpy(pTb, pTbSrc, sizeof(*pTb));
1745 pTb->idxAllocChunk = idxAllocChunk;
1746
1747 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1748 Assert(cCalls > 0);
1749 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1750 if (pTb->Thrd.paCalls)
1751 {
1752 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1753 Assert(cbTbLookup > 0);
1754 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1755 Assert(cbOpcodes > 0);
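            /* The TB lookup table and the opcode bytes share a single allocation:
               the (zeroed) lookup entries come first and pabOpcodes points just past them. */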
1756 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1757 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1758 if (pbBoth)
1759 {
1760 RT_BZERO(pbBoth, cbTbLookup);
1761 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1762 pTb->Thrd.cAllocated = cCalls;
1763 pTb->pNext = NULL;
1764 pTb->cUsed = 0;
1765 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1766 pTb->fFlags = pTbSrc->fFlags;
1767
1768 return pTb;
1769 }
1770 RTMemFree(pTb->Thrd.paCalls);
1771 }
1772 iemTbAllocatorFree(pVCpu, pTb);
1773 }
1774 RT_NOREF(pVM);
1775 return NULL;
1776
1777}
1778
1779
1780/**
1781 * Adds the given TB to the hash table.
1782 *
1783 * @param pVCpu The cross context virtual CPU structure of the calling
1784 * thread.
1785 * @param pTbCache The cache to add it to.
1786 * @param pTb The translation block to add.
1787 */
1788static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1789{
1790 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1791
1792 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1793 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1794 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1795 if (LogIs12Enabled())
1796 {
1797 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1798 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1799 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1800 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1801 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1802 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1803 pTb->aRanges[idxRange].idxPhysPage == 0
1804 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1805 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1806 }
1807}
1808
1809
1810/**
1811 * Called by opcode verifier functions when they detect a problem.
1812 */
1813void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1814{
1815 /* We cannot free the current TB (indicated by fSafeToFree) because:
1816 - A threaded TB will have its current call entry accessed
1817 to update pVCpu->iem.s.cInstructions.
1818 - A native TB will have code left to execute. */
1819 if (fSafeToFree)
1820 iemTbAllocatorFree(pVCpu, pTb);
1821 else
1822 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1823}
1824
1825
1826/*
1827 * Real code.
1828 */
1829
1830#ifdef LOG_ENABLED
1831/**
1832 * Logs the current instruction.
1833 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1834 * @param pszFunction The IEM function doing the execution.
1835 * @param idxInstr The instruction number in the block.
1836 */
1837static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1838{
1839# ifdef IN_RING3
1840 if (LogIs2Enabled())
1841 {
1842 char szInstr[256];
1843 uint32_t cbInstr = 0;
1844 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1845 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1846 szInstr, sizeof(szInstr), &cbInstr);
1847
1848 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1849 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1850 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1851 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1852 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1853 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1854 " %s\n"
1855 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1856 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1857 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1858 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1859 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1860 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1861 szInstr));
1862
1863 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1864 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1865 }
1866 else
1867# endif
1868 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1869 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1870}
1871#endif /* LOG_ENABLED */
1872
1873
1874#if 0
1875static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1876{
1877 RT_NOREF(pVM, pVCpu);
1878 return rcStrict;
1879}
1880#endif
1881
1882
1883/**
1884 * Initializes the decoder state when compiling TBs.
1885 *
1886 * This presumes that fExec has already been initialized.
1887 *
1888 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1889 * to apply fixes to them as well.
1890 *
1891 * @param pVCpu The cross context virtual CPU structure of the calling
1892 * thread.
1893 * @param fReInit Clear for the first call for a TB, set for subsequent
1894 * calls from inside the compile loop where we can skip a
1895 * couple of things.
1896 * @param fExtraFlags The extra translation block flags when @a fReInit is
1897 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1898 * checked.
1899 */
1900DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1901{
1902 /* ASSUMES: That iemInitExec was already called and that anyone changing
1903 CPU state affecting the fExec bits since then will have updated fExec! */
1904 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1905 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1906
1907 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1908
1909 /* Decoder state: */
1910 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1911 pVCpu->iem.s.enmEffAddrMode = enmMode;
1912 if (enmMode != IEMMODE_64BIT)
1913 {
1914 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1915 pVCpu->iem.s.enmEffOpSize = enmMode;
1916 }
1917 else
1918 {
1919 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1920 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1921 }
1922 pVCpu->iem.s.fPrefixes = 0;
1923 pVCpu->iem.s.uRexReg = 0;
1924 pVCpu->iem.s.uRexB = 0;
1925 pVCpu->iem.s.uRexIndex = 0;
1926 pVCpu->iem.s.idxPrefix = 0;
1927 pVCpu->iem.s.uVex3rdReg = 0;
1928 pVCpu->iem.s.uVexLength = 0;
1929 pVCpu->iem.s.fEvexStuff = 0;
1930 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1931 pVCpu->iem.s.offModRm = 0;
1932 pVCpu->iem.s.iNextMapping = 0;
1933
1934 if (!fReInit)
1935 {
1936 pVCpu->iem.s.cActiveMappings = 0;
1937 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1938 pVCpu->iem.s.fEndTb = false;
1939 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes for before executing the first instruction) */
1940 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1941 pVCpu->iem.s.fTbCrossedPage = false;
1942 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1943 pVCpu->iem.s.idxLastCheckIrqCallNo = UINT16_MAX;
1944 pVCpu->iem.s.fTbCurInstrIsSti = false;
1945 /* Force RF clearing and TF checking on first instruction in the block
1946 as we don't really know what came before and should assume the worst: */
1947 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1948 }
1949 else
1950 {
1951 Assert(pVCpu->iem.s.cActiveMappings == 0);
1952 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1953 Assert(pVCpu->iem.s.fEndTb == false);
1954 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1955 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1956 }
1957 pVCpu->iem.s.fTbCurInstr = 0;
1958
1959#ifdef DBGFTRACE_ENABLED
1960 switch (IEM_GET_CPU_MODE(pVCpu))
1961 {
1962 case IEMMODE_64BIT:
1963 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1964 break;
1965 case IEMMODE_32BIT:
1966 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1967 break;
1968 case IEMMODE_16BIT:
1969 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1970 break;
1971 }
1972#endif
1973}
1974
1975
1976/**
1977 * Initializes the opcode fetcher when starting the compilation.
1978 *
1979 * @param pVCpu The cross context virtual CPU structure of the calling
1980 * thread.
1981 */
1982DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1983{
1984 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1985#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1986 pVCpu->iem.s.offOpcode = 0;
1987#else
1988 RT_NOREF(pVCpu);
1989#endif
1990}
1991
1992
1993/**
1994 * Re-initializes the opcode fetcher between instructions while compiling.
1995 *
1996 * @param pVCpu The cross context virtual CPU structure of the calling
1997 * thread.
1998 */
1999DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
2000{
2001 if (pVCpu->iem.s.pbInstrBuf)
2002 {
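        /* Compute the flat PC and check whether it still falls within the mapped
           instruction buffer; if not, reset the state so the next fetch reloads
           the buffer via the TLB. */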
2003 uint64_t off = pVCpu->cpum.GstCtx.rip;
2004 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2005 off += pVCpu->cpum.GstCtx.cs.u64Base;
2006 off -= pVCpu->iem.s.uInstrBufPc;
2007 if (off < pVCpu->iem.s.cbInstrBufTotal)
2008 {
2009 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2010 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2011 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2012 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2013 else
2014 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2015 }
2016 else
2017 {
2018 pVCpu->iem.s.pbInstrBuf = NULL;
2019 pVCpu->iem.s.offInstrNextByte = 0;
2020 pVCpu->iem.s.offCurInstrStart = 0;
2021 pVCpu->iem.s.cbInstrBuf = 0;
2022 pVCpu->iem.s.cbInstrBufTotal = 0;
2023 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2024 }
2025 }
2026 else
2027 {
2028 pVCpu->iem.s.offInstrNextByte = 0;
2029 pVCpu->iem.s.offCurInstrStart = 0;
2030 pVCpu->iem.s.cbInstrBuf = 0;
2031 pVCpu->iem.s.cbInstrBufTotal = 0;
2032#ifdef VBOX_STRICT
2033 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2034#endif
2035 }
2036#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2037 pVCpu->iem.s.offOpcode = 0;
2038#endif
2039}
2040
2041#ifdef LOG_ENABLED
2042
2043/**
2044 * Inserts a NOP call.
2045 *
2046 * This is for debugging.
2047 *
2048 * @returns true on success, false if we're out of call entries.
2049 * @param pTb The translation block being compiled.
2050 */
2051bool iemThreadedCompileEmitNop(PIEMTB pTb)
2052{
2053 /* Emit the call. */
2054 uint32_t const idxCall = pTb->Thrd.cCalls;
2055 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2056 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2057 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2058 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2059 pCall->idxInstr = pTb->cInstructions - 1;
2060 pCall->cbOpcode = 0;
2061 pCall->offOpcode = 0;
2062 pCall->uTbLookup = 0;
2063 pCall->fFlags = 0;
2064 pCall->auParams[0] = 0;
2065 pCall->auParams[1] = 0;
2066 pCall->auParams[2] = 0;
2067 return true;
2068}
2069
2070
2071/**
2072 * Called by iemThreadedCompile if cpu state logging is desired.
2073 *
2074 * @returns true on success, false if we're out of call entries.
2075 * @param pTb The translation block being compiled.
2076 */
2077bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2078{
2079 /* Emit the call. */
2080 uint32_t const idxCall = pTb->Thrd.cCalls;
2081 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2082 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2083 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2084 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2085 pCall->idxInstr = pTb->cInstructions - 1;
2086 pCall->cbOpcode = 0;
2087 pCall->offOpcode = 0;
2088 pCall->uTbLookup = 0;
2089 pCall->fFlags = 0;
2090 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2091 pCall->auParams[1] = 0;
2092 pCall->auParams[2] = 0;
2093 return true;
2094}
2095
2096#endif /* LOG_ENABLED */
2097
2098DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2099{
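    /* Unrolled copy: enter the switch at the entry matching cbInstr and fall
       through all the way down to byte zero. */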
2100 switch (cbInstr)
2101 {
2102 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2103 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2104 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2105 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2106 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2107 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2108 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2109 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2110 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2111 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2112 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2113 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2114 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2115 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2116 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2117 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2118 }
2119}
2120
2121#ifdef IEM_WITH_INTRA_TB_JUMPS
2122
2123/**
2124 * Emits the necessary tail calls for a full TB loop-jump.
2125 */
2126static bool iemThreadedCompileFullTbJump(PVMCPUCC pVCpu, PIEMTB pTb)
2127{
2128 /*
2129 * We need a timer and maybe IRQ check before jumping, so make sure
2130 * we've got sufficient call entries left before emitting anything.
2131 */
2132 uint32_t idxCall = pTb->Thrd.cCalls;
2133 if (idxCall + 1U <= pTb->Thrd.cAllocated)
2134 {
2135 /*
2136 * We're good, emit the calls.
2137 */
2138 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2139 pTb->Thrd.cCalls = (uint16_t)(idxCall + 2);
2140
2141 /* Always check timers as we risk getting stuck in a loop otherwise. We
2142 combine it with an IRQ check if that's not performed in the TB already. */
2143 pCall->enmFunction = pVCpu->iem.s.idxLastCheckIrqCallNo < idxCall
2144 ? kIemThreadedFunc_BltIn_CheckTimers
2145 : kIemThreadedFunc_BltIn_CheckTimersAndIrq;
2146 pCall->idxInstr = 0;
2147 pCall->offOpcode = 0;
2148 pCall->cbOpcode = 0;
2149 pCall->uTbLookup = 0;
2150 pCall->fFlags = 0;
2151 pCall->auParams[0] = 0;
2152 pCall->auParams[1] = 0;
2153 pCall->auParams[2] = 0;
2154 pCall++;
2155
2156 /* The jump callentry[0]. */
2157 pCall->enmFunction = kIemThreadedFunc_BltIn_Jump;
2158 pCall->idxInstr = 0;
2159 pCall->offOpcode = 0;
2160 pCall->cbOpcode = 0;
2161 pCall->uTbLookup = 0;
2162 pCall->fFlags = 0;
2163 pCall->auParams[0] = 0; /* jump target is call zero */
2164 pCall->auParams[1] = 0;
2165 pCall->auParams[2] = 0;
2166
2167 /* Mark callentry #0 as a jump target. */
2168 pTb->Thrd.paCalls[0].fFlags |= IEMTHREADEDCALLENTRY_F_JUMP_TARGET;
2169 }
2170
2171 return false;
2172}
2173
2174/**
2175 * Called by IEM_MC2_BEGIN_EMIT_CALLS when it detects that we're back at the
2176 * first instruction and we didn't just branch to it (that's handled below).
2177 *
2178 * This will emit a loop iff everything is compatible with that.
2179 */
2180DECLHIDDEN(int) iemThreadedCompileBackAtFirstInstruction(PVMCPU pVCpu, PIEMTB pTb) RT_NOEXCEPT
2181{
2182 /* Check if the mode matches. */
2183 if ( (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2184 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS))
2185 {
2186 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected2);
2187 iemThreadedCompileFullTbJump(pVCpu, pTb);
2188 }
2189 return VINF_IEM_RECOMPILE_END_TB;
2190}
2191
2192#endif /* IEM_WITH_INTRA_TB_JUMPS */
2193
2194
2195/**
2196 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2197 *
2198 * - CS LIM check required.
2199 * - Must recheck opcode bytes.
2200 * - Previous instruction branched.
2201 * - TLB load detected, probably due to page crossing.
2202 *
2203 * @returns true if everything went well, false if we're out of space in the TB
2204 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
2205 * @param pVCpu The cross context virtual CPU structure of the calling
2206 * thread.
2207 * @param pTb The translation block being compiled.
2208 */
2209bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2210{
2211 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2212 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2213#if 0
2214 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2215 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2216#endif
2217
2218 /*
2219 * If we're not in 64-bit mode and not already checking CS.LIM we need to
2220 * see if it's needed to start checking.
2221 */
2222 bool fConsiderCsLimChecking;
2223 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2224 if ( fMode == IEM_F_MODE_X86_64BIT
2225 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2226 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2227 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2228 fConsiderCsLimChecking = false; /* already enabled or not needed */
2229 else
2230 {
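        /* Heuristic: if CS.LIM is at least a page plus a max-length instruction beyond
           the current EIP (adjusted for the page offset of CS.BASE), CS.LIM checking can
           be deferred; otherwise bail so the TB is recompiled with checks enabled. */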
2231 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2232 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2233 fConsiderCsLimChecking = true; /* likely */
2234 else
2235 {
2236 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2237 return false;
2238 }
2239 }
2240
2241 /*
2242 * Prepare the call now, even before we know if we can accept the instruction in this TB.
2243 * This allows us to amend parameters w/o making every case suffer.
2244 */
2245 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2246 uint16_t const offOpcode = pTb->cbOpcodes;
2247 uint8_t idxRange = pTb->cRanges - 1;
2248
2249 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2250 pCall->idxInstr = pTb->cInstructions;
2251 pCall->cbOpcode = cbInstr;
2252 pCall->offOpcode = offOpcode;
2253 pCall->uTbLookup = 0;
2254 pCall->fFlags = 0;
2255 pCall->auParams[0] = (uint32_t)cbInstr
2256 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2257 /* The upper dword is sometimes used for cbStartPage. */;
2258 pCall->auParams[1] = idxRange;
2259 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2260
2261/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2262 * gotten onto. If we do, stop */
2263
2264 /*
2265 * Case 1: We've branched (RIP changed).
2266 *
2267 * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2268 * TB, end the TB here as it is most likely a loop and if it
2269 * made sense to unroll it, the guest code compiler should've
2270 * done it already.
2271 *
2272 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2273 * Req: 1 extra range, no extra phys.
2274 *
2275 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2276 * necessary (fTbCrossedPage is true).
2277 * Req: 1 extra range, probably 1 extra phys page entry.
2278 *
2279 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2280 * but in addition we cross into the following page and require
2281 * another TLB load.
2282 * Req: 2 extra ranges, probably 2 extra phys page entries.
2283 *
2284 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2285 * the following page (thus fTbCrossedPage is true).
2286 * Req: 2 extra ranges, probably 1 extra phys page entry.
2287 *
2288 * Note! The setting of fTbBranched is done by iemOpcodeFetchBytesJmp, but
2289 * it may trigger "spuriously" from the CPU point of view because of
2290 * physical page changes that'll invalidate the physical TLB and trigger a
2291 * call to the function. In practice this shouldn't be a big deal, just a bit
2292 * of performance loss as we'll pick the LoadingTlb variants.
2293 *
2294 * Note! We do not currently optimize branching to the next instruction (sorry
2295 * 32-bit PIC code). We could maybe do that in the branching code that
2296 * sets (or not) fTbBranched.
2297 */
2298 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2299 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2300 * code. This'll require filtering out far jmps and calls, as they
2301 * load CS which should technically be considered indirect since the
2302 * GDT/LDT entry's base address can be modified independently from
2303 * the code. */
2304 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2305 {
2306 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2307 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2308 {
2309 /* 1a + 1b - instruction fully within the branched to page. */
2310 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2311 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2312
2313 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2314 {
2315 /* Check that we've got a free range. */
2316 idxRange += 1;
2317 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2318 { /* likely */ }
2319 else
2320 {
2321 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2322 return false;
2323 }
2324 pCall->auParams[1] = idxRange;
2325 pCall->auParams[2] = 0;
2326
2327 /* Check that we've got a free page slot. */
2328 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2329 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2330 uint8_t idxPhysPage;
2331 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2332 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2333 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2334 {
2335 pTb->aGCPhysPages[0] = GCPhysNew;
2336 pTb->aRanges[idxRange].idxPhysPage = 1;
2337 idxPhysPage = UINT8_MAX;
2338 }
2339 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2340 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2341 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2342 {
2343 pTb->aGCPhysPages[1] = GCPhysNew;
2344 pTb->aRanges[idxRange].idxPhysPage = 2;
2345 idxPhysPage = UINT8_MAX;
2346 }
2347 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2348 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2349 else
2350 {
2351 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2352 return false;
2353 }
2354
2355 /* Loop check: We weave the loop check in here to optimize the lookup. */
2356 if (idxPhysPage != UINT8_MAX)
2357 {
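                    /* idxPhysPage is UINT8_MAX when the branch target page was just added
                       to the TB, in which case no earlier range can reference it and the
                       scan can be skipped. */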
2358 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2359 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2360 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2361 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2362 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2363 {
2364 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2365#ifdef IEM_WITH_INTRA_TB_JUMPS
2366 /* If we're looping back to the start of the TB and the mode is still the same,
2367 we could emit a jump optimization. For now we don't do page transitions
2368 as that implies TLB loading and such. */
2369 if ( idxLoopRange == 0
2370 && offPhysPc == pTb->aRanges[0].offPhysPage
2371 && (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2372 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS)
2373 && (pVCpu->iem.s.fTbBranched & ( IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR
2374 | IEMBRANCHED_F_STACK | IEMBRANCHED_F_RELATIVE))
2375 == IEMBRANCHED_F_RELATIVE)
2376 {
2377 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected);
2378 return iemThreadedCompileFullTbJump(pVCpu, pTb);
2379 }
2380#endif
2381 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2382 return false;
2383 }
2384 }
2385
2386 /* Finish setting up the new range. */
2387 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2388 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2389 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2390 pTb->aRanges[idxRange].u2Unused = 0;
2391 pTb->cRanges++;
2392 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2393 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2394 pTb->aRanges[idxRange].offOpcodes));
2395 }
2396 else
2397 {
2398 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2399 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2400 }
2401
2402 /* Determine which function we need to load & check.
2403 Note! For jumps to a new page, we'll set both fTbBranched and
2404 fTbCrossedPage to avoid unnecessary TLB work for intra-page
2405 branching. */
2406 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2407 || pVCpu->iem.s.fTbCrossedPage)
2408 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2409 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2410 : !fConsiderCsLimChecking
2411 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2412 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2413 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2414 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2415 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2416 : !fConsiderCsLimChecking
2417 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2418 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2419 else
2420 {
2421 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2422 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2423 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2424 : !fConsiderCsLimChecking
2425 ? kIemThreadedFunc_BltIn_CheckOpcodes
2426 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2427 }
2428 }
2429 else
2430 {
2431 /* 1c + 1d - instruction crosses pages. */
2432 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2433 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2434
2435 /* Lazy bird: Check that this isn't case 1c, since we've already
2436 loaded the first physical address. End the TB and
2437 make it a case 2b instead.
2438
2439 Hmm. Too much bother to detect, so just do the same
2440 with case 1d as well. */
2441#if 0 /** @todo get back to this later when we've got the actual branch code in
2442 * place. */
2443 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2444
2445 /* Check that we've got two free ranges. */
2446 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2447 { /* likely */ }
2448 else
2449 return false;
2450 idxRange += 1;
2451 pCall->auParams[1] = idxRange;
2452 pCall->auParams[2] = 0;
2453
2454 /* ... */
2455
2456#else
2457 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2458 return false;
2459#endif
2460 }
2461 }
2462
2463 /*
2464 * Case 2: Page crossing.
2465 *
2466 * Sub-case 2a: The instruction starts on the first byte in the next page.
2467 *
2468 * Sub-case 2b: The instruction has opcode bytes in both the current and
2469 * following page.
2470 *
2471 * Both cases requires a new range table entry and probably a new physical
2472 * page entry. The difference is in which functions to emit and whether to
2473 * add bytes to the current range.
2474 */
2475 else if (pVCpu->iem.s.fTbCrossedPage)
2476 {
2477 /* Check that we've got a free range. */
2478 idxRange += 1;
2479 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2480 { /* likely */ }
2481 else
2482 {
2483 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2484 return false;
2485 }
2486
2487 /* Check that we've got a free page slot. */
2488 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2489 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2490 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2491 pTb->aRanges[idxRange].idxPhysPage = 0;
2492 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2493 || pTb->aGCPhysPages[0] == GCPhysNew)
2494 {
2495 pTb->aGCPhysPages[0] = GCPhysNew;
2496 pTb->aRanges[idxRange].idxPhysPage = 1;
2497 }
2498 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2499 || pTb->aGCPhysPages[1] == GCPhysNew)
2500 {
2501 pTb->aGCPhysPages[1] = GCPhysNew;
2502 pTb->aRanges[idxRange].idxPhysPage = 2;
2503 }
2504 else
2505 {
2506 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2507 return false;
2508 }
2509
2510 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2511 {
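            /* Sub-case 2a: the previous range ended exactly on a page boundary, so this
               instruction starts at offset zero in the new page. */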
2512 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2513 pCall->auParams[1] = idxRange;
2514 pCall->auParams[2] = 0;
2515
2516 /* Finish setting up the new range. */
2517 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2518 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2519 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2520 pTb->aRanges[idxRange].u2Unused = 0;
2521 pTb->cRanges++;
2522 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2523 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2524 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2525
2526 /* Determine which function we need to load & check. */
2527 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2528 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2529 : !fConsiderCsLimChecking
2530 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2531 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2532 }
2533 else
2534 {
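            /* Sub-case 2b: the instruction straddles the page boundary; the first
               cbStartPage bytes are added to the previous range and the remainder
               goes into the new one. */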
2535 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2536 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2537 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2538 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2539
2540 /* We're good. Split the instruction over the old and new range table entries. */
2541 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2542
2543 pTb->aRanges[idxRange].offPhysPage = 0;
2544 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2545 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2546 pTb->aRanges[idxRange].u2Unused = 0;
2547 pTb->cRanges++;
2548 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2549 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2550 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2551
2552 /* Determine which function we need to load & check. */
2553 if (pVCpu->iem.s.fTbCheckOpcodes)
2554 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2555 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2556 : !fConsiderCsLimChecking
2557 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2558 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2559 else
2560 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2561 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2562 : !fConsiderCsLimChecking
2563 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2564 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2565 }
2566 }
2567
2568 /*
2569 * Regular case: No new range required.
2570 */
2571 else
2572 {
2573 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2574 if (pVCpu->iem.s.fTbCheckOpcodes)
2575 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2576 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2577 : kIemThreadedFunc_BltIn_CheckOpcodes;
2578 else
2579 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2580
2581 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2582 pTb->cbOpcodes = offOpcode + cbInstr;
2583 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2584 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2585 }
2586
2587 /*
2588 * Commit the call.
2589 */
2590 pTb->Thrd.cCalls++;
2591
2592 /*
2593 * Clear state.
2594 */
2595 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2596 pVCpu->iem.s.fTbCrossedPage = false;
2597 pVCpu->iem.s.fTbCheckOpcodes = false;
2598
2599 /*
2600 * Copy opcode bytes.
2601 */
2602 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2603 pTb->cbOpcodes = offOpcode + cbInstr;
2604 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2605
2606 return true;
2607}
2608
2609
2610/**
2611 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2612 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2613 *
2614 * @returns true if anything is pending, false if not.
2615 * @param pVCpu The cross context virtual CPU structure of the calling
2616 * thread.
2617 */
2618DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2619{
2620 uint64_t fCpu = pVCpu->fLocalForcedActions;
2621 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2622#if 1
2623 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
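    /* Nothing to do when nothing is pending, or when only PIC/APIC interrupts are
       pending but cannot be delivered because IF is clear or we're in an interrupt
       shadow. */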
2624 if (RT_LIKELY( !fCpu
2625 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2626 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2627 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2628 return false;
2629 return true;
2630#else
2631 return false;
2632#endif
2633
2634}
2635
2636
2637/**
2638 * Called by iemThreadedCompile when a block requires a mode check.
2639 *
2640 * @returns true if we should continue, false if we're out of call entries.
2641 * @param pVCpu The cross context virtual CPU structure of the calling
2642 * thread.
2643 * @param pTb The translation block being compiled.
2644 */
2645static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2646{
2647 /* Emit the call. */
2648 uint32_t const idxCall = pTb->Thrd.cCalls;
2649 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2650 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2651 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2652 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2653 pCall->idxInstr = pTb->cInstructions - 1;
2654 pCall->cbOpcode = 0;
2655 pCall->offOpcode = 0;
2656 pCall->uTbLookup = 0;
2657 pCall->fFlags = 0;
2658 pCall->auParams[0] = pVCpu->iem.s.fExec;
2659 pCall->auParams[1] = 0;
2660 pCall->auParams[2] = 0;
2661 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2662 return true;
2663}
2664
2665
2666/**
2667 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2668 * set.
2669 *
2670 * @returns true if we should continue, false if an IRQ is deliverable or a
2671 * relevant force flag is pending.
2672 * @param pVCpu The cross context virtual CPU structure of the calling
2673 * thread.
2674 * @param pTb The translation block being compiled.
2675 * @sa iemThreadedCompileCheckIrq
2676 */
2677bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2678{
2679 /*
2680 * Skip this if we've already emitted a call after the previous instruction
2681 * or if it's the first call, as we're always checking FFs between blocks.
2682 */
2683 uint32_t const idxCall = pTb->Thrd.cCalls;
2684 if ( idxCall > 0
2685 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2686 {
2687 /* Emit the call. */
2688 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2689 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2690 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2691 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2692 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2693 pCall->idxInstr = pTb->cInstructions;
2694 pCall->offOpcode = 0;
2695 pCall->cbOpcode = 0;
2696 pCall->uTbLookup = 0;
2697 pCall->fFlags = 0;
2698 pCall->auParams[0] = 0;
2699 pCall->auParams[1] = 0;
2700 pCall->auParams[2] = 0;
2701 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2702
2703 /* Reset the IRQ check value. */
2704 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2705
2706 /*
2707 * Check for deliverable IRQs and pending force flags.
2708 */
2709 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2710 }
2711 return true; /* continue */
2712}
2713
2714
2715/**
2716 * Emits an IRQ check call and checks for pending IRQs.
2717 *
2718 * @returns true if we should continue, false if an IRQ is deliverable or a
2719 * relevant force flag is pending.
2720 * @param pVCpu The cross context virtual CPU structure of the calling
2721 * thread.
2722 * @param pTb The translation block.
2723 * @sa iemThreadedCompileBeginEmitCallsComplications
2724 */
2725static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2726{
2727 /* Check again in a little bit, unless it is immediately following an STI
2728 in which case we *must* check immediately after the next instruction
2729 as well in case it's executed with interrupt inhibition. We could
2730 otherwise miss the interrupt window. See the irq2 wait2 variant in
2731 bs3-timers-1 which is doing sti + sti + cli. */
2732 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2733 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2734 else
2735 {
2736 pVCpu->iem.s.fTbCurInstrIsSti = false;
2737 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2738 }
2739 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2740
2741 /*
2742 * Emit the call.
2743 */
2744 uint32_t const idxCall = pTb->Thrd.cCalls;
2745 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2746 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2747 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2748 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2749 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2750 pCall->idxInstr = pTb->cInstructions;
2751 pCall->offOpcode = 0;
2752 pCall->cbOpcode = 0;
2753 pCall->uTbLookup = 0;
2754 pCall->fFlags = 0;
2755 pCall->auParams[0] = 0;
2756 pCall->auParams[1] = 0;
2757 pCall->auParams[2] = 0;
2758
2759 /*
2760 * Check for deliverable IRQs and pending force flags.
2761 */
2762 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2763}
2764
2765
2766/**
2767 * Compiles a new TB and executes it.
2768 *
2769 * We combine compilation and execution here as it makes for simpler code flow
2770 * in the main loop and it allows interpreting while compiling if we want to
2771 * explore that option.
2772 *
2773 * @returns Strict VBox status code.
2774 * @param pVM The cross context virtual machine structure.
2775 * @param pVCpu The cross context virtual CPU structure of the calling
2776 * thread.
2777 * @param GCPhysPc The physical address corresponding to the current
2778 * RIP+CS.BASE.
2779 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2780 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2781 */
2782static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2783{
2784 IEMTLBTRACE_TB_COMPILE(pVCpu, GCPhysPc);
2785 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2786 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2787
2788 /*
2789 * Get the TB we use for the recompiling. This is a maxed-out TB, so
2790 * we'll make a more efficient copy of it when we're done compiling.
2791 */
2792 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2793 if (pTb)
2794 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2795 else
2796 {
2797 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2798 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2799 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2800 }
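    /* Derive the flat PC by combining the instruction buffer's flat base address
       with the page offset of GCPhysPc. */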
2801 pTb->FlatPc = pVCpu->iem.s.uInstrBufPc | (GCPhysPc & GUEST_PAGE_OFFSET_MASK);
2802
2803 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2804 functions may get at it. */
2805 pVCpu->iem.s.pCurTbR3 = pTb;
2806
2807#if 0
2808 /* Make sure the CheckIrq condition matches the one in EM. */
2809 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2810 const uint32_t cZeroCalls = 1;
2811#else
2812 const uint32_t cZeroCalls = 0;
2813#endif
2814
2815 /*
2816 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2817 */
2818 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2819 iemThreadedCompileInitOpcodeFetching(pVCpu);
2820 VBOXSTRICTRC rcStrict;
2821 for (;;)
2822 {
2823 /* Process the next instruction. */
2824#ifdef LOG_ENABLED
2825 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2826 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2827 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2828 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2829#endif
2830 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2831 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2832
2833 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2834#if 0
2835 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2836 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2837 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2838#endif
2839 if ( rcStrict == VINF_SUCCESS
2840 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2841 && !pVCpu->iem.s.fEndTb)
2842 {
2843 Assert(pTb->Thrd.cCalls > cCallsPrev);
2844 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
2845
2846 pVCpu->iem.s.cInstructions++;
2847
2848 /* Check for mode change _after_ certain CIMPL calls, so check that
2849 we continue executing with the same mode value. */
2850 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2851 { /* probable */ }
2852 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2853 { /* extremely likely */ }
2854 else
2855 break;
2856
2857#if defined(LOG_ENABLED) && 0 /* for debugging */
2858 //iemThreadedCompileEmitNop(pTb);
2859 iemThreadedCompileEmitLogCpuState(pTb);
2860#endif
2861 }
2862 else
2863 {
2864 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2865 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2866 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2867 rcStrict = VINF_SUCCESS;
2868
2869 if (pTb->Thrd.cCalls > cZeroCalls)
2870 {
2871 if (cCallsPrev != pTb->Thrd.cCalls)
2872 pVCpu->iem.s.cInstructions++;
2873 break;
2874 }
2875
2876 pVCpu->iem.s.pCurTbR3 = NULL;
2877 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2878 }
2879
2880 /* Check for IRQs? */
2881 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2882 pVCpu->iem.s.cInstrTillIrqCheck--;
2883 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2884 break;
2885
2886 /* Still space in the TB? */
2887 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2888 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2889 && pTb->cTbLookupEntries < 127)
2890 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2891 else
2892 {
2893 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2894 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2895 break;
2896 }
2897 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2898 }
2899
2900 /*
2901 * Reserve lookup space for the final call entry if necessary.
2902 */
2903 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2904 if (pTb->Thrd.cCalls > 1)
2905 {
2906 if (pFinalCall->uTbLookup == 0)
2907 {
2908 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2909 pTb->cTbLookupEntries += 1;
2910 }
2911 }
2912 else if (pFinalCall->uTbLookup != 0)
2913 {
2914 Assert(pTb->cTbLookupEntries > 1);
2915 pFinalCall->uTbLookup -= 1;
2916 pTb->cTbLookupEntries -= 1;
2917 }
2918
2919 /*
2920 * Duplicate the TB into a completed one and link it.
2921 */
2922 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2923 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2924
2925 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2926
2927#ifdef IEM_COMPILE_ONLY_MODE
2928 /*
2929 * Execute the translation block.
2930 */
2931#endif
2932
2933 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2934}
2935
2936
2937
2938/*********************************************************************************************************************************
2939* Recompiled Execution Core *
2940*********************************************************************************************************************************/
2941
2942/** Default TB factor.
2943 * This is basically the number of nanoseconds we guess executing a TB takes
2944 * on average. We estimate it high if we can.
2945 * @note Best if this is a power of two so it can be translated to a shift. */
2946#define IEM_TIMER_POLL_DEFAULT_FACTOR UINT32_C(64)
2947/** The minimum number of nanoseconds we can allow between timer pollings.
2948 * This must take the cost of TMTimerPollBoolWithNanoTS into account. We put that
2949 * cost at 104 ns now, thus this constant is at 256 ns. */
2950#define IEM_TIMER_POLL_MIN_NS UINT32_C(256)
2951/** The IEM_TIMER_POLL_MIN_NS value roughly translated to TBs, with some grains
2952 * of salt thrown in.
2953 * The idea is that we will be able to make progress with guest code execution
2954 * before polling timers and between running timers. */
2955#define IEM_TIMER_POLL_MIN_ITER UINT32_C(12)
2956/** The maximum number of nanoseconds we can allow between timer pollings.
2957 * This probably shouldn't be too high, as we don't have any timer
2958 * reprogramming feedback in the polling code. So, when a device reschedules a
2959 * timer for an earlier delivery, we won't know about it. */
2960#define IEM_TIMER_POLL_MAX_NS UINT32_C(8388608) /* 0x800000 ns = 8.4 ms */
2961/** The IEM_TIMER_POLL_MAX_NS value roughly translated to TBs, with some grains
2962 * of salt thrown in.
2963 * This helps control fluctuations in the NU benchmark. */
2964#define IEM_TIMER_POLL_MAX_ITER _512K
2965
2966#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
2967/**
2968 * Calculates the number of TBs till the next timer polling using defaults.
2969 *
2970 * This is used when the previous run wasn't long enough to provide sufficient
2971 * data and when coming back from the HALT state and we haven't actually
2972 * executed anything for a while.
2973 */
2974DECL_FORCE_INLINE(uint32_t) iemPollTimersCalcDefaultCountdown(uint64_t cNsDelta) RT_NOEXCEPT
2975{
2976 if (cNsDelta >= IEM_TIMER_POLL_MAX_NS)
2977 return RT_MIN(IEM_TIMER_POLL_MAX_NS / IEM_TIMER_POLL_DEFAULT_FACTOR, IEM_TIMER_POLL_MAX_ITER);
2978
2979 cNsDelta = RT_BIT_64(ASMBitFirstSetU32(cNsDelta) - 1); /* round down to power of 2 */
2980 uint32_t const cRet = cNsDelta / IEM_TIMER_POLL_DEFAULT_FACTOR;
2981 if (cRet >= IEM_TIMER_POLL_MIN_ITER)
2982 {
2983 if (cRet <= IEM_TIMER_POLL_MAX_ITER)
2984 return cRet;
2985 return IEM_TIMER_POLL_MAX_ITER;
2986 }
2987 return IEM_TIMER_POLL_MIN_ITER;
2988}
2989#endif
2990
2991
2992/**
2993 * Helper for polling timers.
2994 */
2995DECLHIDDEN(int) iemPollTimers(PVMCC pVM, PVMCPUCC pVCpu) RT_NOEXCEPT
2996{
2997 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPoll, a);
2998
2999 /*
3000 * Check for VM_FF_TM_VIRTUAL_SYNC and call TMR3VirtualSyncFF if set.
3001 * This is something all EMTs can do.
3002 */
3003 /* If the virtual sync FF is set, respond to it. */
3004 bool fRanTimers = VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC);
3005 if (!fRanTimers)
3006 { /* likely */ }
3007 else
3008 {
3009 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3010 TMR3VirtualSyncFF(pVM, pVCpu);
3011 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3012 }
3013
3014 /*
3015 * Poll timers.
3016 *
3017 * On the 10980xe the polling averages 314 ticks, with a min of 201, while
3018 * running a Norton Utilities DOS benchmark program. TSC runs at 3GHz,
3019 * translating that to 104 ns and 67 ns respectively. (An M2 booting win11
3020 * has an average of 2 ticks / 84 ns.)
3021 *
3022 * With the same setup the TMR3VirtualSyncFF and else branch here profiles
3023 * to 79751 ticks / 26583 ns on average, with a min of 1194 ticks / 398 ns.
3024 * (An M2 booting win11 has an average of 24 ticks / 1008 ns, with a min of
3025 * 8 ticks / 336 ns.)
3026 *
3027 * If we get a zero return value we run timers. Non-timer EMTs shouldn't
3028 * ever see a zero value here, so we just call TMR3TimerQueuesDo. However,
3029 * we do not re-run timers if we already called TMR3VirtualSyncFF above; we
3030 * try to make sure some guest code is executed first.
3031 */
3032 uint64_t nsNow = 0;
3033 uint64_t cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3034 if (cNsDelta >= 1) /* It is okay to run virtual sync timers a little early. */
3035 { /* likely */ }
3036 else if (!fRanTimers || VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
3037 {
3038 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3039 TMR3TimerQueuesDo(pVM);
3040 fRanTimers = true;
3041 nsNow = 0;
3042 cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3043 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3044 }
3045 else
3046 cNsDelta = 33;
3047
3048 /*
3049 * Calc interval and update the timestamps.
3050 */
3051 uint64_t const cNsSinceLast = nsNow - pVCpu->iem.s.nsRecompilerPollNow;
3052 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3053 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3054
3055 /*
3056 * Set the next polling count down value.
3057 *
3058 * We take the previous value and adjust it according to the cNsSinceLast
3059 * value when it's not within reason. This can't be very accurate since the
3060 * CheckIrq and intra-TB checks aren't evenly spaced; they depend highly
3061 * on the guest code.
3062 */
3063#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3064 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3065 if (cNsDelta >= RT_NS_1SEC / 4)
3066 {
3067 /*
3068 * Non-timer EMTs should end up here with a fixed 500 ms delta; just return
3069 * the max and leave the polling overhead to the dedicated timer EMT.
3070 */
3071 AssertCompile(IEM_TIMER_POLL_MAX_ITER * IEM_TIMER_POLL_DEFAULT_FACTOR <= RT_NS_100MS);
3072 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3073 }
3074 else
3075 {
3076 /*
3077 * This is the timer EMT.
3078 */
3079 if (cNsDelta <= IEM_TIMER_POLL_MIN_NS)
3080 {
3081 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollTiny);
3082 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3083 }
3084 else
3085 {
3086 uint32_t const cNsDeltaAdj = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS : (uint32_t)cNsDelta;
3087 uint32_t const cNsDeltaSlack = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS / 2 : cNsDeltaAdj / 4;
3088 if ( cNsSinceLast < RT_MAX(IEM_TIMER_POLL_MIN_NS, 64)
3089 || cItersTillNextPoll < IEM_TIMER_POLL_MIN_ITER /* paranoia */)
3090 {
3091 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollDefaultCalc);
3092 cItersTillNextPoll = iemPollTimersCalcDefaultCountdown(cNsDeltaAdj);
3093 }
3094 else if ( cNsSinceLast >= cNsDeltaAdj + cNsDeltaSlack
3095 || cNsSinceLast <= cNsDeltaAdj - cNsDeltaSlack)
3096 {
3097 if (cNsSinceLast >= cItersTillNextPoll)
3098 {
3099 uint32_t uFactor = (uint32_t)(cNsSinceLast + cItersTillNextPoll - 1) / cItersTillNextPoll;
3100 cItersTillNextPoll = cNsDeltaAdj / uFactor;
3101 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorDivision, uFactor);
3102 }
3103 else
3104 {
3105 uint32_t uFactor = cItersTillNextPoll / (uint32_t)cNsSinceLast;
3106 cItersTillNextPoll = cNsDeltaAdj * uFactor;
3107 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorMultiplication, uFactor);
3108 }
3109
3110 if (cItersTillNextPoll >= IEM_TIMER_POLL_MIN_ITER)
3111 {
3112 if (cItersTillNextPoll <= IEM_TIMER_POLL_MAX_ITER)
3113 { /* likely */ }
3114 else
3115 {
3116 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollMax);
3117 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3118 }
3119 }
3120 else
3121 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3122 }
3123 else
3124 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollUnchanged);
3125 }
3126 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3127 }
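    /* Worked example of the rescaling above (assumed numbers, for illustration
       only): with cNsDeltaAdj = 1000000 ns, a previous countdown of 4096 TBs and
       cNsSinceLast = 2000000 ns, the division path computes
       uFactor = (2000000 + 4095) / 4096 = 489 ns per TB, so the new countdown
       becomes 1000000 / 489 = 2044 TBs, i.e. roughly half the previous value. */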
3128#else
3129/** Poll timers every 400 us / 2500 Hz. (source: thin air) */
3130# define IEM_TIMER_POLL_IDEAL_NS (400U * RT_NS_1US)
3131 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3132 uint32_t const cNsIdealPollInterval = IEM_TIMER_POLL_IDEAL_NS;
3133 int64_t const nsFromIdeal = cNsSinceLast - cNsIdealPollInterval;
3134 if (nsFromIdeal < 0)
3135 {
3136 if ((uint64_t)-nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll < _64K)
3137 {
3138 cItersTillNextPoll += cItersTillNextPoll / 8;
3139 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3140 }
3141 }
3142 else
3143 {
3144 if ((uint64_t)nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll > 256)
3145 {
3146 cItersTillNextPoll -= cItersTillNextPoll / 8;
3147 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3148 }
3149 }
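    /* Example (assumed numbers): with the 400 us ideal interval, a measured
       600 us poll-to-poll interval overshoots the ideal by more than 1/8th, so
       the countdown is reduced by 1/8th; a 300 us interval undershoots by more
       than 1/8th and the countdown is increased by 1/8th instead. */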
3150#endif
3151 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillNextPoll;
3152
3153 /*
3154 * Repeat the IRQ and FF checks.
3155 */
3156 if (cNsDelta > 0)
3157 {
3158 uint32_t fCpu = pVCpu->fLocalForcedActions;
3159 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3160 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3161 | VMCPU_FF_TLB_FLUSH
3162 | VMCPU_FF_UNHALT );
3163 if (RT_LIKELY( ( !fCpu
3164 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3165 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3166 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx)) ) )
3167 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
3168 {
3169 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3170 return VINF_SUCCESS;
3171 }
3172 }
3173 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3174 return VINF_IEM_REEXEC_BREAK_FF;
3175}
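/* Note: the countdown computed above is consumed by the run loop in
   IEMExecRecompiler below, which decrements cTbsTillNextTimerPoll once per
   executed or compiled TB and only calls iemPollTimers again when it reaches
   zero. */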
3176
3177
3178/** Helper for iemTbExec. */
3179DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
3180{
3181 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
3182 Assert(idx < pTb->cTbLookupEntries);
3183 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
3184}
3185
3186
3187/**
3188 * Executes a translation block.
3189 *
3190 * @returns Strict VBox status code.
3191 * @param pVCpu The cross context virtual CPU structure of the calling
3192 * thread.
3193 * @param pTb The translation block to execute.
3194 */
3195static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
3196{
3197 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
3198
3199 /*
3200 * Set the current TB so CIMPL functions may get at it.
3201 */
3202 pVCpu->iem.s.pCurTbR3 = pTb;
3203 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
3204
3205 /*
3206 * Execute the block.
3207 */
3208#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3209 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
3210 {
3211 pVCpu->iem.s.cTbExecNative++;
3212 IEMTLBTRACE_TB_EXEC_N8VE(pVCpu, pTb);
3213# ifdef LOG_ENABLED
3214 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
3215# endif
3216
3217# ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
3218# ifdef RT_ARCH_AMD64
3219 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
3220# else
3221 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
3222# endif
3223# else
3224# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3225 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
3226# endif
3227# ifdef RT_ARCH_AMD64
3228 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
3229# else
3230 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
3231# endif
3232# endif
3233
3234# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3235 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3236# endif
3237# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3238 /* Restore FPCR/MXCSR if the TB modified it. */
3239 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3240 {
3241 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3242 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3243 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3244 }
3245# endif
3246# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3247 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
3248# endif
3249 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3250 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3251 { /* likely */ }
3252 else
3253 {
3254 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
3255 pVCpu->iem.s.pCurTbR3 = NULL;
3256
3257 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3258 only to break out of TB execution early. */
3259 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3260 {
3261 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
3262 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3263 }
3264
3265 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
3266 only to break out of TB execution early due to pending FFs. */
3267 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
3268 {
3269 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
3270 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3271 }
3272
3273 /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
3274 and be converted to VINF_SUCCESS or whatever is appropriate. */
3275 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
3276 {
3277 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
3278 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
3279 }
3280
3281 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
3282 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3283 }
3284 }
3285 else
3286#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
3287 {
3288 /*
3289 * The threaded execution loop.
3290 */
3291 pVCpu->iem.s.cTbExecThreaded++;
3292 IEMTLBTRACE_TB_EXEC_THRD(pVCpu, pTb);
3293#ifdef LOG_ENABLED
3294 uint64_t uRipPrev = UINT64_MAX;
3295#endif
3296 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
3297 uint32_t cCallsLeft = pTb->Thrd.cCalls;
3298 while (cCallsLeft-- > 0)
3299 {
3300#ifdef LOG_ENABLED
3301 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
3302 {
3303 uRipPrev = pVCpu->cpum.GstCtx.rip;
3304 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
3305 }
3306 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
3307 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
3308 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
3309#endif
3310#ifdef VBOX_WITH_STATISTICS
3311 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
3312 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
3313#endif
3314 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
3315 pCallEntry->auParams[0],
3316 pCallEntry->auParams[1],
3317 pCallEntry->auParams[2]);
3318 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3319 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3320 pCallEntry++;
3321 else if (rcStrict == VINF_IEM_REEXEC_JUMP)
3322 {
3323 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
3324 Assert(cCallsLeft == 0);
3325 uint32_t const idxTarget = (uint32_t)pCallEntry->auParams[0];
3326 cCallsLeft = pTb->Thrd.cCalls;
3327 AssertBreak(idxTarget < cCallsLeft - 1);
3328 cCallsLeft -= idxTarget;
3329 pCallEntry = &pTb->Thrd.paCalls[idxTarget];
3330 AssertBreak(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET);
3331 }
3332 else
3333 {
3334 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
3335 pVCpu->iem.s.pCurTbR3 = NULL;
3336 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
3337 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
3338
3339 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3340 only to break out of TB execution early. */
3341 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3342 {
3343#ifdef VBOX_WITH_STATISTICS
3344 if (pCallEntry->uTbLookup)
3345 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
3346 else
3347 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
3348#endif
3349 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3350 }
3351 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3352 }
3353 }
3354
3355 /* Update the lookup entry. */
3356 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
3357 }
3358
3359 pVCpu->iem.s.cInstructions += pTb->cInstructions;
3360 pVCpu->iem.s.pCurTbR3 = NULL;
3361 return VINF_SUCCESS;
3362}
3363
3364
3365/**
3366 * This is called when the PC doesn't match the current pbInstrBuf.
3367 *
3368 * Upon return, we're ready for opcode fetching. But please note that
3369 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
3370 * MMIO or unassigned).
3371 */
3372static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
3373{
3374 pVCpu->iem.s.pbInstrBuf = NULL;
3375 pVCpu->iem.s.offCurInstrStart = 0;
3376 pVCpu->iem.s.offInstrNextByte = 0;
3377 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
3378 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
3379}
3380
3381
3382/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
3383DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
3384{
3385 /*
3386 * Set uCurTbStartPc to RIP and calc the effective PC.
3387 */
3388 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
3389#if 0 /* unused */
3390 pVCpu->iem.s.uCurTbStartPc = uPc;
3391#endif
3392 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
3393 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
3394
3395 /*
3396 * Advance within the current buffer (PAGE) when possible.
3397 */
3398 if (pVCpu->iem.s.pbInstrBuf)
3399 {
3400 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
3401 if (off < pVCpu->iem.s.cbInstrBufTotal)
3402 {
3403 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3404 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3405 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3406 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3407 else
3408 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3409
3410 return pVCpu->iem.s.GCPhysInstrBuf + off;
3411 }
3412 }
3413 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3414}
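/* Example of the fast path above (assumed numbers): with a 4 KiB instruction
   buffer (cbInstrBufTotal = 0x1000) and a PC 0x0f00 bytes into it, off = 0x0f00
   and, since 0x0f00 + 15 <= 0x1000, the fetch window cbInstrBuf is capped at
   0x0f0f; for a PC in the last 15 bytes of the buffer it is instead capped at
   cbInstrBufTotal itself. */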
3415
3416
3417/**
3418 * Determines the extra IEMTB_F_XXX flags.
3419 *
3420 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3421 * IEMTB_F_CS_LIM_CHECKS (or zero).
3422 * @param pVCpu The cross context virtual CPU structure of the calling
3423 * thread.
3424 */
3425DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3426{
3427 uint32_t fRet = 0;
3428
3429 /*
3430 * Determine the inhibit bits.
3431 */
3432 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3433 { /* typical */ }
3434 else
3435 {
3436 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3437 fRet |= IEMTB_F_INHIBIT_SHADOW;
3438 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3439 fRet |= IEMTB_F_INHIBIT_NMI;
3440 }
3441
3442 /*
3443 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3444 * likely to go invalid before the end of the translation block.
3445 */
3446 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3447 return fRet;
3448
3449 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3450 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3451 return fRet;
3452 return fRet | IEMTB_F_CS_LIM_CHECKS;
3453}
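/* Example of the limit heuristic above (assumed values, non-flat mode): with a
   page aligned CS base, cs.u32Limit = 0xffff and eip = 0x1000, offFromLim is
   0xefff, which exceeds X86_PAGE_SIZE + 16 = 0x1010, so no IEMTB_F_CS_LIM_CHECKS
   is needed; with eip = 0xf800 the margin is only 0x7ff and the flag is
   returned. */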
3454
3455
3456VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu, bool fWasHalted)
3457{
3458 /*
3459 * See if there is an interrupt pending in TRPM, inject it if we can.
3460 */
3461 if (!TRPMHasTrap(pVCpu))
3462 { /* likely */ }
3463 else
3464 {
3465 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3466 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3467 { /*likely */ }
3468 else
3469 return rcStrict;
3470 }
3471
3472 /*
3473 * Init the execution environment.
3474 */
3475#if 1 /** @todo this seems like a good idea, however if we ever share memory
3476 * directly with other threads on the host, it isn't necessarily... */
3477 if (pVM->cCpus == 1)
3478 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3479 else
3480#endif
3481 iemInitExec(pVCpu, 0 /*fExecOpts*/);
3482
3483 if (RT_LIKELY(!fWasHalted && pVCpu->iem.s.msRecompilerPollNow != 0))
3484 { }
3485 else
3486 {
3487 /* Do polling after halt and the first time we get here. */
3488#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3489 uint64_t nsNow = 0;
3490 uint32_t const cItersTillPoll = iemPollTimersCalcDefaultCountdown(TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow));
3491 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillPoll;
3492 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillPoll;
3493#else
3494 uint64_t const nsNow = TMVirtualGetNoCheck(pVM);
3495#endif
3496 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3497 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3498 }
3499 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3500
3501 /*
3502 * Run-loop.
3503 *
3504 * If we're using setjmp/longjmp we combine all the catching here to avoid
3505 * having to call setjmp for each block we're executing.
3506 */
3507 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3508 for (;;)
3509 {
3510 VBOXSTRICTRC rcStrict;
3511 IEM_TRY_SETJMP(pVCpu, rcStrict)
3512 {
3513 for (;;)
3514 {
3515 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3516 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3517 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3518 {
3519 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3520 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3521 if (pTb)
3522 rcStrict = iemTbExec(pVCpu, pTb);
3523 else
3524 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3525 }
3526 else
3527 {
3528 /* This can only happen if the current PC cannot be translated into a
3529 host pointer, which means we're in MMIO or unmapped memory... */
3530#if defined(VBOX_STRICT) && defined(IN_RING3)
3531 rcStrict = DBGFSTOP(pVM);
3532 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3533 return rcStrict;
3534#endif
3535 rcStrict = IEMExecLots(pVCpu, 2048, 511, NULL);
3536 }
3537 if (rcStrict == VINF_SUCCESS)
3538 {
3539 Assert(pVCpu->iem.s.cActiveMappings == 0);
3540
3541 /* Note! This IRQ/FF check is repeated in iemPollTimers, iemThreadedFunc_BltIn_CheckIrq
3542 and emitted by iemNativeRecompFunc_BltIn_CheckIrq. */
3543 uint64_t fCpu = pVCpu->fLocalForcedActions;
3544 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3545 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3546 | VMCPU_FF_TLB_FLUSH
3547 | VMCPU_FF_UNHALT );
3548 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3549 if (RT_LIKELY( ( !fCpu
3550 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3551 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3552 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3553 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3554 {
3555 /* Once in a while we need to poll timers here. */
3556 if ((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
3557 { /* likely */ }
3558 else
3559 {
3560 int rc = iemPollTimers(pVM, pVCpu);
3561 if (rc != VINF_SUCCESS)
3562 return VINF_SUCCESS;
3563 }
3564 }
3565 else
3566 return VINF_SUCCESS;
3567 }
3568 else
3569 return rcStrict;
3570 }
3571 }
3572 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3573 {
3574 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3575 pVCpu->iem.s.cLongJumps++;
3576#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3577 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3578#endif
3579 if (pVCpu->iem.s.cActiveMappings > 0)
3580 iemMemRollback(pVCpu);
3581
3582#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3583 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3584 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3585 {
3586 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3587# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3588 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3589 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3590# endif
3591
3592# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3593 /* Restore FPCR/MXCSR if the TB modified it. */
3594 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3595 {
3596 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3597 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3598 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3599 }
3600# endif
3601 }
3602#endif
3603
3604#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3605 /* If pTb isn't NULL we're in iemTbExec. */
3606 if (!pTb)
3607 {
3608 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3609 pTb = pVCpu->iem.s.pCurTbR3;
3610 if (pTb)
3611 {
3612 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3613 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3614 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3615 }
3616 }
3617#endif
3618 pVCpu->iem.s.pCurTbR3 = NULL;
3619 return rcStrict;
3620 }
3621 IEM_CATCH_LONGJMP_END(pVCpu);
3622 }
3623}
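/* A hypothetical caller sketch (illustration only, kept out of the build; the
   real call site lives in EM and handles many more states and status codes):
   VINF_SUCCESS from IEMExecRecompiler signals pending forced actions or timer
   work rather than an error, so a caller services those before resuming. */
#if 0
static VBOXSTRICTRC emR3RecompilerLoopExample(PVMCC pVM, PVMCPUCC pVCpu)
{
    for (;;)
    {
        VBOXSTRICTRC rcStrict = IEMExecRecompiler(pVM, pVCpu, false /*fWasHalted*/);
        if (rcStrict != VINF_SUCCESS)
            return rcStrict;
        /* ... service VM/VMCPU forced actions and run timers here before looping ... */
    }
}
#endif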
3624