VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@106296

Last change on this file was 106296, checked in by vboxsync, 7 weeks ago

VMM/IEM: Made VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING build on arm. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 159.3 KB
1/* $Id: IEMAllThrdRecompiler.cpp 106296 2024-10-12 01:07:25Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
117# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
118#endif
119
120
121/*********************************************************************************************************************************
122* Internal Functions *
123*********************************************************************************************************************************/
124#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
125static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb);
126#endif
127
128
129/**
130 * Calculates the effective address of a ModR/M memory operand, extended version
131 * for use in the recompilers.
132 *
133 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
134 *
135 * May longjmp on internal error.
136 *
137 * @return The effective address.
138 * @param pVCpu The cross context virtual CPU structure of the calling thread.
139 * @param bRm The ModRM byte.
140 * @param cbImmAndRspOffset - First byte: The size of any immediate
141 * following the effective address opcode bytes
142 * (only for RIP relative addressing).
143 * - Second byte: RSP displacement (for POP [ESP]).
144 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
145 * SIB byte (bits 39:32).
146 *
147 * @note This must be defined in a source file with matching
148 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
149 * or implemented differently...
150 */
151RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
152{
153 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
154# define SET_SS_DEF() \
155 do \
156 { \
157 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
158 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
159 } while (0)
160
161 if (!IEM_IS_64BIT_CODE(pVCpu))
162 {
163/** @todo Check the effective address size crap! */
164 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
165 {
166 uint16_t u16EffAddr;
167
168 /* Handle the disp16 form with no registers first. */
169 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
170 {
171 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
172 *puInfo = u16EffAddr;
173 }
174 else
175 {
176 /* Get the displacement. */
177 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
178 {
179 case 0: u16EffAddr = 0; break;
180 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
181 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
182 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
183 }
184 *puInfo = u16EffAddr;
185
186 /* Add the base and index registers to the disp. */
187 switch (bRm & X86_MODRM_RM_MASK)
188 {
189 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
190 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
191 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
192 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
193 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
194 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
195 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
196 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
197 }
198 }
199
200 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
201 return u16EffAddr;
202 }
203
204 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
205 uint32_t u32EffAddr;
206 uint64_t uInfo;
207
208 /* Handle the disp32 form with no registers first. */
209 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
210 {
211 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
212 uInfo = u32EffAddr;
213 }
214 else
215 {
216 /* Get the register (or SIB) value. */
217 uInfo = 0;
218 switch ((bRm & X86_MODRM_RM_MASK))
219 {
220 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
221 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
222 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
223 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
224 case 4: /* SIB */
225 {
226 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
227 uInfo = (uint64_t)bSib << 32;
228
229 /* Get the index and scale it. */
230 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
231 {
232 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
233 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
234 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
235 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
236 case 4: u32EffAddr = 0; /*none */ break;
237 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
238 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
239 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
240 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
241 }
242 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
243
244 /* add base */
245 switch (bSib & X86_SIB_BASE_MASK)
246 {
247 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
248 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
249 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
250 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
251 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
252 case 5:
253 if ((bRm & X86_MODRM_MOD_MASK) != 0)
254 {
255 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
256 SET_SS_DEF();
257 }
258 else
259 {
260 uint32_t u32Disp;
261 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
262 u32EffAddr += u32Disp;
263 uInfo |= u32Disp;
264 }
265 break;
266 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
267 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
268 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
269 }
270 break;
271 }
272 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
273 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
274 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
275 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
276 }
277
278 /* Get and add the displacement. */
279 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
280 {
281 case 0:
282 break;
283 case 1:
284 {
285 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
286 u32EffAddr += i8Disp;
287 uInfo |= (uint32_t)(int32_t)i8Disp;
288 break;
289 }
290 case 2:
291 {
292 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
293 u32EffAddr += u32Disp;
294 uInfo |= u32Disp;
295 break;
296 }
297 default:
298 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
299 }
300 }
301
302 *puInfo = uInfo;
303 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
304 return u32EffAddr;
305 }
306
307 uint64_t u64EffAddr;
308 uint64_t uInfo;
309
310 /* Handle the rip+disp32 form with no registers first. */
311 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
312 {
313 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
314 uInfo = (uint32_t)u64EffAddr;
315 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
316 }
317 else
318 {
319 /* Get the register (or SIB) value. */
320 uInfo = 0;
321 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
322 {
323 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
324 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
325 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
326 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
327 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
328 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
329 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
330 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
331 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
332 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
333 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
334 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
335 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
336 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
337 /* SIB */
338 case 4:
339 case 12:
340 {
341 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
342 uInfo = (uint64_t)bSib << 32;
343
344 /* Get the index and scale it. */
345 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
346 {
347 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
348 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
349 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
350 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
351 case 4: u64EffAddr = 0; /*none */ break;
352 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
353 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
354 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
355 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
356 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
357 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
358 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
359 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
360 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
361 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
362 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
363 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
364 }
365 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
366
367 /* add base */
368 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
369 {
370 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
371 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
372 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
373 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
374 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
375 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
376 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
377 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
378 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
379 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
380 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
381 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
382 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
383 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
384 /* complicated encodings */
385 case 5:
386 case 13:
387 if ((bRm & X86_MODRM_MOD_MASK) != 0)
388 {
389 if (!pVCpu->iem.s.uRexB)
390 {
391 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
392 SET_SS_DEF();
393 }
394 else
395 u64EffAddr += pVCpu->cpum.GstCtx.r13;
396 }
397 else
398 {
399 uint32_t u32Disp;
400 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
401 u64EffAddr += (int32_t)u32Disp;
402 uInfo |= u32Disp;
403 }
404 break;
405 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
406 }
407 break;
408 }
409 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
410 }
411
412 /* Get and add the displacement. */
413 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
414 {
415 case 0:
416 break;
417 case 1:
418 {
419 int8_t i8Disp;
420 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
421 u64EffAddr += i8Disp;
422 uInfo |= (uint32_t)(int32_t)i8Disp;
423 break;
424 }
425 case 2:
426 {
427 uint32_t u32Disp;
428 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
429 u64EffAddr += (int32_t)u32Disp;
430 uInfo |= u32Disp;
431 break;
432 }
433 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
434 }
435
436 }
437
438 *puInfo = uInfo;
439 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
440 {
441 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
442 return u64EffAddr;
443 }
444 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
445 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
446 return u64EffAddr & UINT32_MAX;
447}
448
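/*
 * Illustrative sketch only (hypothetical helper, not part of this file): how a
 * caller could pack the cbImmAndRspOffset argument taken by
 * iemOpHlpCalcRmEffAddrJmpEx above.  Per the doc comment and the code, the low
 * byte is the size of any immediate following the effective address bytes (used
 * for RIP relative addressing) and the second byte is an RSP displacement added
 * when the base register is xSP (e.g. for POP [ESP]).
 */
#if 0
DECL_FORCE_INLINE(uint32_t) iemExampleMakeImmAndRspOffset(uint8_t cbImm, uint8_t offRsp)
{
    return (uint32_t)cbImm | ((uint32_t)offRsp << 8);
}
/* Example: a POP [xSP] style decoder with a 32-bit operand could pass
   iemExampleMakeImmAndRspOffset(0, 4) to account for the stack pointer adjustment. */
#endif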
449
450
451/*********************************************************************************************************************************
452* Translation Block Cache. *
453*********************************************************************************************************************************/
454
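/*
 * Minimal sketch (an assumption, not the actual IEMInternal.h definitions): the
 * IEMTBCACHE_PTR_MAKE / _GET_TB / _GET_COUNT macros used below appear to pack
 * the collision-list length into the low alignment bits of the list head
 * pointer, which is why iemTbInit asserts that sizeof(IEMTB) leaves those bits
 * clear.  All names here are hypothetical stand-ins.
 */
#if 0
# define EXAMPLE_TBCACHE_PTR_COUNT_MASK UINT32_C(0xf) /* hypothetical width */
# define EXAMPLE_TBCACHE_PTR_MAKE(a_pTb, a_cCount) \
    ((PIEMTB)((uintptr_t)(a_pTb) | (a_cCount)))
# define EXAMPLE_TBCACHE_PTR_GET_TB(a_pPacked) \
    ((PIEMTB)((uintptr_t)(a_pPacked) & ~(uintptr_t)EXAMPLE_TBCACHE_PTR_COUNT_MASK))
# define EXAMPLE_TBCACHE_PTR_GET_COUNT(a_pPacked) \
    ((uint32_t)((uintptr_t)(a_pPacked) & EXAMPLE_TBCACHE_PTR_COUNT_MASK))
#endif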
455/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
456static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
457{
458 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
459 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
460 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
461 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
462 if (cMsSinceUse1 != cMsSinceUse2)
463 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
464 if (pTb1->cUsed != pTb2->cUsed)
465 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
466 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
467 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
468 return 0;
469}
470
471#ifdef VBOX_STRICT
472/**
473 * Assertion helper that checks a collision list count.
474 */
475static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
476{
477 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
478 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
479 while (pTb)
480 {
481 pTb = pTb->pNext;
482 cLeft--;
483 }
484 AssertMsg(cLeft == 0,
485 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
486 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
487}
488#endif
489
490
491DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
492{
493 STAM_PROFILE_START(&pTbCache->StatPrune, a);
494
495 /*
496 * First convert the collision list to an array.
497 */
498 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
499 uintptr_t cInserted = 0;
500 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
501
502 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
503
504 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
505 {
506 apSortedTbs[cInserted++] = pTbCollision;
507 pTbCollision = pTbCollision->pNext;
508 }
509
510 /* Free any excess (impossible). */
511 if (RT_LIKELY(!pTbCollision))
512 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
513 else
514 do
515 {
516 PIEMTB pTbToFree = pTbCollision;
517 pTbCollision = pTbToFree->pNext;
518 iemTbAllocatorFree(pVCpu, pTbToFree);
519 } while (pTbCollision);
520
521 /*
522 * Sort it by most recently used and usage count.
523 */
524 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
525
526 /* We keep half the list for now. Perhaps a bit aggressive... */
527 uintptr_t const cKeep = cInserted / 2;
528
529 /* First free up the TBs we don't wish to keep (before creating the new
530 list because otherwise the free code will scan the list for each one
531 without ever finding it). */
532 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
533 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
534
535 /* Then chain the new TB together with the ones we'd like to keep of the
536 existing ones and insert this list into the hash table. */
537 pTbCollision = pTb;
538 for (uintptr_t idx = 0; idx < cKeep; idx++)
539 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
540 pTbCollision->pNext = NULL;
541
542 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
543#ifdef VBOX_STRICT
544 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
545#endif
546
547 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
548}
549
550
551static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
552{
553 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
554 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
555 if (!pTbOldHead)
556 {
557 pTb->pNext = NULL;
558 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
559 }
560 else
561 {
562 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
563 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
564 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
565 {
566 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
567 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
568#ifdef VBOX_STRICT
569 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
570#endif
571 }
572 else
573 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
574 }
575}
576
577
578/**
579 * Unlinks @a pTb from the hash table if found in it.
580 *
581 * @returns true if unlinked, false if not present.
582 * @param pTbCache The hash table.
583 * @param pTb The TB to remove.
584 */
585static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
586{
587 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
588 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
589 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
590
591 /*
592 * At the head of the collision list?
593 */
594 if (pTbHash == pTb)
595 {
596 if (!pTb->pNext)
597 pTbCache->apHash[idxHash] = NULL;
598 else
599 {
600 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
601 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
602#ifdef VBOX_STRICT
603 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
604#endif
605 }
606 return true;
607 }
608
609 /*
610 * Search the collision list.
611 */
612 PIEMTB const pTbHead = pTbHash;
613 while (pTbHash)
614 {
615 PIEMTB const pNextTb = pTbHash->pNext;
616 if (pNextTb == pTb)
617 {
618 pTbHash->pNext = pTb->pNext;
619 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
620#ifdef VBOX_STRICT
621 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
622#endif
623 return true;
624 }
625 pTbHash = pNextTb;
626 }
627 return false;
628}
629
630
631/**
632 * Looks up a TB for the given PC and flags in the cache.
633 *
634 * @returns Pointer to TB on success, NULL if not found.
635 * @param pVCpu The cross context virtual CPU structure of the
636 * calling thread.
637 * @param pTbCache The translation block cache.
638 * @param GCPhysPc The PC to look up a TB for.
639 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
640 * the lookup.
641 * @thread EMT(pVCpu)
642 */
643static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
644 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
645{
646 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
647
648 /*
649 * First consult the lookup table entry.
650 */
651 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
652 PIEMTB pTb = *ppTbLookup;
653 if (pTb)
654 {
655 if (pTb->GCPhysPc == GCPhysPc)
656 {
657 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
658 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
659 {
660 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
661 {
662 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
663 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
664 pTb->cUsed++;
665#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
666 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
667 {
668 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
669 return pTb;
670 }
671 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
672# ifdef VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING
673 iemThreadedSaveTbForProfiling(pVCpu, pTb);
674# endif
675 return iemNativeRecompile(pVCpu, pTb);
676#else
677 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
678 return pTb;
679#endif
680 }
681 }
682 }
683 }
684
685 /*
686 * Then consult the hash table.
687 */
688 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
689#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
690 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
691#endif
692 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
693 while (pTb)
694 {
695 if (pTb->GCPhysPc == GCPhysPc)
696 {
697 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
698 {
699 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
700 {
701 STAM_COUNTER_INC(&pTbCache->cLookupHits);
702 AssertMsg(cLeft > 0, ("%d\n", cLeft));
703
704 *ppTbLookup = pTb;
705 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
706 pTb->cUsed++;
707#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
708 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
709 {
710 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
711 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
712 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
713 return pTb;
714 }
715 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
716 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
717 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
718 return iemNativeRecompile(pVCpu, pTb);
719#else
720 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
721 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
722 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
723 return pTb;
724#endif
725 }
726 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
727 }
728 else
729 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
730 }
731 else
732 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
733
734 pTb = pTb->pNext;
735#ifdef VBOX_STRICT
736 cLeft--;
737#endif
738 }
739 AssertMsg(cLeft == 0, ("%d\n", cLeft));
740 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
741 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
742 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
743 return pTb;
744}
745
746
747/*********************************************************************************************************************************
748* Translation Block Allocator.
749*********************************************************************************************************************************/
750/*
751 * Translation block allocation management.
752 */
753
754#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
755# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
756 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
757# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
758 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
759# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
760 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
761#else
762# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
763 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
764# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
765 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
766# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
767 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
768#endif
769/** Makes a TB index from a chunk index and TB index within that chunk. */
770#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
771 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
772
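/*
 * Illustrative round-trip check (hypothetical helper, not part of the file): a
 * TB index decomposes into a chunk index and an in-chunk index via the macros
 * above and recombines to the same value, regardless of which variant is active.
 */
#if 0
DECLINLINE(bool) iemExampleTbIdxRoundTrips(PIEMTBALLOCATOR pTbAllocator, uint32_t idxTb)
{
    uint32_t const idxChunk   = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
    uint32_t const idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
    return IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk) == idxTb;
}
#endif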
773
774/**
775 * Initializes the TB allocator and cache for an EMT.
776 *
777 * @returns VBox status code.
778 * @param pVM The VM handle.
779 * @param cInitialTbs The initial number of translation blocks to
780 * preallocate.
781 * @param cMaxTbs The max number of translation blocks allowed.
782 * @param cbInitialExec The initial size of the executable memory allocator.
783 * @param cbMaxExec The max size of the executable memory allocator.
784 * @param cbChunkExec The chunk size for executable memory allocator. Zero
785 * or UINT32_MAX for automatically determining this.
786 * @thread EMT
787 */
788DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
789 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
790{
791 PVMCPUCC pVCpu = VMMGetCpu(pVM);
792 Assert(!pVCpu->iem.s.pTbCacheR3);
793 Assert(!pVCpu->iem.s.pTbAllocatorR3);
794
795 /*
796 * Calculate the chunk size of the TB allocator.
797 * The minimum chunk size is 2MiB.
798 */
799 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
800 uint32_t cbPerChunk = _2M;
801 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
802#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
803 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
804 uint8_t cChunkShift = 21 - cTbShift;
805 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
806#endif
807 for (;;)
808 {
809 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
810 break;
811 cbPerChunk *= 2;
812 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
813#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
814 cChunkShift += 1;
815#endif
816 }
817
818 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
819 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
820 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
821
822 cMaxTbs = cMaxChunks * cTbsPerChunk;
823
824 /*
825 * Allocate and initialize it.
826 */
827 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
828 if (!pTbAllocator)
829 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
830 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
831 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
832 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
833 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
834 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
835 pTbAllocator->cbPerChunk = cbPerChunk;
836 pTbAllocator->cMaxTbs = cMaxTbs;
837 pTbAllocator->pTbsFreeHead = NULL;
838#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
839 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
840 pTbAllocator->cChunkShift = cChunkShift;
841 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
842#endif
843
844 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
845
846 /*
847 * Allocate the initial chunks.
848 */
849 for (uint32_t idxChunk = 0; ; idxChunk++)
850 {
851 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
852 if (!paTbs)
853 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
854 "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
855 cbPerChunk, idxChunk, pVCpu->idCpu);
856
857 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
858 {
859 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
860 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
861 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
862 }
863 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
864 pTbAllocator->cTotalTbs += cTbsPerChunk;
865
866 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
867 break;
868 }
869
870 /*
871 * Calculate the size of the hash table. We double the max TB count and
872 * round it up to the nearest power of two.
873 */
874 uint32_t cCacheEntries = cMaxTbs * 2;
875 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
876 {
877 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
878 cCacheEntries = RT_BIT_32(iBitTop);
879 Assert(cCacheEntries >= cMaxTbs * 2);
880 }
881
882 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
883 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
884 if (!pTbCache)
885 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
886 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
887 cbTbCache, cCacheEntries, pVCpu->idCpu);
888
889 /*
890 * Initialize it (assumes zeroed by the allocator).
891 */
892 pTbCache->uMagic = IEMTBCACHE_MAGIC;
893 pTbCache->cHash = cCacheEntries;
894 pTbCache->uHashMask = cCacheEntries - 1;
895 Assert(pTbCache->cHash > pTbCache->uHashMask);
896 pVCpu->iem.s.pTbCacheR3 = pTbCache;
897
898 /*
899 * Initialize the native executable memory allocator.
900 */
901#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
902 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
903 AssertLogRelRCReturn(rc, rc);
904#else
905 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
906#endif
907
908 return VINF_SUCCESS;
909}
910
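/*
 * Hypothetical invocation sketch (the sizing values are made up, not VirtualBox
 * defaults): shows the parameter order and that cbChunkExec may be UINT32_MAX
 * to let the executable memory allocator pick the chunk size itself.
 */
#if 0
int rc = iemTbInit(pVM, /*cInitialTbs=*/ _32K, /*cMaxTbs=*/ _1M,
                   /*cbInitialExec=*/ _4M, /*cbMaxExec=*/ _512M, /*cbChunkExec=*/ UINT32_MAX);
AssertRCReturn(rc, rc);
#endif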
911
912/**
913 * Inner free worker.
914 */
915static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
916 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
917{
918 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
919 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
920 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
921#ifdef VBOX_STRICT
922 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
923 Assert(pTbOther != pTb);
924#endif
925
926 /*
927 * Unlink the TB from the hash table.
928 */
929 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
930
931 /*
932 * Free the TB itself.
933 */
934 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
935 {
936 case IEMTB_F_TYPE_THREADED:
937 pTbAllocator->cThreadedTbs -= 1;
938 RTMemFree(pTb->Thrd.paCalls);
939 break;
940#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
941 case IEMTB_F_TYPE_NATIVE:
942 pTbAllocator->cNativeTbs -= 1;
943 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
944 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
945 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
946 break;
947#endif
948 default:
949 AssertFailed();
950 }
951
952 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
953
954 pTb->pNext = pTbAllocator->pTbsFreeHead;
955 pTbAllocator->pTbsFreeHead = pTb;
956 pTb->fFlags = 0;
957 pTb->GCPhysPc = UINT64_MAX;
958 pTb->Gen.uPtr = 0;
959 pTb->Gen.uData = 0;
960 pTb->cTbLookupEntries = 0;
961 pTb->cbOpcodes = 0;
962 pTb->pabOpcodes = NULL;
963
964 Assert(pTbAllocator->cInUseTbs > 0);
965
966 pTbAllocator->cInUseTbs -= 1;
967 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
968}
969
970
971/**
972 * Frees the given TB.
973 *
974 * @param pVCpu The cross context virtual CPU structure of the calling
975 * thread.
976 * @param pTb The translation block to free.
977 * @thread EMT(pVCpu)
978 */
979DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
980{
981 /*
982 * Validate state.
983 */
984 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
985 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
986 uint8_t const idxChunk = pTb->idxAllocChunk;
987 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
988 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
989 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
990
991 /*
992 * Invalidate the TB lookup pointer and call the inner worker.
993 */
994 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
995 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
996}
997
998
999/**
1000 * Schedules a TB for freeing when it's no longer being executed and/or part of
1001 * the caller's call stack.
1002 *
1003 * The TB will be removed from the translation block cache, though, so it isn't
1004 * possible to execute it again and the IEMTB::pNext member can be used to link
1005 * it together with other TBs awaiting freeing.
1006 *
1007 * @param pVCpu The cross context virtual CPU structure of the calling
1008 * thread.
1009 * @param pTb The translation block to schedule for freeing.
1010 */
1011static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1012{
1013 /*
1014 * Validate state.
1015 */
1016 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1017 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1018 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1019 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1020 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1021 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1022#ifdef VBOX_STRICT
1023 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1024 Assert(pTbOther != pTb);
1025#endif
1026
1027 /*
1028 * Remove it from the cache and prepend it to the allocator's todo list.
1029 *
1030 * Note! It could still be in various lookup tables, so we trash the GCPhys
1031 * and CS attribs to ensure it won't be reused.
1032 */
1033 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1034 pTb->GCPhysPc = NIL_RTGCPHYS;
1035 pTb->x86.fAttr = UINT16_MAX;
1036
1037 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1038 pTbAllocator->pDelayedFreeHead = pTb;
1039}
1040
1041
1042/**
1043 * Processes the delayed frees.
1044 *
1045 * This is called by the allocator function as well as the native recompile
1046 * function before making any TB or executable memory allocations respectively.
1047 */
1048void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1049{
1050 /** @todo r-bird: these have already been removed from the cache,
1051 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1052 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1053 pTbAllocator->pDelayedFreeHead = NULL;
1054 while (pTb)
1055 {
1056 PIEMTB const pTbNext = pTb->pNext;
1057 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1058 iemTbAllocatorFree(pVCpu, pTb);
1059 pTb = pTbNext;
1060 }
1061}
1062
1063
1064#if 0
1065/**
1066 * Frees all TBs.
1067 */
1068static int iemTbAllocatorFreeAll(PVMCPUCC pVCpu)
1069{
1070 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1071 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1072 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1073
1074 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1075
1076 uint32_t idxChunk = pTbAllocator->cAllocatedChunks;
1077 while (idxChunk-- > 0)
1078 {
1079 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1080 uint32_t idxTb = pTbAllocator->cTbsPerChunk;
1081 while (idxTb-- > 0)
1082 {
1083 PIEMTB const pTb = &paTbs[idxTb];
1084 if (pTb->fFlags)
1085 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxTb);
1086 }
1087 }
1088
1089 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1090
1091# if 1
1092 /* Reset the free list. */
1093 pTbAllocator->pTbsFreeHead = NULL;
1094 idxChunk = pTbAllocator->cAllocatedChunks;
1095 while (idxChunk-- > 0)
1096 {
1097 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1098 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1099 RT_BZERO(paTbs, sizeof(paTbs[0]) * cTbsPerChunk);
1100 for (uint32_t idxTb = 0; idxTb < cTbsPerChunk; idxTb++)
1101 {
1102 paTbs[idxTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1103 paTbs[idxTb].pNext = pTbAllocator->pTbsFreeHead;
1104 pTbAllocator->pTbsFreeHead = &paTbs[idxTb];
1105 }
1106 }
1107# endif
1108
1109# if 1
1110 /* Completely reset the TB cache. */
1111 RT_BZERO(pVCpu->iem.s.pTbCacheR3->apHash, sizeof(pVCpu->iem.s.pTbCacheR3->apHash[0]) * pVCpu->iem.s.pTbCacheR3->cHash);
1112# endif
1113
1114 return VINF_SUCCESS;
1115}
1116#endif
1117
1118
1119/**
1120 * Grow the translation block allocator with another chunk.
1121 */
1122static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1123{
1124 /*
1125 * Validate state.
1126 */
1127 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1128 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1129 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1130 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1131 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1132
1133 /*
1134 * Allocate a new chunk and add it to the allocator.
1135 */
1136 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1137 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1138 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1139
1140 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1141 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1142 {
1143 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1144 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1145 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1146 }
1147 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1148 pTbAllocator->cTotalTbs += cTbsPerChunk;
1149
1150 return VINF_SUCCESS;
1151}
1152
1153
1154/**
1155 * Allocates a TB from an allocator with a free block.
1156 *
1157 * This is common code to both the fast and slow allocator code paths.
1158 */
1159DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1160{
1161 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1162 Assert(pTbAllocator->pTbsFreeHead);
1163
1164 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1165 pTbAllocator->pTbsFreeHead = pTb->pNext;
1166 pTbAllocator->cInUseTbs += 1;
1167 if (fThreaded)
1168 pTbAllocator->cThreadedTbs += 1;
1169 else
1170 pTbAllocator->cNativeTbs += 1;
1171 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1172 return pTb;
1173}
1174
1175
1176/**
1177 * Slow path for iemTbAllocatorAlloc.
1178 */
1179static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1180{
1181 /*
1182 * With some luck we can add another chunk.
1183 */
1184 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1185 {
1186 int rc = iemTbAllocatorGrow(pVCpu);
1187 if (RT_SUCCESS(rc))
1188 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1189 }
1190
1191 /*
1192 * We have to prune stuff. Sigh.
1193 *
1194 * This requires scanning for older TBs and kicking them out. Not sure how to
1195 * best do this as we don't want to maintain any list of TBs ordered by last
1196 * usage time. But one reasonably simple approach would be that each time we
1197 * get here we continue a sequential scan of the allocation chunks,
1198 * considering just a smallish number of TBs and freeing a fixed portion of
1199 * them. Say, we consider the next 128 TBs, freeing the least recently used
1200 * one out of each group of 4 TBs, resulting in 32 free TBs.
1201 */
1202 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1203 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1204 uint32_t const cTbsToPrune = 128;
1205 uint32_t const cTbsPerGroup = 4;
1206 uint32_t cFreedTbs = 0;
1207#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1208 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1209#else
1210 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1211#endif
1212 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1213 idxTbPruneFrom = 0;
1214 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1215 {
1216 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1217 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1218 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1219 uint32_t cMsAge = msNow - pTb->msLastUsed;
1220 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1221
1222 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1223 {
1224#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1225 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1226 { /* likely */ }
1227 else
1228 {
1229 idxInChunk2 = 0;
1230 idxChunk2 += 1;
1231 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1232 idxChunk2 = 0;
1233 }
1234#endif
1235 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1236 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1237 if ( cMsAge2 > cMsAge
1238 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1239 {
1240 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1241 pTb = pTb2;
1242 idxChunk = idxChunk2;
1243 idxInChunk = idxInChunk2;
1244 cMsAge = cMsAge2;
1245 }
1246 }
1247
1248 /* Free the TB. */
1249 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1250 cFreedTbs++; /* paranoia */
1251 }
1252 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1253 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1254
1255 /* Flush the TB lookup entry pointer. */
1256 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1257
1258 /*
1259 * Allocate a TB from the ones we've pruned.
1260 */
1261 if (cFreedTbs)
1262 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1263 return NULL;
1264}
1265
1266
1267/**
1268 * Allocate a translation block.
1269 *
1270 * @returns Pointer to block on success, NULL if we're out and are unable to
1271 * free up an existing one (very unlikely once implemented).
1272 * @param pVCpu The cross context virtual CPU structure of the calling
1273 * thread.
1274 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1275 * For statistics.
1276 */
1277DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1278{
1279 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1280 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1281
1282 /* Free any pending TBs before we proceed. */
1283 if (!pTbAllocator->pDelayedFreeHead)
1284 { /* probably likely */ }
1285 else
1286 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1287
1288 /* If the allocator is full, take the slow code path. */
1289 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1290 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1291 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1292}
1293
1294
1295/**
1296 * This is called when we're out of space for native TBs.
1297 *
1298 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1299 * The difference is that we only prune native TBs and will only free any if
1300 * there are at least two in a group. The conditions under which we're called are
1301 * different - there will probably be free TBs in the table when we're called.
1302 * Therefore we increase the group size and max scan length, though we'll stop
1303 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1304 * up at least 8 TBs.
1305 */
1306void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1307{
1308 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1309 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1310
1311 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1312
1313 /*
1314 * Flush the delayed free list before we start freeing TBs indiscriminately.
1315 */
1316 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1317
1318 /*
1319 * Scan and free TBs.
1320 */
1321 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1322 uint32_t const cTbsToPrune = 128 * 8;
1323 uint32_t const cTbsPerGroup = 4 * 4;
1324 uint32_t cFreedTbs = 0;
1325 uint32_t cMaxInstrs = 0;
1326 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1327 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1328 {
1329 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1330 idxTbPruneFrom = 0;
1331 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1332 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1333 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1334 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1335 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1336
1337 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1338 {
1339 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1340 { /* likely */ }
1341 else
1342 {
1343 idxInChunk2 = 0;
1344 idxChunk2 += 1;
1345 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1346 idxChunk2 = 0;
1347 }
1348 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1349 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1350 {
1351 cNativeTbs += 1;
1352 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1353 if ( cMsAge2 > cMsAge
1354 || ( cMsAge2 == cMsAge
1355 && ( pTb2->cUsed < pTb->cUsed
1356 || ( pTb2->cUsed == pTb->cUsed
1357 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1358 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1359 {
1360 pTb = pTb2;
1361 idxChunk = idxChunk2;
1362 idxInChunk = idxInChunk2;
1363 cMsAge = cMsAge2;
1364 }
1365 }
1366 }
1367
1368 /* Free the TB if we found at least two native ones in this group. */
1369 if (cNativeTbs >= 2)
1370 {
1371 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1372 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1373 cFreedTbs++;
1374 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1375 break;
1376 }
1377 }
1378 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1379
1380 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1381}
1382
1383
1384/*********************************************************************************************************************************
1385* Threaded Recompiler Core *
1386*********************************************************************************************************************************/
1387/**
1388 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1389 * @returns pszBuf.
1390 * @param fFlags The flags.
1391 * @param pszBuf The output buffer.
1392 * @param cbBuf The output buffer size. At least 32 bytes.
1393 */
1394DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1395{
1396 Assert(cbBuf >= 32);
1397 static RTSTRTUPLE const s_aModes[] =
1398 {
1399 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1400 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1401 /* [02] = */ { RT_STR_TUPLE("!2!") },
1402 /* [03] = */ { RT_STR_TUPLE("!3!") },
1403 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1404 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1405 /* [06] = */ { RT_STR_TUPLE("!6!") },
1406 /* [07] = */ { RT_STR_TUPLE("!7!") },
1407 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1408 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1409 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1410 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1411 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1412 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1413 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1414 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1415 /* [10] = */ { RT_STR_TUPLE("!10!") },
1416 /* [11] = */ { RT_STR_TUPLE("!11!") },
1417 /* [12] = */ { RT_STR_TUPLE("!12!") },
1418 /* [13] = */ { RT_STR_TUPLE("!13!") },
1419 /* [14] = */ { RT_STR_TUPLE("!14!") },
1420 /* [15] = */ { RT_STR_TUPLE("!15!") },
1421 /* [16] = */ { RT_STR_TUPLE("!16!") },
1422 /* [17] = */ { RT_STR_TUPLE("!17!") },
1423 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1424 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1425 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1426 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1427 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1428 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1429 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1430 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1431 };
1432 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1433 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1434 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1435
1436 pszBuf[off++] = ' ';
1437 pszBuf[off++] = 'C';
1438 pszBuf[off++] = 'P';
1439 pszBuf[off++] = 'L';
1440 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1441 Assert(off < 32);
1442
1443 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1444
1445 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1446 {
1447 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1448 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1449 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1450 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1451 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1452 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1453 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1454 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1455 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1456 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1457 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1458 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1459 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1460 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1461 };
1462 if (fFlags)
1463 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1464 if (s_aFlags[i].fFlag & fFlags)
1465 {
1466 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1467 pszBuf[off++] = ' ';
1468 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1469 off += s_aFlags[i].cchName;
1470 fFlags &= ~s_aFlags[i].fFlag;
1471 if (!fFlags)
1472 break;
1473 }
1474 pszBuf[off] = '\0';
1475
1476 return pszBuf;
1477}
1478
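/*
 * Illustrative use only (assumes a pTb in scope and the usual logging setup):
 * format the TB flags into a small stack buffer; the doc comment above requires
 * at least 32 bytes of buffer space.
 */
#if 0
char szTmp[64];
Log2(("TB flags: %s\n", iemTbFlagsToString(pTb->fFlags, szTmp, sizeof(szTmp))));
#endif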
1479
1480/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1481static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1482{
1483 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1484 pDis->cbCachedInstr += cbMaxRead;
1485 RT_NOREF(cbMinRead);
1486 return VERR_NO_DATA;
1487}
1488
1489
1490/**
1491 * Worker for iemThreadedDisassembleTb.
1492 */
1493static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1494 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1495{
1496 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1497 {
1498 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1499 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1500 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1501 {
1502 PIEMTB pLookupTb = papTbLookup[iLookup];
1503 if (pLookupTb)
1504 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1505 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1506 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1507 : "invalid");
1508 else
1509 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1510 }
1511 pHlp->pfnPrintf(pHlp, "\n");
1512 }
1513 else
1514 {
1515 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1516 idxFirst, cEntries, pTb->cTbLookupEntries);
1517 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1518 }
1519}
1520
1521
1522DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1523{
1524 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1525
1526 char szDisBuf[512];
1527
1528 /*
1529 * Print TB info.
1530 */
1531 pHlp->pfnPrintf(pHlp,
1532 "pTb=%p: GCPhysPc=%RGp (%RGv) cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1533 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1534 pTb, pTb->GCPhysPc, pTb->FlatPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1535 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1536
1537 /*
1538 * The disassembly is driven by the threaded call entries (paCalls), whose
1539 * opcode offsets, together with the TB's range table, tell us where the
1540 * next guest instruction starts and which physical page it is on.
1541 */
1542 DISSTATE Dis;
1543 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1544 uint32_t const cCalls = pTb->Thrd.cCalls;
1545 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1546 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1547 : DISCPUMODE_64BIT;
1548 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1549 uint8_t idxRange = UINT8_MAX;
1550 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1551 uint32_t offRange = 0;
1552 uint32_t offOpcodes = 0;
1553 uint32_t const cbOpcodes = pTb->cbOpcodes;
1554 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1555 bool fTbLookupSeen0 = false;
1556
1557 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1558 {
1559 /*
1560 * New opcode range?
1561 */
1562 if ( idxRange == UINT8_MAX
1563 || idxRange >= cRanges
1564 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1565 {
1566 idxRange += 1;
1567 if (idxRange < cRanges)
1568 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1569 else
1570 continue;
1571 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1572 + (pTb->aRanges[idxRange].idxPhysPage == 0
1573 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1574 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1575 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1576 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1577 pTb->aRanges[idxRange].idxPhysPage);
1578 GCPhysPc += offRange;
1579 }
1580
1581 /*
1582 * Disassemble another guest instruction?
1583 */
1584 if ( paCalls[iCall].offOpcode != offOpcodes
1585 && paCalls[iCall].cbOpcode > 0
1586 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1587 {
1588 offOpcodes = paCalls[iCall].offOpcode;
1589 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1590 uint32_t cbInstr = 1;
1591 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1592 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1593 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1594 if (RT_SUCCESS(rc))
1595 {
1596 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1597 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1598 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1599 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1600 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1601 }
1602 else
1603 {
1604 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1605 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1606 cbInstr = paCalls[iCall].cbOpcode;
1607 }
1608 GCPhysPc += cbInstr;
1609 offRange += cbInstr;
1610 }
1611
1612 /*
1613 * Dump call details.
1614 */
1615 pHlp->pfnPrintf(pHlp,
1616 " Call #%u to %s (%u args)\n",
1617 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1618 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1619 if (paCalls[iCall].uTbLookup != 0)
1620 {
1621 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1622 fTbLookupSeen0 = idxFirst == 0;
1623 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1624 }
1625
1626 /*
1627 * Snoop fExec.
1628 */
1629 switch (paCalls[iCall].enmFunction)
1630 {
1631 default:
1632 break;
1633 case kIemThreadedFunc_BltIn_CheckMode:
1634 fExec = paCalls[iCall].auParams[0];
1635 break;
1636 }
1637 }
1638
1639 if (!fTbLookupSeen0)
1640 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1641}
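
/*
 * Illustrative sketch (kept under #if 0, not built) of how a range's guest
 * physical start address is reconstructed from the TB data, using the same
 * arithmetic as the disassembly loop above: range entries with idxPhysPage
 * zero share the TB's first page, higher values index aGCPhysPages (1-based).
 * The helper name is made up for the example.
 */
#if 0
static RTGCPHYS iemTbExampleRangeGCPhys(PCIEMTB pTb, uint8_t idxRange)
{
    uint8_t const  idxPhysPage = pTb->aRanges[idxRange].idxPhysPage;
    RTGCPHYS const GCPhysPage  = idxPhysPage == 0
                               ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
                               : pTb->aGCPhysPages[idxPhysPage - 1];
    return GCPhysPage + pTb->aRanges[idxRange].offPhysPage;
}
#endif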
1642
1643
1644
1645/**
1646 * Allocate a translation block for threaded recompilation.
1647 *
1648 * This is allocated with maxed out call table and storage for opcode bytes,
1649 * because it's only supposed to be called once per EMT to allocate the TB
1650 * pointed to by IEMCPU::pThrdCompileTbR3.
1651 *
1652 * @returns Pointer to the translation block on success, NULL on failure.
1653 * @param pVM The cross context virtual machine structure.
1654 * @param pVCpu The cross context virtual CPU structure of the calling
1655 * thread.
1656 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1657 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1658 */
1659static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1660{
1661 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1662 if (pTb)
1663 {
1664 unsigned const cCalls = 256;
1665 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1666 if (pTb->Thrd.paCalls)
1667 {
1668 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1669 if (pTb->pabOpcodes)
1670 {
1671 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1672 pTb->Thrd.cAllocated = cCalls;
1673 pTb->Thrd.cCalls = 0;
1674 pTb->cbOpcodes = 0;
1675 pTb->pNext = NULL;
1676 pTb->cUsed = 0;
1677 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1678 pTb->idxAllocChunk = UINT8_MAX;
1679 pTb->GCPhysPc = GCPhysPc;
1680 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1681 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1682 pTb->cInstructions = 0;
1683 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1684
1685 /* Init the first opcode range. */
1686 pTb->cRanges = 1;
1687 pTb->aRanges[0].cbOpcodes = 0;
1688 pTb->aRanges[0].offOpcodes = 0;
1689 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1690 pTb->aRanges[0].u2Unused = 0;
1691 pTb->aRanges[0].idxPhysPage = 0;
1692 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1693 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1694
1695 return pTb;
1696 }
1697 RTMemFree(pTb->Thrd.paCalls);
1698 }
1699 RTMemFree(pTb);
1700 }
1701 RT_NOREF(pVM);
1702 return NULL;
1703}
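
/*
 * Sizing sketch for the compile-time TB allocated above (kept under #if 0,
 * not built): 256 call entries plus 16 opcode bytes per potential call
 * (comfortably above the 15-byte maximum x86 instruction length), so the
 * compile loop never needs to grow any of the buffers.  The helper name is
 * made up for the example.
 */
#if 0
static size_t iemThreadedExampleCompileTbFootprint(void)
{
    unsigned const cCalls = 256;                   /* same constant as in iemThreadedTbAlloc */
    return sizeof(IEMTB)
         + sizeof(IEMTHRDEDCALLENTRY) * cCalls     /* threaded call table */
         + cCalls * 16;                            /* worst-case opcode byte storage */
}
#endif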
1704
1705
1706/**
1707 * Called on the TB that is dedicated for recompilation before it's reused.
1708 *
1709 * @param pVCpu The cross context virtual CPU structure of the calling
1710 * thread.
1711 * @param pTb The translation block to reuse.
1712 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1713 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1714 */
1715static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1716{
1717 pTb->GCPhysPc = GCPhysPc;
1718 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1719 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1720 pTb->Thrd.cCalls = 0;
1721 pTb->cbOpcodes = 0;
1722 pTb->cInstructions = 0;
1723 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1724
1725 /* Init the first opcode range. */
1726 pTb->cRanges = 1;
1727 pTb->aRanges[0].cbOpcodes = 0;
1728 pTb->aRanges[0].offOpcodes = 0;
1729 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1730 pTb->aRanges[0].u2Unused = 0;
1731 pTb->aRanges[0].idxPhysPage = 0;
1732 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1733 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1734}
1735
1736
1737/**
1738 * Used to duplicate a threaded translation block after recompilation is done.
1739 *
1740 * @returns Pointer to the translation block on success, NULL on failure.
1741 * @param pVM The cross context virtual machine structure.
1742 * @param pVCpu The cross context virtual CPU structure of the calling
1743 * thread.
1744 * @param pTbSrc The TB to duplicate.
1745 */
1746static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1747{
1748 /*
1749 * Just using the heap for now. Will make this more efficient and
1750 * complicated later, don't worry. :-)
1751 */
1752 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1753 if (pTb)
1754 {
1755 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1756 memcpy(pTb, pTbSrc, sizeof(*pTb));
1757 pTb->idxAllocChunk = idxAllocChunk;
1758
1759 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1760 Assert(cCalls > 0);
1761 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1762 if (pTb->Thrd.paCalls)
1763 {
1764 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1765 Assert(cbTbLookup > 0);
1766 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1767 Assert(cbOpcodes > 0);
1768 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1769 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1770 if (pbBoth)
1771 {
1772 RT_BZERO(pbBoth, cbTbLookup);
1773 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1774 pTb->Thrd.cAllocated = cCalls;
1775 pTb->pNext = NULL;
1776 pTb->cUsed = 0;
1777 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1778 pTb->fFlags = pTbSrc->fFlags;
1779
1780 return pTb;
1781 }
1782 RTMemFree(pTb->Thrd.paCalls);
1783 }
1784 iemTbAllocatorFree(pVCpu, pTb);
1785 }
1786 RT_NOREF(pVM);
1787 return NULL;
1788
1789}
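
/*
 * Layout note for the duplicated TB above: the TB lookup table and the opcode
 * bytes share a single heap block,
 *      [ cTbLookupEntries x PIEMTB (zeroed) | opcode bytes, PIEMTB aligned ]
 * with pabOpcodes pointing just past the lookup table.  This is presumably how
 * IEMTB_GET_TB_LOOKUP_TAB_ENTRY (used by the dumper above) finds the table
 * again from pabOpcodes and cTbLookupEntries.
 */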
1790
1791
1792/**
1793 * Adds the given TB to the hash table.
1794 *
1795 * @param pVCpu The cross context virtual CPU structure of the calling
1796 * thread.
1797 * @param pTbCache The cache to add it to.
1798 * @param pTb The translation block to add.
1799 */
1800static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1801{
1802 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1803
1804 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1805 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1806 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1807 if (LogIs12Enabled())
1808 {
1809 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1810 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1811 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1812 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1813 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1814 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1815 pTb->aRanges[idxRange].idxPhysPage == 0
1816 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1817 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1818 }
1819}
1820
1821
1822/**
1823 * Called by opcode verifier functions when they detect a problem.
1824 */
1825void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1826{
1827 /* We cannot free the current TB (indicated by fSafeToFree) because:
1828 - A threaded TB will have its current call entry accessed
1829 to update pVCpu->iem.s.cInstructions.
1830 - A native TB will have code left to execute. */
1831 if (fSafeToFree)
1832 iemTbAllocatorFree(pVCpu, pTb);
1833 else
1834 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1835}
1836
1837
1838/*
1839 * Real code.
1840 */
1841
1842#ifdef LOG_ENABLED
1843/**
1844 * Logs the current instruction.
1845 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1846 * @param pszFunction The IEM function doing the execution.
1847 * @param idxInstr The instruction number in the block.
1848 */
1849static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1850{
1851# ifdef IN_RING3
1852 if (LogIs2Enabled())
1853 {
1854 char szInstr[256];
1855 uint32_t cbInstr = 0;
1856 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1857 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1858 szInstr, sizeof(szInstr), &cbInstr);
1859
1860 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1861 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1862 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1863 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1864 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1865 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1866 " %s\n"
1867 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1868 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1869 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1870 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1871 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1872 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1873 szInstr));
1874
1875 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1876 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1877 }
1878 else
1879# endif
1880 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1881 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1882}
1883#endif /* LOG_ENABLED */
1884
1885
1886#if 0
1887static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1888{
1889 RT_NOREF(pVM, pVCpu);
1890 return rcStrict;
1891}
1892#endif
1893
1894
1895/**
1896 * Initializes the decoder state when compiling TBs.
1897 *
1898 * This presumes that fExec has already been initialized.
1899 *
1900 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1901 * to apply fixes to them as well.
1902 *
1903 * @param pVCpu The cross context virtual CPU structure of the calling
1904 * thread.
1905 * @param fReInit Clear for the first call for a TB, set for subsequent
1906 * calls from inside the compile loop where we can skip a
1907 * couple of things.
1908 * @param fExtraFlags The extra translation block flags when @a fReInit is
1909 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1910 * checked.
1911 */
1912DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1913{
1914 /* ASSUMES: That iemInitExec was already called and that anyone changing
1915 CPU state affecting the fExec bits since then will have updated fExec! */
1916 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1917 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1918
1919 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1920
1921 /* Decoder state: */
1922 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1923 pVCpu->iem.s.enmEffAddrMode = enmMode;
1924 if (enmMode != IEMMODE_64BIT)
1925 {
1926 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1927 pVCpu->iem.s.enmEffOpSize = enmMode;
1928 }
1929 else
1930 {
1931 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1932 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1933 }
1934 pVCpu->iem.s.fPrefixes = 0;
1935 pVCpu->iem.s.uRexReg = 0;
1936 pVCpu->iem.s.uRexB = 0;
1937 pVCpu->iem.s.uRexIndex = 0;
1938 pVCpu->iem.s.idxPrefix = 0;
1939 pVCpu->iem.s.uVex3rdReg = 0;
1940 pVCpu->iem.s.uVexLength = 0;
1941 pVCpu->iem.s.fEvexStuff = 0;
1942 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1943 pVCpu->iem.s.offModRm = 0;
1944 pVCpu->iem.s.iNextMapping = 0;
1945
1946 if (!fReInit)
1947 {
1948 pVCpu->iem.s.cActiveMappings = 0;
1949 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1950 pVCpu->iem.s.fEndTb = false;
1951 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes before executing the first instruction) */
1952 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1953 pVCpu->iem.s.fTbCrossedPage = false;
1954 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1955 pVCpu->iem.s.idxLastCheckIrqCallNo = UINT16_MAX;
1956 pVCpu->iem.s.fTbCurInstrIsSti = false;
1957 /* Force RF clearing and TF checking on first instruction in the block
1958 as we don't really know what came before and should assume the worst: */
1959 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1960 }
1961 else
1962 {
1963 Assert(pVCpu->iem.s.cActiveMappings == 0);
1964 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1965 Assert(pVCpu->iem.s.fEndTb == false);
1966 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1967 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1968 }
1969 pVCpu->iem.s.fTbCurInstr = 0;
1970
1971#ifdef DBGFTRACE_ENABLED
1972 switch (IEM_GET_CPU_MODE(pVCpu))
1973 {
1974 case IEMMODE_64BIT:
1975 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1976 break;
1977 case IEMMODE_32BIT:
1978 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1979 break;
1980 case IEMMODE_16BIT:
1981 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1982 break;
1983 }
1984#endif
1985}
1986
1987
1988/**
1989 * Initializes the opcode fetcher when starting the compilation.
1990 *
1991 * @param pVCpu The cross context virtual CPU structure of the calling
1992 * thread.
1993 */
1994DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1995{
1996 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1997#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1998 pVCpu->iem.s.offOpcode = 0;
1999#else
2000 RT_NOREF(pVCpu);
2001#endif
2002}
2003
2004
2005/**
2006 * Re-initializes the opcode fetcher between instructions while compiling.
2007 *
2008 * @param pVCpu The cross context virtual CPU structure of the calling
2009 * thread.
2010 */
2011DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
2012{
2013 if (pVCpu->iem.s.pbInstrBuf)
2014 {
2015 uint64_t off = pVCpu->cpum.GstCtx.rip;
2016 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2017 off += pVCpu->cpum.GstCtx.cs.u64Base;
2018 off -= pVCpu->iem.s.uInstrBufPc;
2019 if (off < pVCpu->iem.s.cbInstrBufTotal)
2020 {
2021 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2022 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2023 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2024 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2025 else
2026 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2027 }
2028 else
2029 {
2030 pVCpu->iem.s.pbInstrBuf = NULL;
2031 pVCpu->iem.s.offInstrNextByte = 0;
2032 pVCpu->iem.s.offCurInstrStart = 0;
2033 pVCpu->iem.s.cbInstrBuf = 0;
2034 pVCpu->iem.s.cbInstrBufTotal = 0;
2035 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2036 }
2037 }
2038 else
2039 {
2040 pVCpu->iem.s.offInstrNextByte = 0;
2041 pVCpu->iem.s.offCurInstrStart = 0;
2042 pVCpu->iem.s.cbInstrBuf = 0;
2043 pVCpu->iem.s.cbInstrBufTotal = 0;
2044#ifdef VBOX_STRICT
2045 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2046#endif
2047 }
2048#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2049 pVCpu->iem.s.offOpcode = 0;
2050#endif
2051}
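
/*
 * Illustrative sketch (kept under #if 0, not built) of the window arithmetic
 * above: the instruction buffer covers [uInstrBufPc, uInstrBufPc + cbInstrBufTotal)
 * in linear address space, and decoding may look at most 15 bytes ahead of the
 * current instruction start without forcing a new TLB load / buffer mapping.
 * The helper name and out-parameters are made up for the example.
 */
#if 0
static bool iemExampleRipStillInInstrBuf(uint64_t uRip, uint64_t uCsBase, uint64_t uInstrBufPc,
                                         uint32_t cbInstrBufTotal, uint32_t *poffNextByte, uint32_t *pcbInstrBuf)
{
    uint64_t const off = uRip + uCsBase - uInstrBufPc;
    if (off < cbInstrBufTotal)
    {
        *poffNextByte = (uint32_t)off;
        *pcbInstrBuf  = (uint32_t)off + 15 <= cbInstrBufTotal ? (uint32_t)off + 15 : cbInstrBufTotal;
        return true;
    }
    return false; /* outside the mapped window: drop the buffer and re-fetch */
}
#endif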
2052
2053#ifdef LOG_ENABLED
2054
2055/**
2056 * Inserts a NOP call.
2057 *
2058 * This is for debugging.
2059 *
2060 * @returns true on success, false if we're out of call entries.
2061 * @param pTb The translation block being compiled.
2062 */
2063bool iemThreadedCompileEmitNop(PIEMTB pTb)
2064{
2065 /* Emit the call. */
2066 uint32_t const idxCall = pTb->Thrd.cCalls;
2067 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2068 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2069 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2070 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2071 pCall->idxInstr = pTb->cInstructions - 1;
2072 pCall->cbOpcode = 0;
2073 pCall->offOpcode = 0;
2074 pCall->uTbLookup = 0;
2075 pCall->fFlags = 0;
2076 pCall->auParams[0] = 0;
2077 pCall->auParams[1] = 0;
2078 pCall->auParams[2] = 0;
2079 return true;
2080}
2081
2082
2083/**
2084 * Called by iemThreadedCompile if cpu state logging is desired.
2085 *
2086 * @returns true on success, false if we're out of call entries.
2087 * @param pTb The translation block being compiled.
2088 */
2089bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2090{
2091 /* Emit the call. */
2092 uint32_t const idxCall = pTb->Thrd.cCalls;
2093 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2094 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2095 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2096 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2097 pCall->idxInstr = pTb->cInstructions - 1;
2098 pCall->cbOpcode = 0;
2099 pCall->offOpcode = 0;
2100 pCall->uTbLookup = 0;
2101 pCall->fFlags = 0;
2102 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2103 pCall->auParams[1] = 0;
2104 pCall->auParams[2] = 0;
2105 return true;
2106}
2107
2108#endif /* LOG_ENABLED */
2109
2110DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2111{
2112 switch (cbInstr)
2113 {
2114 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2115 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2116 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2117 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2118 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2119 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2120 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2121 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2122 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2123 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2124 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2125 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2126 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2127 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2128 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2129 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2130 }
2131}
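
/*
 * The switch above is just a fully unrolled forward byte copy; a plain
 * equivalent (kept under #if 0, not built) would be:
 */
#if 0
DECLINLINE(void) iemThreadedCopyOpcodeBytesInlineRef(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
{
    Assert(cbInstr >= 1 && cbInstr <= 15);
    memcpy(pbDst, &pVCpu->iem.s.abOpcode[0], cbInstr);
}
#endif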
2132
2133#ifdef IEM_WITH_INTRA_TB_JUMPS
2134
2135/**
2136 * Emits the necessary tail calls for a full TB loop-jump.
2137 */
2138static bool iemThreadedCompileFullTbJump(PVMCPUCC pVCpu, PIEMTB pTb)
2139{
2140 /*
2141 * We need a timer check and maybe an IRQ check before jumping, so make sure
2142 * we've got sufficient call entries left before emitting anything.
2143 */
2144 uint32_t idxCall = pTb->Thrd.cCalls;
2145 if (idxCall + 1U <= pTb->Thrd.cAllocated)
2146 {
2147 /*
2148 * We're good, emit the calls.
2149 */
2150 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2151 pTb->Thrd.cCalls = (uint16_t)(idxCall + 2);
2152
2153 /* Always check timers as we risk getting stuck in a loop otherwise. We
2154 combine it with an IRQ check if that's not performed in the TB already. */
2155 pCall->enmFunction = pVCpu->iem.s.idxLastCheckIrqCallNo < idxCall
2156 ? kIemThreadedFunc_BltIn_CheckTimers
2157 : kIemThreadedFunc_BltIn_CheckTimersAndIrq;
2158 pCall->idxInstr = 0;
2159 pCall->offOpcode = 0;
2160 pCall->cbOpcode = 0;
2161 pCall->uTbLookup = 0;
2162 pCall->fFlags = 0;
2163 pCall->auParams[0] = 0;
2164 pCall->auParams[1] = 0;
2165 pCall->auParams[2] = 0;
2166 pCall++;
2167
2168 /* The jump to callentry[0]. */
2169 pCall->enmFunction = kIemThreadedFunc_BltIn_Jump;
2170 pCall->idxInstr = 0;
2171 pCall->offOpcode = 0;
2172 pCall->cbOpcode = 0;
2173 pCall->uTbLookup = 0;
2174 pCall->fFlags = 0;
2175 pCall->auParams[0] = 0; /* jump target is call zero */
2176 pCall->auParams[1] = 0;
2177 pCall->auParams[2] = 0;
2178
2179 /* Mark callentry #0 as a jump target. */
2180 pTb->Thrd.paCalls[0].fFlags |= IEMTHREADEDCALLENTRY_F_JUMP_TARGET;
2181 }
2182
2183 return false;
2184}
2185
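/*
 * Summary of what iemThreadedCompileFullTbJump emits when a TB loops back to
 * its own first instruction (informational only):
 *      [n]   BltIn_CheckTimers or BltIn_CheckTimersAndIrq  (so tight loops cannot starve timers/IRQs)
 *      [n+1] BltIn_Jump with auParams[0] = 0               (continue at call entry #0)
 * and call entry #0 gets IEMTHREADEDCALLENTRY_F_JUMP_TARGET set.  The function
 * returns false in all cases, so compilation of the TB ends here.
 */
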
2186/**
2187 * Called by IEM_MC2_BEGIN_EMIT_CALLS when it detects that we're back at the
2188 * first instruction and we didn't just branch to it (that's handled below).
2189 *
2190 * This will emit a loop iff everything is compatible with that.
2191 */
2192DECLHIDDEN(int) iemThreadedCompileBackAtFirstInstruction(PVMCPU pVCpu, PIEMTB pTb) RT_NOEXCEPT
2193{
2194 /* Check if the mode matches. */
2195 if ( (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2196 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS))
2197 {
2198 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected2);
2199 iemThreadedCompileFullTbJump(pVCpu, pTb);
2200 }
2201 return VINF_IEM_RECOMPILE_END_TB;
2202}
2203
2204#endif /* IEM_WITH_INTRA_TB_JUMPS */
2205
2206
2207/**
2208 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2209 *
2210 * - CS LIM check required.
2211 * - Must recheck opcode bytes.
2212 * - Previous instruction branched.
2213 * - TLB load detected, probably due to page crossing.
2214 *
2215 * @returns true if everything went well, false if we're out of space in the TB
2216 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
2217 * @param pVCpu The cross context virtual CPU structure of the calling
2218 * thread.
2219 * @param pTb The translation block being compiled.
2220 */
2221bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2222{
2223 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2224 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2225#if 0
2226 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2227 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2228#endif
2229
2230 /*
2231 * If we're not in 64-bit mode and not already checking CS.LIM we need to
2232 * see if it's needed to start checking.
2233 */
2234 bool fConsiderCsLimChecking;
2235 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2236 if ( fMode == IEM_F_MODE_X86_64BIT
2237 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2238 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2239 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2240 fConsiderCsLimChecking = false; /* already enabled or not needed */
2241 else
2242 {
2243 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2244 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2245 fConsiderCsLimChecking = true; /* likely */
2246 else
2247 {
2248 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2249 return false;
2250 }
2251 }
2252
2253 /*
2254 * Prepare the call now, even before we know if we can accept the instruction in this TB.
2255 * This allows us to amend parameters w/o making every case suffer.
2256 */
2257 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2258 uint16_t const offOpcode = pTb->cbOpcodes;
2259 uint8_t idxRange = pTb->cRanges - 1;
2260
2261 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2262 pCall->idxInstr = pTb->cInstructions;
2263 pCall->cbOpcode = cbInstr;
2264 pCall->offOpcode = offOpcode;
2265 pCall->uTbLookup = 0;
2266 pCall->fFlags = 0;
2267 pCall->auParams[0] = (uint32_t)cbInstr
2268 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2269 /* The upper dword is sometimes used for cbStartPage. */;
2270 pCall->auParams[1] = idxRange;
2271 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2272
2273/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2274 * gotten onto. If we do, stop */
2275
2276 /*
2277 * Case 1: We've branched (RIP changed).
2278 *
2279 * Loop check: If the new PC (GCPhysPc) is within an opcode range of this
2280 * TB, end the TB here as it is most likely a loop and if it
2281 * made sense to unroll it, the guest code compiler should've
2282 * done it already.
2283 *
2284 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2285 * Req: 1 extra range, no extra phys.
2286 *
2287 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2288 * necessary (fTbCrossedPage is true).
2289 * Req: 1 extra range, probably 1 extra phys page entry.
2290 *
2291 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2292 * but in addition we cross into the following page and require
2293 * another TLB load.
2294 * Req: 2 extra ranges, probably 2 extra phys page entries.
2295 *
2296 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2297 * the following page (thus fTbCrossedPage is true).
2298 * Req: 2 extra ranges, probably 1 extra phys page entry.
2299 *
2300 * Note! The setting fTbCrossedPage is done by the iemOpcodeFetchBytesJmp, but
2301 * it may trigger "spuriously" from the CPU point of view because of
2302 * physical page changes that'll invalidate the physical TLB and trigger a
2303 * call to the function. In theory this shouldn't be a big deal, just a bit
2304 * of performance loss as we'll pick the LoadingTlb variants.
2305 *
2306 * Note! We do not currently optimize branching to the next instruction (sorry
2307 * 32-bit PIC code). We could maybe do that in the branching code that
2308 * sets (or not) fTbBranched.
2309 */
2310 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2311 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2312 * code. This'll require filtering out far jmps and calls, as they
2313 * load CS which should technically be considered indirect since the
2314 * GDT/LDT entry's base address can be modified independently from
2315 * the code. */
2316 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2317 {
2318 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2319 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2320 {
2321 /* 1a + 1b - instruction fully within the branched to page. */
2322 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2323 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2324
2325 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2326 {
2327 /* Check that we've got a free range. */
2328 idxRange += 1;
2329 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2330 { /* likely */ }
2331 else
2332 {
2333 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2334 return false;
2335 }
2336 pCall->auParams[1] = idxRange;
2337 pCall->auParams[2] = 0;
2338
2339 /* Check that we've got a free page slot. */
2340 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2341 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2342 uint8_t idxPhysPage;
2343 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2344 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2345 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2346 {
2347 pTb->aGCPhysPages[0] = GCPhysNew;
2348 pTb->aRanges[idxRange].idxPhysPage = 1;
2349 idxPhysPage = UINT8_MAX;
2350 }
2351 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2352 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2353 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2354 {
2355 pTb->aGCPhysPages[1] = GCPhysNew;
2356 pTb->aRanges[idxRange].idxPhysPage = 2;
2357 idxPhysPage = UINT8_MAX;
2358 }
2359 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2360 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2361 else
2362 {
2363 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2364 return false;
2365 }
2366
2367 /* Loop check: We weave the loop check in here to optimize the lookup. */
2368 if (idxPhysPage != UINT8_MAX)
2369 {
2370 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2371 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2372 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2373 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2374 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2375 {
2376 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2377#ifdef IEM_WITH_INTRA_TB_JUMPS
2378 /* If we're looping back to the start of the TB and the mode is still the same,
2379 we could emit a jump optimization. For now we don't do page transitions
2380 as that implies TLB loading and such. */
2381 if ( idxLoopRange == 0
2382 && offPhysPc == pTb->aRanges[0].offPhysPage
2383 && (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2384 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS)
2385 && (pVCpu->iem.s.fTbBranched & ( IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR
2386 | IEMBRANCHED_F_STACK | IEMBRANCHED_F_RELATIVE))
2387 == IEMBRANCHED_F_RELATIVE)
2388 {
2389 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected);
2390 return iemThreadedCompileFullTbJump(pVCpu, pTb);
2391 }
2392#endif
2393 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2394 return false;
2395 }
2396 }
2397
2398 /* Finish setting up the new range. */
2399 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2400 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2401 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2402 pTb->aRanges[idxRange].u2Unused = 0;
2403 pTb->cRanges++;
2404 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2405 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2406 pTb->aRanges[idxRange].offOpcodes));
2407 }
2408 else
2409 {
2410 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2411 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2412 }
2413
2414 /* Determine which function we need to load & check.
2415 Note! For jumps to a new page, we'll set both fTbBranched and
2416 fTbCrossedPage to avoid unnecessary TLB work for intra
2417 page branching */
2418 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2419 || pVCpu->iem.s.fTbCrossedPage)
2420 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2421 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2422 : !fConsiderCsLimChecking
2423 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2424 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2425 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2426 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2427 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2428 : !fConsiderCsLimChecking
2429 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2430 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2431 else
2432 {
2433 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2434 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2435 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2436 : !fConsiderCsLimChecking
2437 ? kIemThreadedFunc_BltIn_CheckOpcodes
2438 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2439 }
2440 }
2441 else
2442 {
2443 /* 1c + 1d - instruction crosses pages. */
2444 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2445 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2446
2447 /* Lazy bird: Check that this isn't case 1c, since we've already
2448 loaded the first physical address. End the TB and
2449 make it a case 2b instead.
2450
2451 Hmm. Too much bother to detect, so just do the same
2452 with case 1d as well. */
2453#if 0 /** @todo get back to this later when we've got the actual branch code in
2454 * place. */
2455 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2456
2457 /* Check that we've got two free ranges. */
2458 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2459 { /* likely */ }
2460 else
2461 return false;
2462 idxRange += 1;
2463 pCall->auParams[1] = idxRange;
2464 pCall->auParams[2] = 0;
2465
2466 /* ... */
2467
2468#else
2469 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2470 return false;
2471#endif
2472 }
2473 }
2474
2475 /*
2476 * Case 2: Page crossing.
2477 *
2478 * Sub-case 2a: The instruction starts on the first byte in the next page.
2479 *
2480 * Sub-case 2b: The instruction has opcode bytes in both the current and
2481 * following page.
2482 *
2483 * Both cases requires a new range table entry and probably a new physical
2484 * page entry. The difference is in which functions to emit and whether to
2485 * add bytes to the current range.
2486 */
2487 else if (pVCpu->iem.s.fTbCrossedPage)
2488 {
2489 /* Check that we've got a free range. */
2490 idxRange += 1;
2491 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2492 { /* likely */ }
2493 else
2494 {
2495 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2496 return false;
2497 }
2498
2499 /* Check that we've got a free page slot. */
2500 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2501 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2502 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2503 pTb->aRanges[idxRange].idxPhysPage = 0;
2504 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2505 || pTb->aGCPhysPages[0] == GCPhysNew)
2506 {
2507 pTb->aGCPhysPages[0] = GCPhysNew;
2508 pTb->aRanges[idxRange].idxPhysPage = 1;
2509 }
2510 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2511 || pTb->aGCPhysPages[1] == GCPhysNew)
2512 {
2513 pTb->aGCPhysPages[1] = GCPhysNew;
2514 pTb->aRanges[idxRange].idxPhysPage = 2;
2515 }
2516 else
2517 {
2518 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2519 return false;
2520 }
2521
2522 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2523 {
2524 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2525 pCall->auParams[1] = idxRange;
2526 pCall->auParams[2] = 0;
2527
2528 /* Finish setting up the new range. */
2529 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2530 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2531 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2532 pTb->aRanges[idxRange].u2Unused = 0;
2533 pTb->cRanges++;
2534 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2535 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2536 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2537
2538 /* Determine which function we need to load & check. */
2539 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2540 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2541 : !fConsiderCsLimChecking
2542 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2543 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2544 }
2545 else
2546 {
2547 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2548 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2549 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2550 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2551
2552 /* We're good. Split the instruction over the old and new range table entries. */
2553 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2554
2555 pTb->aRanges[idxRange].offPhysPage = 0;
2556 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2557 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2558 pTb->aRanges[idxRange].u2Unused = 0;
2559 pTb->cRanges++;
2560 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2561 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2562 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2563
2564 /* Determine which function we need to load & check. */
2565 if (pVCpu->iem.s.fTbCheckOpcodes)
2566 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2567 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2568 : !fConsiderCsLimChecking
2569 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2570 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2571 else
2572 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2573 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2574 : !fConsiderCsLimChecking
2575 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2576 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2577 }
2578 }
2579
2580 /*
2581 * Regular case: No new range required.
2582 */
2583 else
2584 {
2585 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2586 if (pVCpu->iem.s.fTbCheckOpcodes)
2587 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2588 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2589 : kIemThreadedFunc_BltIn_CheckOpcodes;
2590 else
2591 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2592
2593 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2594 pTb->cbOpcodes = offOpcode + cbInstr;
2595 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2596 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2597 }
2598
2599 /*
2600 * Commit the call.
2601 */
2602 pTb->Thrd.cCalls++;
2603
2604 /*
2605 * Clear state.
2606 */
2607 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2608 pVCpu->iem.s.fTbCrossedPage = false;
2609 pVCpu->iem.s.fTbCheckOpcodes = false;
2610
2611 /*
2612 * Copy opcode bytes.
2613 */
2614 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2615 pTb->cbOpcodes = offOpcode + cbInstr;
2616 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2617
2618 return true;
2619}
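
/*
 * Condensed decision summary for iemThreadedCompileBeginEmitCallsComplications
 * (informational only):
 *      - branched (fTbBranched != NO):  open a new opcode range on the target
 *        page (unless it was a zero-byte jump), detect loops back into the TB,
 *        and pick a CheckXxx builtin that also validates the PC and/or reloads
 *        the TLB; an instruction crossing into a new page right after a branch
 *        ends the TB instead.
 *      - crossed a page without branching:  open a new range, possibly
 *        splitting the instruction's opcode bytes over the old and new range.
 *      - otherwise:  stay in the current range and just emit CheckOpcodes or
 *        CheckCsLim.
 * In all surviving cases the instruction's opcode bytes are appended to
 * pabOpcodes and the prepared call entry is committed.
 */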
2620
2621
2622/**
2623 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2624 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2625 *
2626 * @returns true if anything is pending, false if not.
2627 * @param pVCpu The cross context virtual CPU structure of the calling
2628 * thread.
2629 */
2630DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2631{
2632 uint64_t fCpu = pVCpu->fLocalForcedActions;
2633 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2634#if 1
2635 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2636 if (RT_LIKELY( !fCpu
2637 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2638 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2639 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2640 return false;
2641 return true;
2642#else
2643 return false;
2644#endif
2645
2646}
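
/*
 * Equivalent, more verbose form of the enabled (#if 1) branch above, spelling
 * out which force flags are gated on EFLAGS.IF and the interrupt shadow.
 * Kept under #if 0, not built; the function name is made up for the example.
 */
#if 0
DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPendingRef(PVMCPUCC pVCpu)
{
    uint64_t const fCpu = pVCpu->fLocalForcedActions
                        & (  VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC
                           | VMCPU_FF_INTERRUPT_NMI  | VMCPU_FF_INTERRUPT_SMI);
    if (!fCpu)
        return false;                               /* nothing relevant pending */
    if (fCpu & (VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI))
        return true;                                /* NMI/SMI do not care about IF */
    /* Plain APIC/PIC interrupts: deliverable only with IF set and no interrupt shadow. */
    return pVCpu->cpum.GstCtx.rflags.Bits.u1IF
        && !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx);
}
#endif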
2647
2648
2649/**
2650 * Called by iemThreadedCompile when a block requires a mode check.
2651 *
2652 * @returns true if we should continue, false if we're out of call entries.
2653 * @param pVCpu The cross context virtual CPU structure of the calling
2654 * thread.
2655 * @param pTb The translation block being compiled.
2656 */
2657static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2658{
2659 /* Emit the call. */
2660 uint32_t const idxCall = pTb->Thrd.cCalls;
2661 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2662 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2663 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2664 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2665 pCall->idxInstr = pTb->cInstructions - 1;
2666 pCall->cbOpcode = 0;
2667 pCall->offOpcode = 0;
2668 pCall->uTbLookup = 0;
2669 pCall->fFlags = 0;
2670 pCall->auParams[0] = pVCpu->iem.s.fExec;
2671 pCall->auParams[1] = 0;
2672 pCall->auParams[2] = 0;
2673 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2674 return true;
2675}
2676
2677
2678/**
2679 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2680 * set.
2681 *
2682 * @returns true if we should continue, false if an IRQ is deliverable or a
2683 * relevant force flag is pending.
2684 * @param pVCpu The cross context virtual CPU structure of the calling
2685 * thread.
2686 * @param pTb The translation block being compiled.
2687 * @sa iemThreadedCompileCheckIrq
2688 */
2689bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2690{
2691 /*
2692 * Skip this if we've already emitted a call after the previous instruction
2693 * or if it's the first call, as we're always checking FFs between blocks.
2694 */
2695 uint32_t const idxCall = pTb->Thrd.cCalls;
2696 if ( idxCall > 0
2697 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2698 {
2699 /* Emit the call. */
2700 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2701 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2702 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2703 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2704 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2705 pCall->idxInstr = pTb->cInstructions;
2706 pCall->offOpcode = 0;
2707 pCall->cbOpcode = 0;
2708 pCall->uTbLookup = 0;
2709 pCall->fFlags = 0;
2710 pCall->auParams[0] = 0;
2711 pCall->auParams[1] = 0;
2712 pCall->auParams[2] = 0;
2713 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2714
2715 /* Reset the IRQ check value. */
2716 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2717
2718 /*
2719 * Check for deliverable IRQs and pending force flags.
2720 */
2721 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2722 }
2723 return true; /* continue */
2724}
2725
2726
2727/**
2728 * Emits an IRQ check call and checks for pending IRQs.
2729 *
2730 * @returns true if we should continue, false if an IRQ is deliverable or a
2731 * relevant force flag is pending.
2732 * @param pVCpu The cross context virtual CPU structure of the calling
2733 * thread.
2734 * @param pTb The translation block.
2735 * @sa iemThreadedCompileBeginEmitCallsComplications
2736 */
2737static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2738{
2739 /* Check again in a little bit, unless it is immediately following an STI
2740 in which case we *must* check immediately after the next instruction
2741 as well in case it's executed with interrupt inhibition. We could
2742 otherwise miss the interrupt window. See the irq2 wait2 variant in
2743 bs3-timers-1 which is doing sti + sti + cli. */
2744 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2745 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2746 else
2747 {
2748 pVCpu->iem.s.fTbCurInstrIsSti = false;
2749 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2750 }
2751 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2752
2753 /*
2754 * Emit the call.
2755 */
2756 uint32_t const idxCall = pTb->Thrd.cCalls;
2757 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2758 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2759 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2760 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2761 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2762 pCall->idxInstr = pTb->cInstructions;
2763 pCall->offOpcode = 0;
2764 pCall->cbOpcode = 0;
2765 pCall->uTbLookup = 0;
2766 pCall->fFlags = 0;
2767 pCall->auParams[0] = 0;
2768 pCall->auParams[1] = 0;
2769 pCall->auParams[2] = 0;
2770
2771 /*
2772 * Check for deliverable IRQs and pending force flags.
2773 */
2774 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2775}
2776
2777
2778/**
2779 * Compiles a new TB and executes it.
2780 *
2781 * We combine compilation and execution here as it makes it simpler code flow
2782 * in the main loop and it allows interpreting while compiling if we want to
2783 * explore that option.
2784 *
2785 * @returns Strict VBox status code.
2786 * @param pVM The cross context virtual machine structure.
2787 * @param pVCpu The cross context virtual CPU structure of the calling
2788 * thread.
2789 * @param GCPhysPc The physical address corresponding to the current
2790 * RIP+CS.BASE.
2791 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2792 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2793 */
2794static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2795{
2796 IEMTLBTRACE_TB_COMPILE(pVCpu, GCPhysPc);
2797 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2798 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2799
2800 /*
2801 * Get the TB we use for the recompiling. This is a maxed-out TB that
2802 * we'll make a more efficient copy of when we're done compiling.
2803 */
2804 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2805 if (pTb)
2806 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2807 else
2808 {
2809 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2810 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2811 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2812 }
2813 pTb->FlatPc = pVCpu->iem.s.uInstrBufPc | (GCPhysPc & GUEST_PAGE_OFFSET_MASK);
2814
2815 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2816 functions may get at it. */
2817 pVCpu->iem.s.pCurTbR3 = pTb;
2818
2819#if 0
2820 /* Make sure the CheckIrq condition matches the one in EM. */
2821 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2822 const uint32_t cZeroCalls = 1;
2823#else
2824 const uint32_t cZeroCalls = 0;
2825#endif
2826
2827 /*
2828 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2829 */
2830 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2831 iemThreadedCompileInitOpcodeFetching(pVCpu);
2832 VBOXSTRICTRC rcStrict;
2833 for (;;)
2834 {
2835 /* Process the next instruction. */
2836#ifdef LOG_ENABLED
2837 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2838 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2839 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2840 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2841#endif
2842 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2843 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2844
2845 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2846#if 0
2847 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2848 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2849 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2850#endif
2851 if ( rcStrict == VINF_SUCCESS
2852 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2853 && !pVCpu->iem.s.fEndTb)
2854 {
2855 Assert(pTb->Thrd.cCalls > cCallsPrev);
2856 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
2857
2858 pVCpu->iem.s.cInstructions++;
2859
2860 /* Check for mode change _after_ certain CIMPL calls, so check that
2861 we continue executing with the same mode value. */
2862 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2863 { /* probable */ }
2864 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2865 { /* extremely likely */ }
2866 else
2867 break;
2868
2869#if defined(LOG_ENABLED) && 0 /* for debugging */
2870 //iemThreadedCompileEmitNop(pTb);
2871 iemThreadedCompileEmitLogCpuState(pTb);
2872#endif
2873 }
2874 else
2875 {
2876 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2877 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2878 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2879 rcStrict = VINF_SUCCESS;
2880
2881 if (pTb->Thrd.cCalls > cZeroCalls)
2882 {
2883 if (cCallsPrev != pTb->Thrd.cCalls)
2884 pVCpu->iem.s.cInstructions++;
2885 break;
2886 }
2887
2888 pVCpu->iem.s.pCurTbR3 = NULL;
2889 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2890 }
2891
2892 /* Check for IRQs? */
2893 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2894 pVCpu->iem.s.cInstrTillIrqCheck--;
2895 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2896 break;
2897
2898 /* Still space in the TB? */
2899 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2900 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2901 && pTb->cTbLookupEntries < 127)
2902 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2903 else
2904 {
2905 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2906 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2907 break;
2908 }
2909 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2910 }
2911
2912 /*
2913 * Reserve lookup space for the final call entry if necessary.
2914 */
2915 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2916 if (pTb->Thrd.cCalls > 1)
2917 {
2918 if (pFinalCall->uTbLookup == 0)
2919 {
2920 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2921 pTb->cTbLookupEntries += 1;
2922 }
2923 }
2924 else if (pFinalCall->uTbLookup != 0)
2925 {
2926 Assert(pTb->cTbLookupEntries > 1);
2927 pFinalCall->uTbLookup -= 1;
2928 pTb->cTbLookupEntries -= 1;
2929 }
2930
2931 /*
2932 * Duplicate the TB into a completed one and link it.
2933 */
2934 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2935 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2936
2937 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2938
2939#ifdef IEM_COMPILE_ONLY_MODE
2940 /*
2941 * Execute the translation block.
2942 */
2943#endif
2944
2945 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2946}
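
/*
 * Condensed flow of iemThreadedCompile above (informational only):
 *   1. Reuse (or lazily allocate) the EMT's maxed-out compile TB.
 *   2. Loop: decode one guest instruction via the threaded recompiler tables,
 *      which appends one or more call entries; emit CheckMode after
 *      mode-changing instructions and CheckIrq roughly every 32 instructions
 *      (and right after STI); stop on errors, explicit TB end, pending
 *      IRQs/FFs, or when call/opcode/lookup space is nearly exhausted.
 *   3. Ensure the final call entry has a TB lookup slot (or drop the dedicated
 *      slot again if the TB ended up with just one call).
 *   4. Duplicate the over-sized compile TB into a right-sized one and insert
 *      it into the TB cache.
 */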
2947
2948
2949
2950/*********************************************************************************************************************************
2951* Threaded Translation Block Saving and Restoring for Profiling the Native Recompiler *
2952*********************************************************************************************************************************/
2953#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
2954# include <iprt/message.h>
2955
2956static const SSMFIELD g_aIemThreadedTbFields[] =
2957{
2958 SSMFIELD_ENTRY( IEMTB, cUsed),
2959 SSMFIELD_ENTRY( IEMTB, msLastUsed),
2960 SSMFIELD_ENTRY_GCPHYS(IEMTB, GCPhysPc),
2961 SSMFIELD_ENTRY( IEMTB, fFlags),
2962 SSMFIELD_ENTRY( IEMTB, x86.fAttr),
2963 SSMFIELD_ENTRY( IEMTB, cRanges),
2964 SSMFIELD_ENTRY( IEMTB, cInstructions),
2965 SSMFIELD_ENTRY( IEMTB, Thrd.cCalls),
2966 SSMFIELD_ENTRY( IEMTB, cTbLookupEntries),
2967 SSMFIELD_ENTRY( IEMTB, cbOpcodes),
2968 SSMFIELD_ENTRY( IEMTB, FlatPc),
2969 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[0]),
2970 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[1]),
2971 SSMFIELD_ENTRY_TERM()
2972};
2973
2974/**
2975 * Saves a threaded TB to a dedicated saved state file.
2976 */
2977static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb)
2978{
2979 /* Only VCPU #0 for now. */
2980 if (pVCpu->idCpu != 0)
2981 return;
2982
2983 /*
2984 * Get the SSM handle, lazily opening the output file.
2985 */
2986 PSSMHANDLE const pNil = (PSSMHANDLE)~(uintptr_t)0; Assert(!RT_VALID_PTR(pNil));
2987 PSSMHANDLE pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
2988 if (pSSM && pSSM != pNil)
2989 { /* likely */ }
2990 else if (pSSM)
2991 return;
2992 else
2993 {
2994 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil;
2995 int rc = SSMR3Open("ThreadedTBsForRecompilerProfiling.sav", NULL, NULL, SSM_OPEN_F_FOR_WRITING, &pSSM);
2996 AssertLogRelRCReturnVoid(rc);
2997
2998 rc = SSMR3WriteFileHeader(pSSM, 1);
2999 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3000
3001 rc = SSMR3WriteUnitBegin(pSSM, "threaded-tbs", 1, 0);
3002 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3003 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pSSM;
3004 }
3005
3006 /*
3007 * Do the actual saving.
3008 */
3009 SSMR3PutU32(pSSM, 0); /* Indicates that another TB follows. */
3010
3011 /* The basic structure. */
3012 SSMR3PutStructEx(pSSM, pTb, sizeof(*pTb), 0 /*fFlags*/, g_aIemThreadedTbFields, NULL);
3013
3014 /* The ranges. */
3015 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3016 {
3017 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offOpcodes);
3018 SSMR3PutU16(pSSM, pTb->aRanges[iRange].cbOpcodes);
3019 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offPhysPage | (pTb->aRanges[iRange].idxPhysPage << 14));
3020 }
3021
3022 /* The opcodes. */
3023 SSMR3PutMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3024
3025 /* The threaded call table. */
3026 int rc = SSMR3PutMem(pSSM, pTb->Thrd.paCalls, sizeof(*pTb->Thrd.paCalls) * pTb->Thrd.cCalls);
3027 AssertLogRelMsgStmt(RT_SUCCESS(rc), ("rc=%Rrc\n", rc), pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil);
3028}
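
/*
 * On-disk record layout produced above, one record per TB in the
 * "threaded-tbs" unit (informational only):
 *      U32   0                              - another TB follows (UINT32_MAX terminates, see the cleanup below)
 *      struct per g_aIemThreadedTbFields    - the selected IEMTB fields
 *      cRanges x { U16 offOpcodes, U16 cbOpcodes, U16 (offPhysPage | idxPhysPage << 14) }
 *      cbOpcodes bytes                      - the opcode bytes
 *      Thrd.cCalls x IEMTHRDEDCALLENTRY     - the threaded call table, dumped as raw memory
 */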
3029
3030
3031/**
3032 * Called by IEMR3Term to finish any open profile files.
3033 *
3034 * @note This is not called on the EMT for @a pVCpu, but rather on the thread
3035 * driving the VM termination.
3036 */
3037DECLHIDDEN(void) iemThreadedSaveTbForProfilingCleanup(PVMCPU pVCpu)
3038{
3039 PSSMHANDLE const pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3040 pVCpu->iem.s.pSsmThreadedTbsForProfiling = NULL;
3041 if (RT_VALID_PTR(pSSM))
3042 {
3043 /* Indicate that this is the end. */
3044 SSMR3PutU32(pSSM, UINT32_MAX);
3045
3046 int rc = SSMR3WriteUnitComplete(pSSM);
3047 AssertLogRelRC(rc);
3048 rc = SSMR3WriteFileFooter(pSSM);
3049 AssertLogRelRC(rc);
3050 rc = SSMR3Close(pSSM);
3051 AssertLogRelRC(rc);
3052 }
3053}
3054
3055#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER && VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING */
3056
3057#ifdef IN_RING3
3058/**
3059 * API used to process what iemThreadedSaveTbForProfiling() saved.
3060 *
3061 * @note Do not mix build types or revisions. Local changes between saving the
3062 * TBs and calling this API may cause unexpected trouble.
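 *
 * @returns VBox status code.
 * @param   pVM         The cross context VM structure.
 * @param   pszFilename The saved state file to load, e.g. the
 *                      "ThreadedTBsForRecompilerProfiling.sav" file written by
 *                      iemThreadedSaveTbForProfiling().
 * @param   cMinTbs     The minimum number of TBs to recompile; the loaded TBs
 *                      are duplicated until this count is reached.
 *
 * @par Example
 * Hypothetical call site (illustration only, not an actual caller in this
 * file), driving a profiling run on the default output file:
 * @code
 *     int rc = IEMR3ThreadedProfileRecompilingSavedTbs(pVM,
 *                                                      "ThreadedTBsForRecompilerProfiling.sav",
 *                                                      16384);
 * @endcode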
3063 */
3064VMMR3DECL(int) IEMR3ThreadedProfileRecompilingSavedTbs(PVM pVM, const char *pszFilename, uint32_t cMinTbs)
3065{
3066# if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3067 PVMCPU const pVCpu = pVM->apCpusR3[0];
3068
3069 /* We need to keep an eye on the TB allocator. */
3070 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
3071
3072 /*
3073 * Load the TBs from the file.
3074 */
3075 PSSMHANDLE pSSM = NULL;
3076 int rc = SSMR3Open(pszFilename, NULL, NULL, 0, &pSSM);
3077 if (RT_SUCCESS(rc))
3078 {
3079 uint32_t cTbs = 0;
3080 PIEMTB pTbHead = NULL;
3081 PIEMTB *ppTbTail = &pTbHead;
3082 uint32_t uVersion;
3083 rc = SSMR3Seek(pSSM, "threaded-tbs", 0, &uVersion);
3084 if (RT_SUCCESS(rc))
3085 {
3086 for (;; cTbs++)
3087 {
3088 /* Check for the end tag. */
3089 uint32_t uTag = 0;
3090 rc = SSMR3GetU32(pSSM, &uTag);
3091 AssertRCBreak(rc);
3092 if (uTag == UINT32_MAX)
3093 break;
3094 AssertBreakStmt(uTag == 0, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3095
3096 /* Do we have room for another TB? */
3097 if (pTbAllocator->cInUseTbs + 2 >= pTbAllocator->cMaxTbs)
3098 {
3099 RTMsgInfo("Too many TBs to load, stopping loading early.\n");
3100 break;
3101 }
3102
3103 /* Allocate a new TB. */
3104 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
3105            AssertBreakStmt(pTb, rc = VERR_OUT_OF_RESOURCES);
3106
3107 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
3108 RT_ZERO(*pTb);
3109 pTb->idxAllocChunk = idxAllocChunk;
3110
3111 rc = SSMR3GetStructEx(pSSM, pTb, sizeof(*pTb), 0, g_aIemThreadedTbFields, NULL);
3112 if (RT_SUCCESS(rc))
3113 {
3114 AssertStmt(pTb->Thrd.cCalls > 0 && pTb->Thrd.cCalls <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3115 AssertStmt(pTb->cbOpcodes > 0 && pTb->cbOpcodes <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3116 AssertStmt(pTb->cRanges > 0 && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges), rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3117 AssertStmt(pTb->cTbLookupEntries > 0 && pTb->cTbLookupEntries <= 136, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3118
3119 if (RT_SUCCESS(rc))
3120 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3121 {
3122 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].offOpcodes);
3123 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].cbOpcodes);
3124 uint16_t uTmp = 0;
3125 rc = SSMR3GetU16(pSSM, &uTmp);
3126 AssertRCBreak(rc);
3127 pTb->aRanges[iRange].offPhysPage = uTmp & GUEST_PAGE_OFFSET_MASK;
3128 pTb->aRanges[iRange].idxPhysPage = uTmp >> 14;
3129
3130 AssertBreakStmt(pTb->aRanges[iRange].idxPhysPage <= RT_ELEMENTS(pTb->aGCPhysPages),
3131 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3132 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes < pTb->cbOpcodes,
3133 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3134 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes + pTb->aRanges[iRange].cbOpcodes <= pTb->cbOpcodes,
3135 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3136 }
3137
3138 if (RT_SUCCESS(rc))
3139 {
3140 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAllocZ(sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3141 if (pTb->Thrd.paCalls)
3142 {
3143 size_t const cbTbLookup = pTb->cTbLookupEntries * sizeof(PIEMTB);
3144 Assert(cbTbLookup > 0);
3145 size_t const cbOpcodes = pTb->cbOpcodes;
3146 Assert(cbOpcodes > 0);
3147 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
3148 uint8_t * const pbBoth = (uint8_t *)RTMemAllocZ(cbBoth);
3149 if (pbBoth)
3150 {
3151 pTb->pabOpcodes = &pbBoth[cbTbLookup];
3152 SSMR3GetMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3153 rc = SSMR3GetMem(pSSM, pTb->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3154 if (RT_SUCCESS(rc))
3155 {
3156 *ppTbTail = pTb;
3157 ppTbTail = &pTb->pNext;
3158 continue;
3159 }
3160 }
3161 else
3162 rc = VERR_NO_MEMORY;
3163 RTMemFree(pTb->Thrd.paCalls);
3164 }
3165 else
3166 rc = VERR_NO_MEMORY;
3167 }
3168 }
3169 iemTbAllocatorFree(pVCpu, pTb);
3170 break;
3171 }
3172 if (RT_FAILURE(rc))
3173 RTMsgError("Load error: %Rrc (cTbs=%u)", rc, cTbs);
3174 }
3175 else
3176 RTMsgError("SSMR3Seek failed on '%s': %Rrc", pszFilename, rc);
3177 SSMR3Close(pSSM);
3178 if (RT_SUCCESS(rc))
3179 {
3180 /*
3181 * Recompile the TBs.
3182 */
3183 if (pTbHead)
3184 {
3185 RTMsgInfo("Loaded %u TBs\n", cTbs);
3186 if (cTbs < cMinTbs)
3187 {
3188 RTMsgInfo("Duplicating TBs to reach %u TB target\n", cMinTbs);
3189 for (PIEMTB pTb = pTbHead;
3190 cTbs < cMinTbs && pTbAllocator->cInUseTbs + 2 <= pTbAllocator->cMaxTbs;
3191 pTb = pTb->pNext)
3192 {
3193 PIEMTB pTbCopy = iemThreadedTbDuplicate(pVM, pVCpu, pTbHead);
3194 if (!pTbCopy)
3195 break;
3196 *ppTbTail = pTbCopy;
3197 ppTbTail = &pTbCopy->pNext;
3198 cTbs++;
3199 }
3200 }
3201
3202 PIEMTB pTbWarmup = iemThreadedTbDuplicate(pVM, pVCpu, pTbHead);
3203 if (pTbWarmup)
3204 {
3205 iemNativeRecompile(pVCpu, pTbWarmup);
3206 RTThreadSleep(512); /* to make the start visible in the profiler. */
3207 RTMsgInfo("Ready, set, go!\n");
3208
3209 if ((pTbWarmup->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3210 {
3211 uint32_t cFailed = 0;
3212 uint64_t const nsStart = RTTimeNanoTS();
3213 for (PIEMTB pTb = pTbHead; pTb; pTb = pTb->pNext)
3214 {
3215 iemNativeRecompile(pVCpu, pTb);
3216 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) != IEMTB_F_TYPE_NATIVE)
3217 cFailed++;
3218 }
3219 uint64_t const cNsElapsed = RTTimeNanoTS() - nsStart;
3220 RTMsgInfo("Recompiled %u TBs in %'RU64 ns - averaging %'RU64 ns/TB\n",
3221 cTbs, cNsElapsed, (cNsElapsed + cTbs - 1) / cTbs);
3222 if (cFailed)
3223 {
3224                        RTMsgError("Unfortunately %u TBs failed!", cFailed);
3225 rc = VERR_GENERAL_FAILURE;
3226 }
3227 RTThreadSleep(128); /* Another gap in the profiler timeline. */
3228 }
3229 else
3230 {
3231 RTMsgError("Failed to recompile the first TB!");
3232 rc = VERR_GENERAL_FAILURE;
3233 }
3234 }
3235 else
3236 rc = VERR_NO_MEMORY;
3237 }
3238 else
3239 {
3240 RTMsgError("'%s' contains no TBs!", pszFilename);
3241 rc = VERR_NO_DATA;
3242 }
3243 }
3244 }
3245 else
3246 RTMsgError("SSMR3Open failed on '%s': %Rrc", pszFilename, rc);
3247 return rc;
3248
3249# else
3250 RT_NOREF(pVM, pszFilename, cMinTbs);
3251 return VERR_NOT_IMPLEMENTED;
3252# endif
3253}
3254#endif /* IN_RING3 */
3255
3256
3257/*********************************************************************************************************************************
3258* Recompiled Execution Core *
3259*********************************************************************************************************************************/
3260
3261/** Default TB factor.
3262 * This is basically the number of nanoseconds we guess executing a TB takes
3263 * on average. We estimate it on the high side when we can.
3264 * @note Best if this is a power of two so it can be translated to a shift. */
3265#define IEM_TIMER_POLL_DEFAULT_FACTOR UINT32_C(64)
3266/** The minimum number of nanoseconds we can allow between timer pollings.
3267 * This must take the cost of TMTimerPollBoolWithNanoTS into mind. We put that
3268 * cost at 104 ns now, thus this constant is at 256 ns. */
3269#define IEM_TIMER_POLL_MIN_NS UINT32_C(256)
3270/** The IEM_TIMER_POLL_MIN_NS value roughly translated to TBs, with some grains
3271 * of salt thrown in.
3272 * The idea is that we will be able to make progress with guest code execution
3273 * before polling timers and between running timers. */
3274#define IEM_TIMER_POLL_MIN_ITER UINT32_C(12)
3275/** The maximum number of nanoseconds we can allow between timer pollings.
3276 * This probably shouldn't be too high, as we don't have any timer
3277 * reprogramming feedback in the polling code. So, when a device reschedules a
3278 * timer for earlier delivery, we won't know about it. */
3279#define IEM_TIMER_POLL_MAX_NS UINT32_C(8388608) /* 0x800000 ns = 8.4 ms */
3280/** The IEM_TIMER_POLL_MAX_NS value roughly translated to TBs, with some grains
3281 * of salt thrown in.
3282 * This helps control fluctuations in the NU benchmark. */
3283#define IEM_TIMER_POLL_MAX_ITER _512K
3284
3285#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3286/**
3287 * Calculates the number of TBs till the next timer polling using defaults.
3288 *
3289 * This is used when the previous run wasn't long enough to provide sufficient
3290 * data, and when coming back from the HALT state after not having actually
3291 * executed anything for a while.
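 *
 * E.g. a delta of IEM_TIMER_POLL_MAX_NS or more yields
 * IEM_TIMER_POLL_MAX_NS / IEM_TIMER_POLL_DEFAULT_FACTOR = 131072 iterations
 * (below the IEM_TIMER_POLL_MAX_ITER cap), while shorter deltas are first
 * rounded down to a power of two, divided by the same factor and clamped to
 * the [IEM_TIMER_POLL_MIN_ITER, IEM_TIMER_POLL_MAX_ITER] range.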
3292 */
3293DECL_FORCE_INLINE(uint32_t) iemPollTimersCalcDefaultCountdown(uint64_t cNsDelta) RT_NOEXCEPT
3294{
3295 if (cNsDelta >= IEM_TIMER_POLL_MAX_NS)
3296 return RT_MIN(IEM_TIMER_POLL_MAX_NS / IEM_TIMER_POLL_DEFAULT_FACTOR, IEM_TIMER_POLL_MAX_ITER);
3297
3298    cNsDelta = RT_BIT_64(ASMBitLastSetU32((uint32_t)cNsDelta) - 1); /* round down to power of 2 */
3299 uint32_t const cRet = cNsDelta / IEM_TIMER_POLL_DEFAULT_FACTOR;
3300 if (cRet >= IEM_TIMER_POLL_MIN_ITER)
3301 {
3302 if (cRet <= IEM_TIMER_POLL_MAX_ITER)
3303 return cRet;
3304 return IEM_TIMER_POLL_MAX_ITER;
3305 }
3306 return IEM_TIMER_POLL_MIN_ITER;
3307}
3308#endif
3309
3310
3311/**
3312 * Helper for polling timers.
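 *
 * @returns VINF_SUCCESS if execution can continue, or VINF_IEM_REEXEC_BREAK_FF
 *          if pending force flags require the caller to break out.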
3313 */
3314DECLHIDDEN(int) iemPollTimers(PVMCC pVM, PVMCPUCC pVCpu) RT_NOEXCEPT
3315{
3316 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPoll, a);
3317
3318 /*
3319 * Check for VM_FF_TM_VIRTUAL_SYNC and call TMR3VirtualSyncFF if set.
3320 * This is something all EMTs can do.
3321 */
3322 /* If the virtual sync FF is set, respond to it. */
3323 bool fRanTimers = VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC);
3324 if (!fRanTimers)
3325 { /* likely */ }
3326 else
3327 {
3328 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3329 TMR3VirtualSyncFF(pVM, pVCpu);
3330 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3331 }
3332
3333 /*
3334 * Poll timers.
3335 *
3336     * On the 10980XE the polling averages 314 ticks, with a min of 201, while
3337     * running a Norton Utilities DOS benchmark program. The TSC runs at 3GHz,
3338     * translating to 104 ns and 67 ns respectively. (An M2 booting win11
3339 * has an average of 2 ticks / 84 ns.)
3340 *
3341     * With the same setup, the TMR3VirtualSyncFF call and the else branch here
3342     * profile to 79751 ticks / 26583 ns on average, with a min of 1194 ticks / 398 ns.
3343 * (An M2 booting win11 has an average of 24 ticks / 1008 ns, with a min of
3344 * 8 ticks / 336 ns.)
3345 *
3346 * If we get a zero return value we run timers. Non-timer EMTs shouldn't
3347 * ever see a zero value here, so we just call TMR3TimerQueuesDo. However,
3348     * we do not re-run timers if we already called TMR3VirtualSyncFF above; we
3349     * try to make sure some code is executed first.
3350 */
3351 uint64_t nsNow = 0;
3352 uint64_t cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3353 if (cNsDelta >= 1) /* It is okay to run virtual sync timers a little early. */
3354 { /* likely */ }
3355 else if (!fRanTimers || VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
3356 {
3357 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3358 TMR3TimerQueuesDo(pVM);
3359 fRanTimers = true;
3360 nsNow = 0;
3361 cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3362 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3363 }
3364 else
3365 cNsDelta = 33;
3366
3367 /*
3368 * Calc interval and update the timestamps.
3369 */
3370 uint64_t const cNsSinceLast = nsNow - pVCpu->iem.s.nsRecompilerPollNow;
3371 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3372 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3373
3374 /*
3375 * Set the next polling count down value.
3376 *
3377 * We take the previous value and adjust it according to the cNsSinceLast
3378     * value if it's not within reason. This can't be very accurate since the
3379     * CheckIrq and intra-TB checks aren't evenly spaced; they depend highly
3380 * on the guest code.
3381 */
3382#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3383 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3384 if (cNsDelta >= RT_NS_1SEC / 4)
3385 {
3386 /*
3387         * Non-timer EMTs should end up here with a fixed 500ms delta; just return
3388         * the max and leave the polling overhead to the dedicated timer EMT.
3389 */
3390 AssertCompile(IEM_TIMER_POLL_MAX_ITER * IEM_TIMER_POLL_DEFAULT_FACTOR <= RT_NS_100MS);
3391 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3392 }
3393 else
3394 {
3395 /*
3396 * This is the timer EMT.
3397 */
3398 if (cNsDelta <= IEM_TIMER_POLL_MIN_NS)
3399 {
3400 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollTiny);
3401 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3402 }
3403 else
3404 {
3405 uint32_t const cNsDeltaAdj = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS : (uint32_t)cNsDelta;
3406 uint32_t const cNsDeltaSlack = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS / 2 : cNsDeltaAdj / 4;
3407 if ( cNsSinceLast < RT_MAX(IEM_TIMER_POLL_MIN_NS, 64)
3408 || cItersTillNextPoll < IEM_TIMER_POLL_MIN_ITER /* paranoia */)
3409 {
3410 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollDefaultCalc);
3411 cItersTillNextPoll = iemPollTimersCalcDefaultCountdown(cNsDeltaAdj);
3412 }
3413 else if ( cNsSinceLast >= cNsDeltaAdj + cNsDeltaSlack
3414 || cNsSinceLast <= cNsDeltaAdj - cNsDeltaSlack)
3415 {
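                    /* Re-derive the approximate per-iteration cost from the last interval and
                       rescale the countdown to cover roughly cNsDeltaAdj nanoseconds.  E.g.
                       524288 iterations taking 2097152 ns gives a factor of 4 ns/iteration, so
                       a 1048576 ns deadline becomes a 262144 iteration countdown. */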
3416 if (cNsSinceLast >= cItersTillNextPoll)
3417 {
3418 uint32_t uFactor = (uint32_t)(cNsSinceLast + cItersTillNextPoll - 1) / cItersTillNextPoll;
3419 cItersTillNextPoll = cNsDeltaAdj / uFactor;
3420 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorDivision, uFactor);
3421 }
3422 else
3423 {
3424 uint32_t uFactor = cItersTillNextPoll / (uint32_t)cNsSinceLast;
3425 cItersTillNextPoll = cNsDeltaAdj * uFactor;
3426 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorMultiplication, uFactor);
3427 }
3428
3429 if (cItersTillNextPoll >= IEM_TIMER_POLL_MIN_ITER)
3430 {
3431 if (cItersTillNextPoll <= IEM_TIMER_POLL_MAX_ITER)
3432 { /* likely */ }
3433 else
3434 {
3435 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollMax);
3436 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3437 }
3438 }
3439 else
3440 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3441 }
3442 else
3443 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollUnchanged);
3444 }
3445 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3446 }
3447#else
3448/** Poll timers every 400 us / 2500 Hz. (source: thin air) */
3449# define IEM_TIMER_POLL_IDEAL_NS (400U * RT_NS_1US)
3450 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3451 uint32_t const cNsIdealPollInterval = IEM_TIMER_POLL_IDEAL_NS;
3452 int64_t const nsFromIdeal = cNsSinceLast - cNsIdealPollInterval;
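    /* Simple 1/8-step adjustment: when the measured interval deviates from the 400 us
       ideal by more than 1/8, grow or shrink the countdown by 1/8, keeping it roughly
       within the 256..64K range. */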
3453 if (nsFromIdeal < 0)
3454 {
3455 if ((uint64_t)-nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll < _64K)
3456 {
3457 cItersTillNextPoll += cItersTillNextPoll / 8;
3458 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3459 }
3460 }
3461 else
3462 {
3463 if ((uint64_t)nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll > 256)
3464 {
3465 cItersTillNextPoll -= cItersTillNextPoll / 8;
3466 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3467 }
3468 }
3469#endif
3470 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillNextPoll;
3471
3472 /*
3473 * Repeat the IRQ and FF checks.
3474 */
3475 if (cNsDelta > 0)
3476 {
3477 uint32_t fCpu = pVCpu->fLocalForcedActions;
3478 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3479 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3480 | VMCPU_FF_TLB_FLUSH
3481 | VMCPU_FF_UNHALT );
3482 if (RT_LIKELY( ( !fCpu
3483 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3484 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3485 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx)) ) )
3486 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
3487 {
3488 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3489 return VINF_SUCCESS;
3490 }
3491 }
3492 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3493 return VINF_IEM_REEXEC_BREAK_FF;
3494}
3495
3496
3497/** Helper for iemTbExec. */
3498DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
3499{
3500 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
3501 Assert(idx < pTb->cTbLookupEntries);
3502 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
3503}
3504
3505
3506/**
3507 * Executes a translation block.
3508 *
3509 * @returns Strict VBox status code.
3510 * @param pVCpu The cross context virtual CPU structure of the calling
3511 * thread.
3512 * @param pTb The translation block to execute.
3513 */
3514static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
3515{
3516 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
3517
3518 /*
3519 * Set the current TB so CIMPL functions may get at it.
3520 */
3521 pVCpu->iem.s.pCurTbR3 = pTb;
3522 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
3523
3524 /*
3525 * Execute the block.
3526 */
3527#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3528 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
3529 {
3530 pVCpu->iem.s.cTbExecNative++;
3531 IEMTLBTRACE_TB_EXEC_N8VE(pVCpu, pTb);
3532# ifdef LOG_ENABLED
3533 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
3534# endif
3535
3536# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3537 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
3538# endif
3539# ifdef RT_ARCH_AMD64
3540 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
3541# else
3542 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
3543# endif
3544
3545# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3546 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3547# endif
3548# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3549 /* Restore FPCR/MXCSR if the TB modified it. */
3550 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3551 {
3552 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3553 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3554 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3555 }
3556# endif
3557# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3558 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
3559# endif
3560 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3561 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3562 { /* likely */ }
3563 else
3564 {
3565 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
3566 pVCpu->iem.s.pCurTbR3 = NULL;
3567
3568 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3569 only to break out of TB execution early. */
3570 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3571 {
3572 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
3573 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3574 }
3575
3576 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
3577 only to break out of TB execution early due to pending FFs. */
3578 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
3579 {
3580 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
3581 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3582 }
3583
3584 /* VINF_IEM_REEXEC_WITH_FLAGS needs to receive special treatment
3585 and converted to VINF_SUCCESS or whatever is appropriate. */
3586 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
3587 {
3588 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
3589 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
3590 }
3591
3592 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
3593 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3594 }
3595 }
3596 else
3597#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
3598 {
3599 /*
3600 * The threaded execution loop.
3601 */
3602 pVCpu->iem.s.cTbExecThreaded++;
3603 IEMTLBTRACE_TB_EXEC_THRD(pVCpu, pTb);
3604#ifdef LOG_ENABLED
3605 uint64_t uRipPrev = UINT64_MAX;
3606#endif
3607 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
3608 uint32_t cCallsLeft = pTb->Thrd.cCalls;
3609 while (cCallsLeft-- > 0)
3610 {
3611#ifdef LOG_ENABLED
3612 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
3613 {
3614 uRipPrev = pVCpu->cpum.GstCtx.rip;
3615 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
3616 }
3617 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
3618 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
3619 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
3620#endif
3621#ifdef VBOX_WITH_STATISTICS
3622 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
3623 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
3624#endif
3625 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
3626 pCallEntry->auParams[0],
3627 pCallEntry->auParams[1],
3628 pCallEntry->auParams[2]);
3629 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3630 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3631 pCallEntry++;
3632 else if (rcStrict == VINF_IEM_REEXEC_JUMP)
3633 {
3634 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
3635 Assert(cCallsLeft == 0);
3636 uint32_t const idxTarget = (uint32_t)pCallEntry->auParams[0];
3637 cCallsLeft = pTb->Thrd.cCalls;
3638 AssertBreak(idxTarget < cCallsLeft - 1);
3639 cCallsLeft -= idxTarget;
3640 pCallEntry = &pTb->Thrd.paCalls[idxTarget];
3641 AssertBreak(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET);
3642 }
3643 else
3644 {
3645 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
3646 pVCpu->iem.s.pCurTbR3 = NULL;
3647 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
3648 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
3649
3650 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3651 only to break out of TB execution early. */
3652 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3653 {
3654#ifdef VBOX_WITH_STATISTICS
3655 if (pCallEntry->uTbLookup)
3656 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
3657 else
3658 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
3659#endif
3660 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3661 }
3662 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3663 }
3664 }
3665
3666 /* Update the lookup entry. */
3667 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
3668 }
3669
3670 pVCpu->iem.s.cInstructions += pTb->cInstructions;
3671 pVCpu->iem.s.pCurTbR3 = NULL;
3672 return VINF_SUCCESS;
3673}
3674
3675
3676/**
3677 * This is called when the PC doesn't match the current pbInstrBuf.
3678 *
3679 * Upon return, we're ready for opcode fetching. But please note that
3680 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
3681 * MMIO or unassigned).
3682 */
3683static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
3684{
3685 pVCpu->iem.s.pbInstrBuf = NULL;
3686 pVCpu->iem.s.offCurInstrStart = 0;
3687 pVCpu->iem.s.offInstrNextByte = 0;
3688 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
3689 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
3690}
3691
3692
3693/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
3694DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
3695{
3696 /*
3697 * Set uCurTbStartPc to RIP and calc the effective PC.
3698 */
3699 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
3700#if 0 /* unused */
3701 pVCpu->iem.s.uCurTbStartPc = uPc;
3702#endif
3703 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
3704 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
3705
3706 /*
3707 * Advance within the current buffer (PAGE) when possible.
3708 */
3709 if (pVCpu->iem.s.pbInstrBuf)
3710 {
3711 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
3712 if (off < pVCpu->iem.s.cbInstrBufTotal)
3713 {
3714 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3715 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
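            /* Cap the fetchable range at 15 bytes (max x86 instruction length) past the
               instruction start, or at the end of the buffer, whichever comes first. */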
3716 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3717 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3718 else
3719 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3720
3721 return pVCpu->iem.s.GCPhysInstrBuf + off;
3722 }
3723 }
3724 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3725}
3726
3727
3728/**
3729 * Determines the extra IEMTB_F_XXX flags.
3730 *
3731 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3732 * IEMTB_F_CS_LIM_CHECKS (or zero).
3733 * @param pVCpu The cross context virtual CPU structure of the calling
3734 * thread.
3735 */
3736DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3737{
3738 uint32_t fRet = 0;
3739
3740 /*
3741 * Determine the inhibit bits.
3742 */
3743 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3744 { /* typical */ }
3745 else
3746 {
3747 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3748 fRet |= IEMTB_F_INHIBIT_SHADOW;
3749 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3750 fRet |= IEMTB_F_INHIBIT_NMI;
3751 }
3752
3753 /*
3754 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3755 * likely to go invalid before the end of the translation block.
3756 */
3757 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3758 return fRet;
3759
3760 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3761 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3762 return fRet;
3763 return fRet | IEMTB_F_CS_LIM_CHECKS;
3764}
3765
3766
3767VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu, bool fWasHalted)
3768{
3769 /*
3770 * See if there is an interrupt pending in TRPM, inject it if we can.
3771 */
3772 if (!TRPMHasTrap(pVCpu))
3773 { /* likely */ }
3774 else
3775 {
3776 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3777 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3778 { /*likely */ }
3779 else
3780 return rcStrict;
3781 }
3782
3783 /*
3784 * Init the execution environment.
3785 */
3786#if 1 /** @todo this seems like a good idea, however if we ever share memory
3787 * directly with other threads on the host, it isn't necessarily... */
3788 if (pVM->cCpus == 1)
3789 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3790 else
3791#endif
3792 iemInitExec(pVCpu, 0 /*fExecOpts*/);
3793
3794 if (RT_LIKELY(!fWasHalted && pVCpu->iem.s.msRecompilerPollNow != 0))
3795 { }
3796 else
3797 {
3798 /* Do polling after halt and the first time we get here. */
3799#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3800 uint64_t nsNow = 0;
3801 uint32_t const cItersTillPoll = iemPollTimersCalcDefaultCountdown(TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow));
3802 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillPoll;
3803 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillPoll;
3804#else
3805 uint64_t const nsNow = TMVirtualGetNoCheck(pVM);
3806#endif
3807 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3808 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3809 }
3810 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3811
3812 /*
3813 * Run-loop.
3814 *
3815 * If we're using setjmp/longjmp we combine all the catching here to avoid
3816 * having to call setjmp for each block we're executing.
3817 */
3818 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3819 for (;;)
3820 {
3821 VBOXSTRICTRC rcStrict;
3822 IEM_TRY_SETJMP(pVCpu, rcStrict)
3823 {
3824 for (;;)
3825 {
3826 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3827 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3828 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3829 {
3830 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3831 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3832 if (pTb)
3833 rcStrict = iemTbExec(pVCpu, pTb);
3834 else
3835 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3836 }
3837 else
3838 {
3839 /* This can only happen if the current PC cannot be translated into a
3840 host pointer, which means we're in MMIO or unmapped memory... */
3841#if defined(VBOX_STRICT) && defined(IN_RING3)
3842 rcStrict = DBGFSTOP(pVM);
3843 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3844 return rcStrict;
3845#endif
3846 rcStrict = IEMExecLots(pVCpu, 2048, 511, NULL);
3847 }
3848 if (rcStrict == VINF_SUCCESS)
3849 {
3850 Assert(pVCpu->iem.s.cActiveMappings == 0);
3851
3852 /* Note! This IRQ/FF check is repeated in iemPollTimers, iemThreadedFunc_BltIn_CheckIrq
3853 and emitted by iemNativeRecompFunc_BltIn_CheckIrq. */
3854 uint64_t fCpu = pVCpu->fLocalForcedActions;
3855 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3856 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3857 | VMCPU_FF_TLB_FLUSH
3858 | VMCPU_FF_UNHALT );
3859 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3860 if (RT_LIKELY( ( !fCpu
3861 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3862 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3863 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3864 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3865 {
3866 /* Once in a while we need to poll timers here. */
3867 if ((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
3868 { /* likely */ }
3869 else
3870 {
3871 int rc = iemPollTimers(pVM, pVCpu);
3872 if (rc != VINF_SUCCESS)
3873 return VINF_SUCCESS;
3874 }
3875 }
3876 else
3877 return VINF_SUCCESS;
3878 }
3879 else
3880 return rcStrict;
3881 }
3882 }
3883 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3884 {
3885 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3886 pVCpu->iem.s.cLongJumps++;
3887#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3888 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3889#endif
3890 if (pVCpu->iem.s.cActiveMappings > 0)
3891 iemMemRollback(pVCpu);
3892
3893#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3894 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3895 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3896 {
3897 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3898# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3899 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3900 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3901# endif
3902
3903#ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3904 /* Restore FPCR/MXCSR if the TB modified it. */
3905 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3906 {
3907 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3908 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3909 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3910 }
3911#endif
3912 }
3913#endif
3914
3915#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3916 /* If pTb isn't NULL we're in iemTbExec. */
3917 if (!pTb)
3918 {
3919 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3920 pTb = pVCpu->iem.s.pCurTbR3;
3921 if (pTb)
3922 {
3923 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3924 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3925 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3926 }
3927 }
3928#endif
3929 pVCpu->iem.s.pCurTbR3 = NULL;
3930 return rcStrict;
3931 }
3932 IEM_CATCH_LONGJMP_END(pVCpu);
3933 }
3934}
3935