source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@ 100803

Last change on this file since 100803 was 100803, checked in by vboxsync, 21 months ago

VMM/IEM: More stats. bugref:10369

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 68.3 KB
1/* $Id: IEMAllThrdRecompiler.cpp 100803 2023-08-04 22:04:07Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) :
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) :
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/x86.h>
86
87#ifndef TST_IEM_CHECK_MC
88# include "IEMInline.h"
89# include "IEMOpHlp.h"
90# include "IEMMc.h"
91#endif
92
93#include "IEMThreadedFunctions.h"
94
95
96/*
97 * Narrow down configs here to avoid wasting time on unused configs.
98 */
99
100#ifndef IEM_WITH_CODE_TLB
101# error The code TLB must be enabled for the recompiler.
102#endif
103
104#ifndef IEM_WITH_DATA_TLB
105# error The data TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_SETJMP
109# error The setjmp approach must be enabled for the recompiler.
110#endif
111
112
113/*********************************************************************************************************************************
114* Internal Functions *
115*********************************************************************************************************************************/
116static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb);
117
118
119/**
120 * Calculates the effective address of a ModR/M memory operand, extended version
121 * for use in the recompilers.
122 *
123 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
124 *
125 * May longjmp on internal error.
126 *
127 * @return The effective address.
128 * @param pVCpu The cross context virtual CPU structure of the calling thread.
129 * @param bRm The ModRM byte.
130 * @param cbImmAndRspOffset - First byte: The size of any immediate
131 * following the effective address opcode bytes
132 * (only for RIP relative addressing).
133 * - Second byte: RSP displacement (for POP [ESP]).
134 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
135 * SIB byte (bits 39:32).
136 *
137 * @note This must be defined in a source file with matching
138 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
139 * or implemented differently...
140 */
141RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
142{
143 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
144# define SET_SS_DEF() \
145 do \
146 { \
147 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
148 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
149 } while (0)
150
151 if (!IEM_IS_64BIT_CODE(pVCpu))
152 {
153/** @todo Check the effective address size crap! */
154 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
155 {
156 uint16_t u16EffAddr;
157
158 /* Handle the disp16 form with no registers first. */
159 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
160 {
161 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
162 *puInfo = u16EffAddr;
163 }
164 else
165 {
166 /* Get the displacement. */
167 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
168 {
169 case 0: u16EffAddr = 0; break;
170 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
171 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
172 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
173 }
174 *puInfo = u16EffAddr;
175
176 /* Add the base and index registers to the disp. */
177 switch (bRm & X86_MODRM_RM_MASK)
178 {
179 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
180 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
181 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
182 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
183 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
184 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
185 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
186 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
187 }
188 }
189
190 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
191 return u16EffAddr;
192 }
193
194 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
195 uint32_t u32EffAddr;
196 uint64_t uInfo;
197
198 /* Handle the disp32 form with no registers first. */
199 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
200 {
201 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
202 uInfo = u32EffAddr;
203 }
204 else
205 {
206 /* Get the register (or SIB) value. */
207 uInfo = 0;
208 switch ((bRm & X86_MODRM_RM_MASK))
209 {
210 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
211 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
212 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
213 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
214 case 4: /* SIB */
215 {
216 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
217 uInfo = (uint64_t)bSib << 32;
218
219 /* Get the index and scale it. */
220 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
221 {
222 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
223 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
224 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
225 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
226 case 4: u32EffAddr = 0; /*none */ break;
227 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
228 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
229 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
230 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
231 }
232 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
233
234 /* add base */
235 switch (bSib & X86_SIB_BASE_MASK)
236 {
237 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
238 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
239 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
240 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
241 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
242 case 5:
243 if ((bRm & X86_MODRM_MOD_MASK) != 0)
244 {
245 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
246 SET_SS_DEF();
247 }
248 else
249 {
250 uint32_t u32Disp;
251 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
252 u32EffAddr += u32Disp;
253 uInfo |= u32Disp;
254 }
255 break;
256 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
257 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
258 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
259 }
260 break;
261 }
262 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
263 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
264 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
265 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
266 }
267
268 /* Get and add the displacement. */
269 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
270 {
271 case 0:
272 break;
273 case 1:
274 {
275 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
276 u32EffAddr += i8Disp;
277 uInfo |= (uint32_t)(int32_t)i8Disp;
278 break;
279 }
280 case 2:
281 {
282 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
283 u32EffAddr += u32Disp;
284 uInfo |= u32Disp;
285 break;
286 }
287 default:
288 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
289 }
290 }
291
292 *puInfo = uInfo;
293 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
294 return u32EffAddr;
295 }
296
297 uint64_t u64EffAddr;
298 uint64_t uInfo;
299
300 /* Handle the rip+disp32 form with no registers first. */
301 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
302 {
303 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
304 uInfo = (uint32_t)u64EffAddr;
305 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
306 }
307 else
308 {
309 /* Get the register (or SIB) value. */
310 uInfo = 0;
311 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
312 {
313 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
314 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
315 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
316 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
317 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
318 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
319 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
320 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
321 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
322 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
323 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
324 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
325 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
326 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
327 /* SIB */
328 case 4:
329 case 12:
330 {
331 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
332 uInfo = (uint64_t)bSib << 32;
333
334 /* Get the index and scale it. */
335 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
336 {
337 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
338 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
339 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
340 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
341 case 4: u64EffAddr = 0; /*none */ break;
342 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
343 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
344 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
345 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
346 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
347 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
348 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
349 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
350 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
351 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
352 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
353 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
354 }
355 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
356
357 /* add base */
358 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
359 {
360 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
361 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
362 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
363 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
364 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
365 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
366 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
367 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
368 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
369 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
370 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
371 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
372 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
373 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
374 /* complicated encodings */
375 case 5:
376 case 13:
377 if ((bRm & X86_MODRM_MOD_MASK) != 0)
378 {
379 if (!pVCpu->iem.s.uRexB)
380 {
381 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
382 SET_SS_DEF();
383 }
384 else
385 u64EffAddr += pVCpu->cpum.GstCtx.r13;
386 }
387 else
388 {
389 uint32_t u32Disp;
390 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
391 u64EffAddr += (int32_t)u32Disp;
392 uInfo |= u32Disp;
393 }
394 break;
395 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
396 }
397 break;
398 }
399 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
400 }
401
402 /* Get and add the displacement. */
403 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
404 {
405 case 0:
406 break;
407 case 1:
408 {
409 int8_t i8Disp;
410 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
411 u64EffAddr += i8Disp;
412 uInfo |= (uint32_t)(int32_t)i8Disp;
413 break;
414 }
415 case 2:
416 {
417 uint32_t u32Disp;
418 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
419 u64EffAddr += (int32_t)u32Disp;
420 uInfo |= u32Disp;
421 break;
422 }
423 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
424 }
425
426 }
427
428 *puInfo = uInfo;
429 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
430 {
431 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
432 return u64EffAddr;
433 }
434 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
435 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
436 return u64EffAddr & UINT32_MAX;
437}
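/* Worked example (illustrative values, 32-bit addressing): bRm = 0x44 gives mod=1 and
   rm=4, so a SIB byte follows; with bSib = 0x24 (scale=0, index=none, base=ESP) and a
   disp8 of 0x10, the function returns ESP + (cbImmAndRspOffset >> 8) + 0x10, makes SS
   the default segment via SET_SS_DEF(), and *puInfo ends up with the displacement in
   bits 31:0 and the SIB byte in bits 39:32. */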
438
439
440/*
441 * Translation block management.
442 */
443
444typedef struct IEMTBCACHE
445{
446 uint32_t cHash;
447 uint32_t uHashMask;
448 PIEMTB apHash[_1M];
449} IEMTBCACHE;
450
451static IEMTBCACHE g_TbCache = { _1M, _1M - 1, }; /**< Quick and dirty. */
452
453#define IEMTBCACHE_HASH(a_paCache, a_fTbFlags, a_GCPhysPc) \
454 ( ((uint32_t)(a_GCPhysPc) ^ (a_fTbFlags)) & (a_paCache)->uHashMask)
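/* Quick worked example of the hash (illustrative values): with uHashMask = _1M - 1
   (0xfffff), a TB at GCPhysPc 0x00123456 with fFlags 0x00048001 lands in bucket
   ((0x00123456 ^ 0x00048001) & 0xfffff) = 0x6b457 of apHash. */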
455
456
457/**
458 * Allocate a translation block for threaded recompilation.
459 *
460 * @returns Pointer to the translation block on success, NULL on failure.
461 * @param pVM The cross context virtual machine structure.
462 * @param pVCpu The cross context virtual CPU structure of the calling
463 * thread.
464 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
465 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
466 */
467static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
468{
469 /*
470 * Just using the heap for now. Will make this more efficient and
471 * complicated later, don't worry. :-)
472 */
473 PIEMTB pTb = (PIEMTB)RTMemAlloc(sizeof(IEMTB));
474 if (pTb)
475 {
476 unsigned const cCalls = 128;
477 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
478 if (pTb->Thrd.paCalls)
479 {
480 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16); /* This will be reallocated later. */
481 if (pTb->pabOpcodes)
482 {
483 pTb->Thrd.cAllocated = cCalls;
484 pTb->cbOpcodesAllocated = cCalls * 16;
485 pTb->Thrd.cCalls = 0;
486 pTb->cbOpcodes = 0;
487 pTb->pNext = NULL;
488 RTListInit(&pTb->LocalList);
489 pTb->GCPhysPc = GCPhysPc;
490 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
491 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
492 pTb->cInstructions = 0;
493
494 /* Init the first opcode range. */
495 pTb->cRanges = 1;
496 pTb->aRanges[0].cbOpcodes = 0;
497 pTb->aRanges[0].offOpcodes = 0;
498 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
499 pTb->aRanges[0].u2Unused = 0;
500 pTb->aRanges[0].idxPhysPage = 0;
501 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
502 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
503
504 pVCpu->iem.s.cTbAllocs++;
505 return pTb;
506 }
507 RTMemFree(pTb->Thrd.paCalls);
508 }
509 RTMemFree(pTb);
510 }
511 RT_NOREF(pVM);
512 return NULL;
513}
514
515
516/**
517 * Frees pTb.
518 *
519 * @param pVM The cross context virtual machine structure.
520 * @param pVCpu The cross context virtual CPU structure of the calling
521 * thread.
522 * @param pTb The translation block to free.
523 */
524static void iemThreadedTbFree(PVMCC pVM, PVMCPUCC pVCpu, PIEMTB pTb)
525{
526 RT_NOREF(pVM);
527 AssertPtr(pTb);
528
529 AssertCompile(IEMTB_F_STATE_OBSOLETE == IEMTB_F_STATE_MASK);
530 pTb->fFlags |= IEMTB_F_STATE_OBSOLETE; /* works, both bits set */
531
532 /* Unlink it from the hash table: */
533 uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, pTb->fFlags, pTb->GCPhysPc);
534 PIEMTB pTbCur = g_TbCache.apHash[idxHash];
535 if (pTbCur == pTb)
536 g_TbCache.apHash[idxHash] = pTb->pNext;
537 else
538 while (pTbCur)
539 {
540 PIEMTB const pNextTb = pTbCur->pNext;
541 if (pNextTb == pTb)
542 {
543 pTbCur->pNext = pTb->pNext;
544 break;
545 }
546 pTbCur = pNextTb;
547 }
548
549 /* Free it. */
550 RTMemFree(pTb->Thrd.paCalls);
551 pTb->Thrd.paCalls = NULL;
552
553 RTMemFree(pTb->pabOpcodes);
554 pTb->pabOpcodes = NULL;
555
556 RTMemFree(pTb);
557 pVCpu->iem.s.cTbFrees++;
558}
559
560
561/**
562 * Called by opcode verifier functions when they detect a problem.
563 */
564void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb)
565{
566 iemThreadedTbFree(pVCpu->CTX_SUFF(pVM), pVCpu, pTb);
567}
568
569
570static PIEMTB iemThreadedTbLookup(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
571{
572 uint32_t const fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags | IEMTB_F_STATE_READY;
573 uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, fFlags, GCPhysPc);
574 Log10(("TB lookup: idxHash=%#x fFlags=%#x GCPhysPc=%RGp\n", idxHash, fFlags, GCPhysPc));
575 PIEMTB pTb = g_TbCache.apHash[idxHash];
576 while (pTb)
577 {
578 if (pTb->GCPhysPc == GCPhysPc)
579 {
580 if (pTb->fFlags == fFlags)
581 {
582 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
583 {
584#ifdef VBOX_WITH_STATISTICS
585 pVCpu->iem.s.cTbLookupHits++;
586#endif
587 return pTb;
588 }
589 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
590 }
591 else
592 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
593 }
594 else
595 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
596
597 pTb = pTb->pNext;
598 }
599 RT_NOREF(pVM);
600 pVCpu->iem.s.cTbLookupMisses++;
601 return pTb;
602}
603
604
605static void iemThreadedTbAdd(PVMCC pVM, PVMCPUCC pVCpu, PIEMTB pTb)
606{
607 uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, pTb->fFlags, pTb->GCPhysPc);
608 pTb->pNext = g_TbCache.apHash[idxHash];
609 g_TbCache.apHash[idxHash] = pTb;
610 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
611 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
612 if (LogIs12Enabled())
613 {
614 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
615 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, idxHash, pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
616 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
617 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
618 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
619 pTb->aRanges[idxRange].idxPhysPage == 0
620 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
621 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
622 }
623 RT_NOREF(pVM);
624}
625
626
627/*
628 * Real code.
629 */
630
631#ifdef LOG_ENABLED
632/**
633 * Logs the current instruction.
634 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
635 * @param pszFunction The IEM function doing the execution.
636 */
637static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction) RT_NOEXCEPT
638{
639# ifdef IN_RING3
640 if (LogIs2Enabled())
641 {
642 char szInstr[256];
643 uint32_t cbInstr = 0;
644 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
645 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
646 szInstr, sizeof(szInstr), &cbInstr);
647
648 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
649 Log2(("**** %s fExec=%x pTb=%p\n"
650 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
651 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
652 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
653 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
654 " %s\n"
655 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3,
656 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
657 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
658 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
659 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
660 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
661 szInstr));
662
663 if (LogIs3Enabled())
664 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL);
665 }
666 else
667# endif
668 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
669 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
670}
671#endif /* LOG_ENABLED */
672
673
674static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
675{
676 RT_NOREF(pVM, pVCpu);
677 return rcStrict;
678}
679
680
681/**
682 * Initializes the decoder state when compiling TBs.
683 *
684 * This presumes that fExec has already been initialized.
685 *
686 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
687 * to apply fixes to them as well.
688 *
689 * @param pVCpu The cross context virtual CPU structure of the calling
690 * thread.
691 * @param fReInit Clear for the first call for a TB, set for subsequent
692 * calls from inside the compile loop where we can skip a
693 * couple of things.
694 * @param fExtraFlags The extra translation block flags when @a fReInit is
695 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
696 * checked.
697 */
698DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
699{
700 /* ASSUMES: That iemInitExec was already called and that anyone changing
701 CPU state affecting the fExec bits since then will have updated fExec! */
702 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
703 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
704
705 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
706
707 /* Decoder state: */
708 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
709 pVCpu->iem.s.enmEffAddrMode = enmMode;
710 if (enmMode != IEMMODE_64BIT)
711 {
712 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
713 pVCpu->iem.s.enmEffOpSize = enmMode;
714 }
715 else
716 {
717 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
718 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
719 }
720 pVCpu->iem.s.fPrefixes = 0;
721 pVCpu->iem.s.uRexReg = 0;
722 pVCpu->iem.s.uRexB = 0;
723 pVCpu->iem.s.uRexIndex = 0;
724 pVCpu->iem.s.idxPrefix = 0;
725 pVCpu->iem.s.uVex3rdReg = 0;
726 pVCpu->iem.s.uVexLength = 0;
727 pVCpu->iem.s.fEvexStuff = 0;
728 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
729 pVCpu->iem.s.offModRm = 0;
730 pVCpu->iem.s.iNextMapping = 0;
731
732 if (!fReInit)
733 {
734 pVCpu->iem.s.cActiveMappings = 0;
735 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
736 pVCpu->iem.s.fEndTb = false;
737 pVCpu->iem.s.fTbCheckOpcodes = false;
738 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
739 pVCpu->iem.s.fTbCrossedPage = false;
740 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
741 pVCpu->iem.s.fTbCurInstrIsSti = false;
742 }
743 else
744 {
745 Assert(pVCpu->iem.s.cActiveMappings == 0);
746 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
747 Assert(pVCpu->iem.s.fEndTb == false);
748 Assert(pVCpu->iem.s.fTbCrossedPage == false);
749 }
750
751#ifdef DBGFTRACE_ENABLED
752 switch (IEM_GET_CPU_MODE(pVCpu))
753 {
754 case IEMMODE_64BIT:
755 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
756 break;
757 case IEMMODE_32BIT:
758 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
759 break;
760 case IEMMODE_16BIT:
761 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
762 break;
763 }
764#endif
765}
766
767
768/**
769 * Initializes the opcode fetcher when starting the compilation.
770 *
771 * @param pVCpu The cross context virtual CPU structure of the calling
772 * thread.
773 */
774DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
775{
776 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
777#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
778 pVCpu->iem.s.offOpcode = 0;
779#else
780 RT_NOREF(pVCpu);
781#endif
782}
783
784
785/**
786 * Re-initializes the opcode fetcher between instructions while compiling.
787 *
788 * @param pVCpu The cross context virtual CPU structure of the calling
789 * thread.
790 */
791DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
792{
793 if (pVCpu->iem.s.pbInstrBuf)
794 {
795 uint64_t off = pVCpu->cpum.GstCtx.rip;
796 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
797 off += pVCpu->cpum.GstCtx.cs.u64Base;
798 off -= pVCpu->iem.s.uInstrBufPc;
799 if (off < pVCpu->iem.s.cbInstrBufTotal)
800 {
801 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
802 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
803 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
804 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
805 else
806 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
807 }
808 else
809 {
810 pVCpu->iem.s.pbInstrBuf = NULL;
811 pVCpu->iem.s.offInstrNextByte = 0;
812 pVCpu->iem.s.offCurInstrStart = 0;
813 pVCpu->iem.s.cbInstrBuf = 0;
814 pVCpu->iem.s.cbInstrBufTotal = 0;
815 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
816 }
817 }
818 else
819 {
820 pVCpu->iem.s.offInstrNextByte = 0;
821 pVCpu->iem.s.offCurInstrStart = 0;
822 pVCpu->iem.s.cbInstrBuf = 0;
823 pVCpu->iem.s.cbInstrBufTotal = 0;
824#ifdef VBOX_STRICT
825 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
826#endif
827 }
828#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
829 pVCpu->iem.s.offOpcode = 0;
830#endif
831}
832
833
834DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
835{
836 switch (cbInstr)
837 {
838 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
839 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
840 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
841 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
842 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
843 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
844 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
845 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
846 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
847 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
848 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
849 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
850 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
851 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
852 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
853 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
854 }
855}
856
857
858/**
859 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
860 *
861 * - CS LIM check required.
862 * - Must recheck opcode bytes.
863 * - Previous instruction branched.
864 * - TLB load detected, probably due to page crossing.
865 *
866 * @returns true if everything went well, false if we're out of space in the TB
867 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
868 * @param pVCpu The cross context virtual CPU structure of the calling
869 * thread.
870 * @param pTb The translation block being compiled.
871 */
872bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
873{
874 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
875#if 0
876 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
877 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
878#endif
879
880 /*
881 * If we're not in 64-bit mode and not already checking CS.LIM, we need to
882 * see whether we should start checking.
883 */
884 bool fConsiderCsLimChecking;
885 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
886 if ( fMode == IEM_F_MODE_X86_64BIT
887 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
888 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
889 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
890 fConsiderCsLimChecking = false; /* already enabled or not needed */
891 else
892 {
893 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
894 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
895 fConsiderCsLimChecking = true; /* likely */
896 else
897 {
898 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
899 return false;
900 }
901 }
902
903 /*
904 * Prepare the call now, even before we know if we can accept the instruction in this TB.
905 * This allows us to amend parameters w/o making every case suffer.
906 */
907 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
908 uint16_t const offOpcode = pTb->cbOpcodes;
909 uint8_t idxRange = pTb->cRanges - 1;
910
911 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
912 pCall->idxInstr = pTb->cInstructions;
913 pCall->offOpcode = offOpcode;
914 pCall->idxRange = idxRange;
915 pCall->cbOpcode = cbInstr;
916 pCall->auParams[0] = cbInstr;
917 pCall->auParams[1] = idxRange;
918 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
919
920/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
921 * gotten onto. If we do, stop */
922
923 /*
924 * Case 1: We've branched (RIP changed).
925 *
926 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
927 * Req: 1 extra range, no extra phys.
928 *
929 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
930 * necessary (fTbCrossedPage is true).
931 * Req: 1 extra range, probably 1 extra phys page entry.
932 *
933 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
934 * but in addition we cross into the following page and require
935 * another TLB load.
936 * Req: 2 extra ranges, probably 2 extra phys page entries.
937 *
938 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
939 * the following page (thus fTbCrossedPage is true).
940 * Req: 2 extra ranges, probably 1 extra phys page entry.
941 *
942 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
943 * it may trigger "spuriously" from the CPU point of view because of
944 * physical page changes that'll invalidate the physical TLB and trigger a
945 * call to the function. In theory this shouldn't be a big deal, just a bit
946 * of performance loss as we'll pick the LoadingTlb variants.
947 *
948 * Note! We do not currently optimize branching to the next instruction (sorry
949 * 32-bit PIC code). We could maybe do that in the branching code that
950 * sets (or not) fTbBranched.
951 */
952 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
953 * variant in win 3.1 code and the call variant in 32-bit linux PIC
954 * code. This'll require filtering out far jmps and calls, as they
955 * load CS which should technically be considered indirect since the
956 * GDT/LDT entry's base address can be modified independently from
957 * the code. */
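 /* Illustrative example of the sub-cases above: a direct near JMP landing at offset
    0x10 of a different page is sub-case 1b - fTbBranched is set, the opcode fetcher
    sets fTbCrossedPage when it reloads the TLB, and the code below opens a new opcode
    range and typically records one extra aGCPhysPages entry. */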
958 if (pVCpu->iem.s.fTbBranched != 0)
959 {
960 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
961 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
962 {
963 /* 1a + 1b - instruction fully within the branched to page. */
964 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
965 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
966
967 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
968 {
969 /* Check that we've got a free range. */
970 idxRange += 1;
971 if (idxRange < RT_ELEMENTS(pTb->aRanges))
972 { /* likely */ }
973 else
974 {
975 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
976 return false;
977 }
978 pCall->idxRange = idxRange;
979 pCall->auParams[1] = idxRange;
980 pCall->auParams[2] = 0;
981
982 /* Check that we've got a free page slot. */
983 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
984 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
985 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
986 pTb->aRanges[idxRange].idxPhysPage = 0;
987 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
988 || pTb->aGCPhysPages[0] == GCPhysNew)
989 {
990 pTb->aGCPhysPages[0] = GCPhysNew;
991 pTb->aRanges[idxRange].idxPhysPage = 1;
992 }
993 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
994 || pTb->aGCPhysPages[1] == GCPhysNew)
995 {
996 pTb->aGCPhysPages[1] = GCPhysNew;
997 pTb->aRanges[idxRange].idxPhysPage = 2;
998 }
999 else
1000 {
1001 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1002 return false;
1003 }
1004
1005 /* Finish setting up the new range. */
1006 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1007 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1008 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1009 pTb->aRanges[idxRange].u2Unused = 0;
1010 pTb->cRanges++;
1011 }
1012 else
1013 {
1014 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1015 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1016 }
1017
1018 /* Determine which function we need to load & check.
1019 Note! For jumps to a new page, we'll set both fTbBranched and
1020 fTbCrossedPage to avoid unnecessary TLB work for intra
1021 page branching */
1022 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
1023 || pVCpu->iem.s.fTbCrossedPage)
1024 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1025 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1026 : !fConsiderCsLimChecking
1027 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1028 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
1029 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
1030 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1031 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1032 : !fConsiderCsLimChecking
1033 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
1034 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
1035 else
1036 {
1037 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
1038 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1039 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1040 : !fConsiderCsLimChecking
1041 ? kIemThreadedFunc_BltIn_CheckOpcodes
1042 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
1043 }
1044 }
1045 else
1046 {
1047 /* 1c + 1d - instruction crosses pages. */
1048 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1049 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1050
1051 /* Lazy bird: Check that this isn't case 1c, since we've already
1052 loaded the first physical address. End the TB and
1053 make it a case 2b instead.
1054
1055 Hmm. Too much bother to detect, so just do the same
1056 with case 1d as well. */
1057#if 0 /** @todo get back to this later when we've got the actual branch code in
1058 * place. */
1059 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1060
1061 /* Check that we've got two free ranges. */
1062 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
1063 { /* likely */ }
1064 else
1065 return false;
1066 idxRange += 1;
1067 pCall->idxRange = idxRange;
1068 pCall->auParams[1] = idxRange;
1069 pCall->auParams[2] = 0;
1070
1071 /* ... */
1072
1073#else
1074 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1075 return false;
1076#endif
1077 }
1078 }
1079
1080 /*
1081 * Case 2: Page crossing.
1082 *
1083 * Sub-case 2a: The instruction starts on the first byte in the next page.
1084 *
1085 * Sub-case 2b: The instruction has opcode bytes in both the current and
1086 * following page.
1087 *
1088 * Both cases require a new range table entry and probably a new physical
1089 * page entry. The difference is in which functions to emit and whether to
1090 * add bytes to the current range.
1091 */
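 /* E.g. (illustrative): a 3-byte instruction whose first byte sits at page offset
    0xffe has two opcode bytes in the current page and one in the next, i.e.
    sub-case 2b; one starting exactly at offset 0x000 of the new page is sub-case 2a. */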
1092 else if (pVCpu->iem.s.fTbCrossedPage)
1093 {
1094 /* Check that we've got a free range. */
1095 idxRange += 1;
1096 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1097 { /* likely */ }
1098 else
1099 {
1100 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1101 return false;
1102 }
1103
1104 /* Check that we've got a free page slot. */
1105 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1106 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1107 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1108 pTb->aRanges[idxRange].idxPhysPage = 0;
1109 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1110 || pTb->aGCPhysPages[0] == GCPhysNew)
1111 {
1112 pTb->aGCPhysPages[0] = GCPhysNew;
1113 pTb->aRanges[idxRange].idxPhysPage = 1;
1114 }
1115 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1116 || pTb->aGCPhysPages[1] == GCPhysNew)
1117 {
1118 pTb->aGCPhysPages[1] = GCPhysNew;
1119 pTb->aRanges[idxRange].idxPhysPage = 2;
1120 }
1121 else
1122 {
1123 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1124 return false;
1125 }
1126
1127 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
1128 {
1129 Assert(pVCpu->iem.s.offCurInstrStart == 0);
1130 pCall->idxRange = idxRange;
1131 pCall->auParams[1] = idxRange;
1132 pCall->auParams[2] = 0;
1133
1134 /* Finish setting up the new range. */
1135 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1136 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1137 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1138 pTb->aRanges[idxRange].u2Unused = 0;
1139 pTb->cRanges++;
1140
1141 /* Determine which function we need to load & check. */
1142 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1143 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1144 : !fConsiderCsLimChecking
1145 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1146 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
1147 }
1148 else
1149 {
1150 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1151 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1152 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1153 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
1154
1155 /* We're good. Split the instruction over the old and new range table entries. */
1156 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
1157
1158 pTb->aRanges[idxRange].offPhysPage = 0;
1159 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
1160 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
1161 pTb->aRanges[idxRange].u2Unused = 0;
1162 pTb->cRanges++;
1163
1164 /* Determine which function we need to load & check. */
1165 if (pVCpu->iem.s.fTbCheckOpcodes)
1166 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1167 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1168 : !fConsiderCsLimChecking
1169 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1170 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
1171 else
1172 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1173 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1174 : !fConsiderCsLimChecking
1175 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1176 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
1177 }
1178 }
1179
1180 /*
1181 * Regular case: No new range required.
1182 */
1183 else
1184 {
1185 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
1186 if (pVCpu->iem.s.fTbCheckOpcodes)
1187 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1188 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1189 : kIemThreadedFunc_BltIn_CheckOpcodes;
1190 else
1191 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
1192
1193 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1194 pTb->cbOpcodes = offOpcode + cbInstr;
1195 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1196 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1197 }
1198
1199 /*
1200 * Commit the call.
1201 */
1202 pTb->Thrd.cCalls++;
1203
1204 /*
1205 * Clear state.
1206 */
1207 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1208 pVCpu->iem.s.fTbCrossedPage = false;
1209 pVCpu->iem.s.fTbCheckOpcodes = false;
1210
1211 /*
1212 * Copy opcode bytes.
1213 */
1214 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1215 pTb->cbOpcodes = offOpcode + cbInstr;
1216 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1217
1218 return true;
1219}
1220
1221
1222/**
1223 * Worker for iemThreadedCompileBeginEmitCallsComplications and
1224 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
1225 *
1226 * @returns true if anything is pending, false if not.
1227 * @param pVCpu The cross context virtual CPU structure of the calling
1228 * thread.
1229 */
1230DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
1231{
1232 uint64_t fCpu = pVCpu->fLocalForcedActions;
1233 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
1234#if 1
1235 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
1236 if (RT_LIKELY( !fCpu
1237 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
1238 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
1239 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
1240 return false;
1241 return true;
1242#else
1243 return false;
1244#endif
1245
1246}
1247
1248
1249/**
1250 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
1251 * set.
1252 *
1253 * @returns true if we should continue, false if an IRQ is deliverable or a
1254 * relevant force flag is pending.
1255 * @param pVCpu The cross context virtual CPU structure of the calling
1256 * thread.
1257 * @param pTb The translation block being compiled.
1258 * @sa iemThreadedCompileCheckIrq
1259 */
1260bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
1261{
1262 /*
1263 * Skip this if we've already emitted a call after the previous instruction
1264 * or if it's the first call, as we're always checking FFs between blocks.
1265 */
1266 uint32_t const idxCall = pTb->Thrd.cCalls;
1267 if ( idxCall > 0
1268 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
1269 {
1270 /* Emit the call. */
1271 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1272 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1273 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1274 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
1275 pCall->idxInstr = pTb->cInstructions;
1276 pCall->uUnused0 = 0;
1277 pCall->offOpcode = 0;
1278 pCall->cbOpcode = 0;
1279 pCall->idxRange = 0;
1280 pCall->auParams[0] = 0;
1281 pCall->auParams[1] = 0;
1282 pCall->auParams[2] = 0;
1283 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1284
1285 /* Reset the IRQ check value. */
1286 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
1287
1288 /*
1289 * Check for deliverable IRQs and pending force flags.
1290 */
1291 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
1292 }
1293 return true; /* continue */
1294}
1295
1296
1297/**
1298 * Emits an IRQ check call and checks for pending IRQs.
1299 *
1300 * @returns true if we should continue, false if an IRQ is deliverable or a
1301 * relevant force flag is pending.
1302 * @param pVCpu The cross context virtual CPU structure of the calling
1303 * thread.
1304 * @param pTb The translation block.
1305 * @sa iemThreadedCompileBeginEmitCallsComplications
1306 */
1307static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
1308{
1309 /* Check again in a little bit, unless it is immediately following an STI
1310 in which case we *must* check immediately after the next instruction
1311 as well in case it's executed with interrupt inhibition. We could
1312 otherwise miss the interrupt window. See the irq2 wait2 variant in
1313 bs3-timers-1 which is doing sti + sti + cli. */
1314 if (!pVCpu->iem.s.fTbCurInstrIsSti)
1315 pVCpu->iem.s.cInstrTillIrqCheck = 32;
1316 else
1317 {
1318 pVCpu->iem.s.fTbCurInstrIsSti = false;
1319 pVCpu->iem.s.cInstrTillIrqCheck = 0;
1320 }
1321 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1322
1323 /*
1324 * Emit the call.
1325 */
1326 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
1327 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
1328 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
1329 pCall->idxInstr = pTb->cInstructions;
1330 pCall->uUnused0 = 0;
1331 pCall->offOpcode = 0;
1332 pCall->cbOpcode = 0;
1333 pCall->idxRange = 0;
1334 pCall->auParams[0] = 0;
1335 pCall->auParams[1] = 0;
1336 pCall->auParams[2] = 0;
1337
1338 /*
1339 * Check for deliverable IRQs and pending force flags.
1340 */
1341 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
1342}
1343
1344
1345/**
1346 * Compiles a new TB and executes it.
1347 *
1348 * We combine compilation and execution here as it makes for simpler code flow
1349 * in the main loop and it allows interpreting while compiling if we want to
1350 * explore that option.
1351 *
1352 * @returns Strict VBox status code.
1353 * @param pVM The cross context virtual machine structure.
1354 * @param pVCpu The cross context virtual CPU structure of the calling
1355 * thread.
1356 * @param GCPhysPc The physical address corresponding to the current
1357 * RIP+CS.BASE.
1358 * @param fExtraFlags Extra translation block flags: IEMTB_F_TYPE_THREADED and
1359 * maybe IEMTB_F_RIP_CHECKS.
1360 */
1361static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
1362{
1363 /*
1364 * Allocate a new translation block.
1365 */
1366 PIEMTB pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags | IEMTB_F_STATE_COMPILING);
1367 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
1368
1369 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
1370 functions may get at it. */
1371 pVCpu->iem.s.pCurTbR3 = pTb;
1372
1373#if 0
1374 /* Make sure the CheckIrq condition matches the one in EM. */
1375 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
1376 const uint32_t cZeroCalls = 1;
1377#else
1378 const uint32_t cZeroCalls = 0;
1379#endif
1380
1381 /*
1382 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
1383 */
1384 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
1385 iemThreadedCompileInitOpcodeFetching(pVCpu);
1386 VBOXSTRICTRC rcStrict;
1387 for (;;)
1388 {
1389 /* Process the next instruction. */
1390#ifdef LOG_ENABLED
1391 iemThreadedLogCurInstr(pVCpu, "CC");
1392 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
1393 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
1394#endif
1395 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
1396 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
1397
1398 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
1399 if ( rcStrict == VINF_SUCCESS
1400 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
1401 && !pVCpu->iem.s.fEndTb)
1402 {
1403 Assert(pTb->Thrd.cCalls > cCallsPrev);
1404 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
1405
1406 pVCpu->iem.s.cInstructions++;
1407 }
1408 else
1409 {
1410 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
1411 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
1412 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
1413 rcStrict = VINF_SUCCESS;
1414
1415 if (pTb->Thrd.cCalls > cZeroCalls)
1416 {
1417 if (cCallsPrev != pTb->Thrd.cCalls)
1418 pVCpu->iem.s.cInstructions++;
1419 break;
1420 }
1421
1422 pVCpu->iem.s.pCurTbR3 = NULL;
1423 iemThreadedTbFree(pVM, pVCpu, pTb);
1424 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
1425 }
1426
1427 /* Check for IRQs? */
1428 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
1429 pVCpu->iem.s.cInstrTillIrqCheck--;
1430 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
1431 break;
1432
1433 /* Still space in the TB? */
1434 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
1435 && pTb->cbOpcodes + 16 <= pTb->cbOpcodesAllocated)
1436 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
1437 else
1438 {
1439 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
1440 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
1441 break;
1442 }
1443 iemThreadedCompileReInitOpcodeFetching(pVCpu);
1444 }
1445
1446 /*
1447 * Complete the TB and link it.
1448 */
1449 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_STATE_MASK) | IEMTB_F_STATE_READY;
1450 iemThreadedTbAdd(pVM, pVCpu, pTb);
1451
1452#ifdef IEM_COMPILE_ONLY_MODE
1453 /*
1454 * Execute the translation block.
1455 */
1456#endif
1457
1458 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
1459}
1460
1461
1462/**
1463 * Executes a translation block.
1464 *
1465 * @returns Strict VBox status code.
1466 * @param pVCpu The cross context virtual CPU structure of the calling
1467 * thread.
1468 * @param pTb The translation block to execute.
1469 */
1470static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
1471{
1472 /* Check the opcodes in the first page before starting execution. */
1473 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
1474 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
1475 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
1476 { /* likely */ }
1477 else
1478 {
1479 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
1480 iemThreadedTbFree(pVCpu->pVMR3, pVCpu, pTb);
1481 return VINF_SUCCESS;
1482 }
1483
1484 /* Set the current TB so CIMPL function may get at it. */
1485 pVCpu->iem.s.pCurTbR3 = pTb;
1486 pVCpu->iem.s.cTbExec++;
1487
1488 /* The execution loop. */
1489#ifdef LOG_ENABLED
1490 uint64_t uRipPrev = UINT64_MAX;
1491#endif
1492 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
1493 uint32_t cCallsLeft = pTb->Thrd.cCalls;
1494 while (cCallsLeft-- > 0)
1495 {
1496#ifdef LOG_ENABLED
1497 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
1498 {
1499 uRipPrev = pVCpu->cpum.GstCtx.rip;
1500 iemThreadedLogCurInstr(pVCpu, "EX");
1501 }
1502 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1503 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
1504 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
1505#endif
1506 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
1507 pCallEntry->auParams[0],
1508 pCallEntry->auParams[1],
1509 pCallEntry->auParams[2]);
1510 if (RT_LIKELY( rcStrict == VINF_SUCCESS
1511 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
1512 pCallEntry++;
1513 else
1514 {
1515 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
1516 pVCpu->iem.s.pCurTbR3 = NULL;
1517 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
1518
1519 /* Some status codes are just to get us out of this loop and
1520 continue in a different translation block. */
1521 if (rcStrict == VINF_IEM_REEXEC_BREAK)
1522 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
1523 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
1524 }
1525 }
1526
1527 pVCpu->iem.s.cInstructions += pTb->cInstructions;
1528 pVCpu->iem.s.pCurTbR3 = NULL;
1529 return VINF_SUCCESS;
1530}
1531
1532
1533/**
1534 * This is called when the PC doesn't match the current pbInstrBuf.
1535 *
1536 * Upon return, we're ready for opcode fetching. But please note that
1537 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
1538 * MMIO or unassigned).
1539 */
1540static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
1541{
1542 pVCpu->iem.s.pbInstrBuf = NULL;
1543 pVCpu->iem.s.offCurInstrStart = 0;
1544 pVCpu->iem.s.offInstrNextByte = 0;
1545 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
1546 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
1547}
1548
1549
1550/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
1551DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
1552{
1553 /*
1554 * Set uCurTbStartPc to RIP and calc the effective PC.
1555 */
1556 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
1557 pVCpu->iem.s.uCurTbStartPc = uPc;
1558 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1559 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
1560
1561 /*
1562 * Advance within the current buffer (PAGE) when possible.
1563 */
1564 if (pVCpu->iem.s.pbInstrBuf)
1565 {
1566 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
1567 if (off < pVCpu->iem.s.cbInstrBufTotal)
1568 {
1569 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1570 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1571 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1572 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1573 else
1574 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1575
1576 return pVCpu->iem.s.GCPhysInstrBuf + off;
1577 }
1578 }
1579 return iemGetPcWithPhysAndCodeMissed(pVCpu);
1580}
1581
1582
1583/**
1584 * Determines the extra IEMTB_F_XXX flags.
1585 *
1586 * @returns IEMTB_F_TYPE_THREADED and maybe IEMTB_F_RIP_CHECKS.
1587 * @param pVCpu The cross context virtual CPU structure of the calling
1588 * thread.
1589 */
1590DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
1591{
1592 uint32_t fRet = IEMTB_F_TYPE_THREADED;
1593
1594 /*
1595 * Determine the inhibit bits.
1596 */
1597 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
1598 { /* typical */ }
1599 else
1600 {
1601 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
1602 fRet |= IEMTB_F_INHIBIT_SHADOW;
1603 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
1604 fRet |= IEMTB_F_INHIBIT_NMI;
1605 }
1606
1607 /*
1608 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
1609 * likely to go invalid before the end of the translation block.
1610 */
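 /* Example (illustrative): a 32-bit CS with base 0, limit 0xffff and eip 0xfff0 gives
    offFromLim = 0xf, well below X86_PAGE_SIZE + 16, so the TB gets
    IEMTB_F_CS_LIM_CHECKS; with eip 0x1000 instead, offFromLim = 0xefff clears the
    threshold and the checks are skipped. */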
1611 if (IEM_IS_64BIT_CODE(pVCpu))
1612 return fRet;
1613
1614 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
1615 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
1616 return fRet;
1617 return fRet | IEMTB_F_CS_LIM_CHECKS;
1618}
1619
1620
1621VMMDECL(VBOXSTRICTRC) IEMExecRecompilerThreaded(PVMCC pVM, PVMCPUCC pVCpu)
1622{
1623 /*
1624 * See if there is an interrupt pending in TRPM, inject it if we can.
1625 */
1626 if (!TRPMHasTrap(pVCpu))
1627 { /* likely */ }
1628 else
1629 {
1630 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
1631 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
1632 { /*likely */ }
1633 else
1634 return rcStrict;
1635 }
1636
1637 /*
1638 * Init the execution environment.
1639 */
1640 iemInitExec(pVCpu, 0 /*fExecOpts*/);
1641
1642 /*
1643 * Run-loop.
1644 *
1645 * If we're using setjmp/longjmp we combine all the catching here to avoid
1646 * having to call setjmp for each block we're executing.
1647 */
1648 for (;;)
1649 {
1650 PIEMTB pTb = NULL;
1651 VBOXSTRICTRC rcStrict;
1652 IEM_TRY_SETJMP(pVCpu, rcStrict)
1653 {
1654 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
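 /* Note (illustrative): cPollRate is used as a mask below, so with 511 the
    TMTimerPollBool() call is only made once every 512 iterations (whenever the low
    nine bits of iIterations are all zero). */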
1655 for (uint32_t iIterations = 0; ; iIterations++)
1656 {
1657 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
1658 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
1659 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
1660
1661 pTb = iemThreadedTbLookup(pVM, pVCpu, GCPhysPc, fExtraFlags);
1662 if (pTb)
1663 rcStrict = iemThreadedTbExec(pVCpu, pTb);
1664 else
1665 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
1666 if (rcStrict == VINF_SUCCESS)
1667 {
1668 Assert(pVCpu->iem.s.cActiveMappings == 0);
1669
1670 uint64_t fCpu = pVCpu->fLocalForcedActions;
1671 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
1672 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
1673 | VMCPU_FF_TLB_FLUSH
1674 | VMCPU_FF_UNHALT );
1675 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
1676 if (RT_LIKELY( ( !fCpu
1677 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
1678 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
1679 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
1680 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
1681 {
1682 if (RT_LIKELY( (iIterations & cPollRate) != 0
1683 || !TMTimerPollBool(pVM, pVCpu)))
1684 {
1685
1686 }
1687 else
1688 return VINF_SUCCESS;
1689 }
1690 else
1691 return VINF_SUCCESS;
1692 }
1693 else
1694 return rcStrict;
1695 }
1696 }
1697 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
1698 {
1699 pVCpu->iem.s.cLongJumps++;
1700 if (pVCpu->iem.s.cActiveMappings > 0)
1701 iemMemRollback(pVCpu);
1702
1703 /* If pTb isn't NULL we're in iemThreadedTbExec. */
1704 if (!pTb)
1705 {
1706 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
1707 pTb = pVCpu->iem.s.pCurTbR3;
1708 if (pTb)
1709 {
1710 /* If the pCurTbR3 block is in compiling state, we're in iemThreadedCompile,
1711 otherwise it's iemThreadedTbExec inside iemThreadedCompile (compile option). */
1712 if ((pTb->fFlags & IEMTB_F_STATE_MASK) == IEMTB_F_STATE_COMPILING)
1713 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
1714 }
1715 }
1716 return rcStrict;
1717 }
1718 IEM_CATCH_LONGJMP_END(pVCpu);
1719 }
1720}
1721