VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@ 101189

Last change on this file since 101189 was 101163, checked in by vboxsync, 15 months ago

VMM/IEM: Experimental recompilation of threaded blocks into native code on linux.amd64. bugref:10370

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 99.2 KB
Line 
1/* $Id: IEMAllThrdRecompiler.cpp 101163 2023-09-18 20:44:24Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) :
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) :
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but its all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95
96
97/*
98 * Narrow down the configs here to avoid wasting time on unused ones.
99 */
100
101#ifndef IEM_WITH_CODE_TLB
102# error The code TLB must be enabled for the recompiler.
103#endif
104
105#ifndef IEM_WITH_DATA_TLB
106# error The data TLB must be enabled for the recompiler.
107#endif
108
109#ifndef IEM_WITH_SETJMP
110# error The setjmp approach must be enabled for the recompiler.
111#endif
112
113
114/*********************************************************************************************************************************
115* Internal Functions *
116*********************************************************************************************************************************/
117static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb);
118
119
/**
 * Calculates the effective address of a ModR/M memory operand, extended version
 * for use in the recompilers.
 *
 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
 *
 * May longjmp on internal error.
 *
 * Besides returning the address, the function reports the raw addressing
 * ingredients (displacement and SIB byte) via @a puInfo and, when no segment
 * prefix is active, switches the effective segment to SS for the BP/SP based
 * addressing forms.
 *
 * @return  The effective address.
 * @param   pVCpu               The cross context virtual CPU structure of the calling thread.
 * @param   bRm                 The ModRM byte.
 * @param   cbImmAndRspOffset   - First byte: The size of any immediate
 *                                following the effective address opcode bytes
 *                                (only for RIP relative addressing).
 *                              - Second byte: RSP displacement (for POP [ESP]).
 * @param   puInfo              Extra info: 32-bit displacement (bits 31:0) and
 *                              SIB byte (bits 39:32).  In the 16-bit addressing
 *                              path only the 16-bit displacement is stored.
 *
 * @note    This must be defined in a source file with matching
 *          IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
 *          or implemented differently...
 */
RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
{
    Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
    /* Makes SS the effective segment unless the instruction carries a segment prefix. */
# define SET_SS_DEF() \
    do \
    { \
        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
            pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
    } while (0)

    /*
     * 16-bit and 32-bit code (everything but 64-bit code).
     */
    if (!IEM_IS_64BIT_CODE(pVCpu))
    {
/** @todo Check the effective address size crap! */
        /*
         * 16-bit addressing.
         */
        if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
        {
            uint16_t u16EffAddr;

            /* Handle the disp16 form with no registers first. */
            if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
            {
                IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
                *puInfo = u16EffAddr;
            }
            else
            {
                /* Get the displacement. */
                switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
                {
                    case 0:  u16EffAddr = 0;                             break;
                    case 1:  IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
                    case 2:  IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);       break;
                    default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
                }
                /* Report the displacement before any register values are added to it. */
                *puInfo = u16EffAddr;

                /* Add the base and index registers to the disp. */
                switch (bRm & X86_MODRM_RM_MASK)
                {
                    case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
                    case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
                    case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
                    case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
                    case 4: u16EffAddr += pVCpu->cpum.GstCtx.si;            break;
                    case 5: u16EffAddr += pVCpu->cpum.GstCtx.di;            break;
                    case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp;            SET_SS_DEF(); break;
                    case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx;            break;
                }
            }

            Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
            return u16EffAddr;
        }

        /*
         * 32-bit addressing.
         */
        Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
        uint32_t u32EffAddr;
        uint64_t uInfo;

        /* Handle the disp32 form with no registers first. */
        if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
        {
            IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
            uInfo = u32EffAddr;
        }
        else
        {
            /* Get the register (or SIB) value. */
            uInfo = 0;
            switch ((bRm & X86_MODRM_RM_MASK))
            {
                case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
                case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
                case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
                case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
                case 4: /* SIB */
                {
                    /* The SIB byte is reported in bits 39:32 of the extra info. */
                    uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
                    uInfo = (uint64_t)bSib << 32;

                    /* Get the index and scale it. */
                    switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
                    {
                        case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
                        case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
                        case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
                        case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
                        case 4: u32EffAddr = 0; /*none */ break;
                        case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
                        case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
                        case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
                        IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                    }
                    u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;

                    /* add base */
                    switch (bSib & X86_SIB_BASE_MASK)
                    {
                        case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
                        case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
                        case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
                        case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
                        /* ESP base: the caller supplied RSP offset (second byte) is added too. */
                        case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
                        case 5:
                            /* Base 5 means EBP when mod != 0, otherwise a disp32 without base register. */
                            if ((bRm & X86_MODRM_MOD_MASK) != 0)
                            {
                                u32EffAddr += pVCpu->cpum.GstCtx.ebp;
                                SET_SS_DEF();
                            }
                            else
                            {
                                uint32_t u32Disp;
                                IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                                u32EffAddr += u32Disp;
                                uInfo      |= u32Disp;
                            }
                            break;
                        case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
                        case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
                        IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                    }
                    break;
                }
                case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
                case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
                case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
                IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
            }

            /* Get and add the displacement. */
            switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
            {
                case 0:
                    break;
                case 1:
                {
                    /* disp8: sign-extended for the address and reported as a 32-bit value. */
                    int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
                    u32EffAddr += i8Disp;
                    uInfo      |= (uint32_t)(int32_t)i8Disp;
                    break;
                }
                case 2:
                {
                    uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                    u32EffAddr += u32Disp;
                    uInfo      |= u32Disp;
                    break;
                }
                default:
                    AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
            }
        }

        *puInfo = uInfo;
        Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
        return u32EffAddr;
    }

    /*
     * 64-bit code.
     */
    uint64_t u64EffAddr;
    uint64_t uInfo;

    /* Handle the rip+disp32 form with no registers first. */
    if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
    {
        IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
        uInfo = (uint32_t)u64EffAddr;
        /* Add RIP, the instruction length so far and the size of the immediate (first byte of cbImmAndRspOffset). */
        u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
    }
    else
    {
        /* Get the register (or SIB) value. */
        uInfo = 0;
        switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
        {
            case  0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
            case  1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
            case  2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
            case  3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
            case  5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
            case  6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
            case  7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
            case  8: u64EffAddr = pVCpu->cpum.GstCtx.r8;  break;
            case  9: u64EffAddr = pVCpu->cpum.GstCtx.r9;  break;
            case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
            case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
            case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
            case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
            case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
            /* SIB */
            case 4:
            case 12:
            {
                /* The SIB byte is reported in bits 39:32 of the extra info. */
                uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
                uInfo = (uint64_t)bSib << 32;

                /* Get the index and scale it. */
                switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
                {
                    case  0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
                    case  1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
                    case  2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
                    case  3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
                    case  4: u64EffAddr = 0; /*none */ break;
                    case  5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
                    case  6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
                    case  7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
                    case  8: u64EffAddr = pVCpu->cpum.GstCtx.r8;  break;
                    case  9: u64EffAddr = pVCpu->cpum.GstCtx.r9;  break;
                    case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
                    case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
                    case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
                    case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
                    case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
                    case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
                    IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                }
                u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;

                /* add base */
                switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
                {
                    case  0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
                    case  1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
                    case  2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
                    case  3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
                    /* RSP base: the caller supplied RSP offset (second byte) is added too. */
                    case  4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
                    case  6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
                    case  7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
                    case  8: u64EffAddr += pVCpu->cpum.GstCtx.r8;  break;
                    case  9: u64EffAddr += pVCpu->cpum.GstCtx.r9;  break;
                    case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
                    case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
                    case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
                    case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
                    case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
                    /* complicated encodings */
                    case 5:
                    case 13:
                        /* With mod != 0 this is RBP or R13 (depending on uRexB), otherwise a disp32 without base register. */
                        if ((bRm & X86_MODRM_MOD_MASK) != 0)
                        {
                            if (!pVCpu->iem.s.uRexB)
                            {
                                u64EffAddr += pVCpu->cpum.GstCtx.rbp;
                                SET_SS_DEF();
                            }
                            else
                                u64EffAddr += pVCpu->cpum.GstCtx.r13;
                        }
                        else
                        {
                            uint32_t u32Disp;
                            IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                            u64EffAddr += (int32_t)u32Disp;
                            uInfo      |= u32Disp;
                        }
                        break;
                    IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                }
                break;
            }
            IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
        }

        /* Get and add the displacement. */
        switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
        {
            case 0:
                break;
            case 1:
            {
                /* disp8: sign-extended for the address and reported as a 32-bit value. */
                int8_t i8Disp;
                IEM_OPCODE_GET_NEXT_S8(&i8Disp);
                u64EffAddr += i8Disp;
                uInfo      |= (uint32_t)(int32_t)i8Disp;
                break;
            }
            case 2:
            {
                /* disp32: sign-extended to 64 bits for the address calculation. */
                uint32_t u32Disp;
                IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                u64EffAddr += (int32_t)u32Disp;
                uInfo      |= u32Disp;
                break;
            }
            IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
        }

    }

    *puInfo = uInfo;
    if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
    {
        Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
        return u64EffAddr;
    }
    /* 32-bit effective address mode in 64-bit code: only the low 32 bits of the result are returned. */
    Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
    Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
    return u64EffAddr & UINT32_MAX;
}
439
440
441/*********************************************************************************************************************************
442* Translation Block Cache. *
443*********************************************************************************************************************************/
444
445/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
446static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
447{
448 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
449 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
450 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
451 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
452 if (cMsSinceUse1 != cMsSinceUse2)
453 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
454 if (pTb1->cUsed != pTb2->cUsed)
455 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
456 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
457 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
458 return 0;
459}
460
#ifdef VBOX_STRICT
/**
 * Strict-build helper asserting that the entry count encoded in a hash table
 * slot matches the actual length of that slot's collision list.
 *
 * @param   pTbCache        The translation block cache.
 * @param   idxHash         The hash table slot to check.
 * @param   pszOperation    Name of the operation just performed (for the
 *                          assertion message).
 */
static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
{
    /* Start with the recorded count and subtract one for each TB on the list. */
    int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
    for (PIEMTB pTbCur = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]); pTbCur; pTbCur = pTbCur->pNext)
        cLeft--;

    AssertMsg(cLeft == 0,
              ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
               idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
}
#endif
479
480
481DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
482{
483 STAM_PROFILE_START(&pTbCache->StatPrune, a);
484
485 /*
486 * First convert the collision list to an array.
487 */
488 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
489 uintptr_t cInserted = 0;
490 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
491
492 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
493
494 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
495 {
496 apSortedTbs[cInserted++] = pTbCollision;
497 pTbCollision = pTbCollision->pNext;
498 }
499
500 /* Free any excess (impossible). */
501 if (RT_LIKELY(!pTbCollision))
502 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
503 else
504 do
505 {
506 PIEMTB pTbToFree = pTbCollision;
507 pTbCollision = pTbToFree->pNext;
508 iemTbAllocatorFree(pVCpu, pTbToFree);
509 } while (pTbCollision);
510
511 /*
512 * Sort it by most recently used and usage count.
513 */
514 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
515
516 /* We keep half the list for now. Perhaps a bit aggressive... */
517 uintptr_t const cKeep = cInserted / 2;
518
519 /* First free up the TBs we don't wish to keep (before creating the new
520 list because otherwise the free code will scan the list for each one
521 without ever finding it). */
522 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
523 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
524
525 /* Then chain the new TB together with the ones we like to keep of the
526 existing ones and insert this list into the hash table. */
527 pTbCollision = pTb;
528 for (uintptr_t idx = 0; idx < cKeep; idx++)
529 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
530 pTbCollision->pNext = NULL;
531
532 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
533#ifdef VBOX_STRICT
534 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
535#endif
536
537 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
538}
539
540
541static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
542{
543 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
544 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
545 if (!pTbOldHead)
546 {
547 pTb->pNext = NULL;
548 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
549 }
550 else
551 {
552 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
553 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
554 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
555 {
556 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
557 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
558#ifdef VBOX_STRICT
559 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
560#endif
561 }
562 else
563 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
564 }
565}
566
567
568/**
569 * Unlinks @a pTb from the hash table if found in it.
570 *
571 * @returns true if unlinked, false if not present.
572 * @param pTbCache The hash table.
573 * @param pTb The TB to remove.
574 */
575static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
576{
577 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
578 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
579 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
580
581 /*
582 * At the head of the collision list?
583 */
584 if (pTbHash == pTb)
585 {
586 if (!pTb->pNext)
587 pTbCache->apHash[idxHash] = NULL;
588 else
589 {
590 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
591 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
592#ifdef VBOX_STRICT
593 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
594#endif
595 }
596 return true;
597 }
598
599 /*
600 * Search the collision list.
601 */
602 PIEMTB const pTbHead = pTbHash;
603 while (pTbHash)
604 {
605 PIEMTB const pNextTb = pTbHash->pNext;
606 if (pNextTb == pTb)
607 {
608 pTbHash->pNext = pTb->pNext;
609 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
610#ifdef VBOX_STRICT
611 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
612#endif
613 return true;
614 }
615 pTbHash = pNextTb;
616 }
617 return false;
618}
619
620
621/**
622 * Looks up a TB for the given PC and flags in the cache.
623 *
624 * @returns Pointer to TB on success, NULL if not found.
625 * @param pVCpu The cross context virtual CPU structure of the
626 * calling thread.
627 * @param pTbCache The translation block cache.
628 * @param GCPhysPc The PC to look up a TB for.
629 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
630 * the lookup.
631 * @thread EMT(pVCpu)
632 */
633static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
634 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
635{
636 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
637 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
638 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
639#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
640 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
641#endif
642 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p L %d\n", fFlags, GCPhysPc, idxHash, pTb, cLeft));
643 while (pTb)
644 {
645 if (pTb->GCPhysPc == GCPhysPc)
646 {
647 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
648 {
649 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
650 {
651 STAM_COUNTER_INC(&pTbCache->cLookupHits);
652 AssertMsg(cLeft > 0, ("%d\n", cLeft));
653
654 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
655 pTb->cUsed++;
656#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
657 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != 16)
658 return pTb;
659 return iemNativeRecompile(pVCpu, pTb);
660#else
661 return pTb;
662#endif
663 }
664 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
665 }
666 else
667 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
668 }
669 else
670 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
671
672 pTb = pTb->pNext;
673#ifdef VBOX_STRICT
674 cLeft--;
675#endif
676 }
677 AssertMsg(cLeft == 0, ("%d\n", cLeft));
678 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
679 return pTb;
680}
681
682
683/*********************************************************************************************************************************
684* Translation Block Allocator.
685*********************************************************************************************************************************/
686/*
687 * Translation block allocation management.
688 */
689
/*
 * TB index <-> (chunk index, index within chunk) conversion macros.
 *
 * When IEMTB_SIZE_IS_POWER_OF_TWO is defined the conversions use the
 * allocator's cChunkShift and fChunkMask members, otherwise they divide and
 * multiply by its cTbsPerChunk member.
 */
#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
/** Gets the chunk index from a TB index. */
# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
    ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
/** Gets the index within the chunk from a TB index (@a a_idxChunk is not used by this variant). */
# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
    ((a_idxTb) &  (a_pTbAllocator)->fChunkMask)
/** Gets the TB index of the first TB in the given chunk. */
# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
    ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
#else
/** Gets the chunk index from a TB index. */
# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
    ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
/** Gets the index within the chunk from a TB index and its chunk index. */
# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
    ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
/** Gets the TB index of the first TB in the given chunk. */
# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
    ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
#endif
/** Makes a TB index from a chunk index and TB index within that chunk. */
#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
    (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
708
709
710/**
711 * Initializes the TB allocator and cache for an EMT.
712 *
713 * @returns VBox status code.
714 * @param pVM The VM handle.
715 * @param cInitialTbs The initial number of translation blocks to
716 * preallocator.
717 * @param cMaxTbs The max number of translation blocks allowed.
718 * @param cbInitialExec The initial size of the executable memory allocator.
719 * @param cbMaxExec The max size of the executable memory allocator.
720 * @param cbChunkExec The chunk size for executable memory allocator. Zero
721 * or UINT32_MAX for automatically determining this.
722 * @thread EMT
723 */
724DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
725 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
726{
727 PVMCPUCC pVCpu = VMMGetCpu(pVM);
728 Assert(!pVCpu->iem.s.pTbCacheR3);
729 Assert(!pVCpu->iem.s.pTbAllocatorR3);
730
731 /*
732 * Calculate the chunk size of the TB allocator.
733 * The minimum chunk size is 2MiB.
734 */
735 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
736 uint32_t cbPerChunk = _2M;
737 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
738#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
739 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
740 uint8_t cChunkShift = 21 - cTbShift;
741 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
742#endif
743 for (;;)
744 {
745 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
746 break;
747 cbPerChunk *= 2;
748 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
749#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
750 cChunkShift += 1;
751#endif
752 }
753
754 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
755 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
756 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
757
758 cMaxTbs = cMaxChunks * cTbsPerChunk;
759
760 /*
761 * Allocate and initalize it.
762 */
763 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
764 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
765 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
766 if (!pTbAllocator)
767 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
768 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
769 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
770 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
771 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
772 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
773 pTbAllocator->cbPerChunk = cbPerChunk;
774 pTbAllocator->cMaxTbs = cMaxTbs;
775#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
776 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
777 pTbAllocator->cChunkShift = cChunkShift;
778 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
779#endif
780
781 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
782 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
783
784 /*
785 * Allocate the initial chunks.
786 */
787 for (uint32_t idxChunk = 0; ; idxChunk++)
788 {
789 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
790 if (!paTbs)
791 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
792 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
793 cbPerChunk, idxChunk, pVCpu->idCpu);
794
795 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
796 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
797 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
798 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
799 pTbAllocator->cTotalTbs += cTbsPerChunk;
800
801 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
802 break;
803 }
804
805 /*
806 * Calculate the size of the hash table. We double the max TB count and
807 * round it up to the nearest power of two.
808 */
809 uint32_t cCacheEntries = cMaxTbs * 2;
810 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
811 {
812 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
813 cCacheEntries = RT_BIT_32(iBitTop);
814 Assert(cCacheEntries >= cMaxTbs * 2);
815 }
816
817 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
818 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
819 if (!pTbCache)
820 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
821 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
822 cbTbCache, cCacheEntries, pVCpu->idCpu);
823
824 /*
825 * Initialize it (assumes zeroed by the allocator).
826 */
827 pTbCache->uMagic = IEMTBCACHE_MAGIC;
828 pTbCache->cHash = cCacheEntries;
829 pTbCache->uHashMask = cCacheEntries - 1;
830 Assert(pTbCache->cHash > pTbCache->uHashMask);
831 pVCpu->iem.s.pTbCacheR3 = pTbCache;
832
833 /*
834 * Initialize the native executable memory allocator.
835 */
836#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
837 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
838 AssertLogRelRCReturn(rc, rc);
839#else
840 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
841#endif
842
843 return VINF_SUCCESS;
844}
845
846
847/**
848 * Inner free worker.
849 */
850static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
851 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
852{
853 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
854 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
855 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
856 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
857
858 /*
859 * Unlink the TB from the hash table.
860 */
861 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
862
863 /*
864 * Free the TB itself.
865 */
866 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
867 {
868 case IEMTB_F_TYPE_THREADED:
869 pTbAllocator->cThreadedTbs -= 1;
870 RTMemFree(pTb->Thrd.paCalls);
871 break;
872#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
873 case IEMTB_F_TYPE_NATIVE:
874 pTbAllocator->cNativeTbs -= 1;
875 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
876 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
877 break;
878#endif
879 default:
880 AssertFailed();
881 }
882 RTMemFree(pTb->pabOpcodes);
883
884 pTb->pNext = NULL;
885 pTb->fFlags = 0;
886 pTb->GCPhysPc = UINT64_MAX;
887 pTb->Gen.uPtr = 0;
888 pTb->Gen.uData = 0;
889 pTb->cbOpcodes = 0;
890 pTb->cbOpcodesAllocated = 0;
891 pTb->pabOpcodes = NULL;
892
893 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
894 Assert(pTbAllocator->cInUseTbs > 0);
895
896 pTbAllocator->cInUseTbs -= 1;
897 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
898}
899
900
901/**
902 * Frees the given TB.
903 *
904 * @param pVCpu The cross context virtual CPU structure of the calling
905 * thread.
906 * @param pTb The translation block to free.
907 * @thread EMT(pVCpu)
908 */
909static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
910{
911 /*
912 * Validate state.
913 */
914 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
915 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
916 uint8_t const idxChunk = pTb->idxAllocChunk;
917 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
918 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
919 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
920
921 /*
922 * Call inner worker.
923 */
924 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
925}
926
927
928/**
929 * Schedules a native TB for freeing when it's not longer being executed and
930 * part of the caller's call stack.
931 *
932 * The TB will be removed from the translation block cache, though, so it isn't
933 * possible to executed it again and the IEMTB::pNext member can be used to link
934 * it together with other TBs awaiting freeing.
935 *
936 * @param pVCpu The cross context virtual CPU structure of the calling
937 * thread.
938 * @param pTb The translation block to schedule for freeing.
939 */
940static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
941{
942 /*
943 * Validate state.
944 */
945 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
946 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
947 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
948 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
949 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
950 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
951 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
952 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
953
954 /*
955 * Remove it from the cache and prepend it to the allocator's todo list.
956 */
957 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
958
959 pTb->pNext = pTbAllocator->pDelayedFreeHead;
960 pTbAllocator->pDelayedFreeHead = pTb;
961}
962
963
964/**
965 * Processes the delayed frees.
966 *
967 * This is called by the allocator function as well as the native recompile
968 * function before making any TB or executable memory allocations respectively.
969 */
970void iemTbAllocatorProcessDelayedFrees(PVMCPU pVCpu, PIEMTBALLOCATOR pTbAllocator)
971{
972 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
973 pTbAllocator->pDelayedFreeHead = NULL;
974 while (pTb)
975 {
976 PIEMTB const pTbNext = pTb->pNext;
977 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
978 iemTbAlloctorScheduleForFree(pVCpu, pTb);
979 pTb = pTbNext;
980 }
981}
982
983
984/**
985 * Grow the translation block allocator with another chunk.
986 */
987static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
988{
989 /*
990 * Validate state.
991 */
992 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
993 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
994 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
995 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
996 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
997
998 /*
999 * Allocate a new chunk and add it to the allocator.
1000 */
1001 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1002 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1003 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1004
1005 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1006 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1007 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1008 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1009 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1010 pTbAllocator->cTotalTbs += cTbsPerChunk;
1011 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1012
1013 return VINF_SUCCESS;
1014}
1015
1016
1017/**
1018 * Allocates a TB from allocator with free block.
1019 *
1020 * This is common code to both the fast and slow allocator code paths.
1021 */
1022DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1023{
1024 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1025
1026 int idxTb;
1027 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1028 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1029 pTbAllocator->cTotalTbs,
1030 pTbAllocator->iStartHint & ~(uint32_t)63);
1031 else
1032 idxTb = -1;
1033 if (idxTb < 0)
1034 {
1035 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1036 AssertLogRelReturn(idxTb >= 0, NULL);
1037 }
1038 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1039 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1040
1041 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1042 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1043 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1044 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1045 Assert(pTb->idxAllocChunk == idxChunk);
1046
1047 pTbAllocator->cInUseTbs += 1;
1048 if (fThreaded)
1049 pTbAllocator->cThreadedTbs += 1;
1050 else
1051 pTbAllocator->cNativeTbs += 1;
1052 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1053 return pTb;
1054}
1055
1056
1057/**
1058 * Slow path for iemTbAllocatorAlloc.
1059 */
1060static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1061{
1062 /*
1063 * With some luck we can add another chunk.
1064 */
1065 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1066 {
1067 int rc = iemTbAllocatorGrow(pVCpu);
1068 if (RT_SUCCESS(rc))
1069 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1070 }
1071
1072 /*
1073 * We have to prune stuff. Sigh.
1074 *
1075 * This requires scanning for older TBs and kick them out. Not sure how to
1076 * best do this as we don't want to maintain any list of TBs ordered by last
1077 * usage time. But one reasonably simple approach would be that each time we
1078 * get here we continue a sequential scan of the allocation chunks,
1079 * considering just a smallish number of TBs and freeing a fixed portion of
1080 * them. Say, we consider the next 128 TBs, freeing the least recently used
1081 * in out of groups of 4 TBs, resulting in 32 free TBs.
1082 */
1083 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1084 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1085 uint32_t const cTbsToPrune = 128;
1086 uint32_t const cTbsPerGroup = 4;
1087 uint32_t cFreedTbs = 0;
1088#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1089 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1090#else
1091 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1092#endif
1093 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1094 idxTbPruneFrom = 0;
1095 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1096 {
1097 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1098 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1099 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1100 uint32_t cMsAge = msNow - pTb->msLastUsed;
1101 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1102
1103 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1104 {
1105#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1106 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1107 { /* likely */ }
1108 else
1109 {
1110 idxInChunk2 = 0;
1111 idxChunk2 += 1;
1112 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1113 idxChunk2 = 0;
1114 }
1115#endif
1116 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1117 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1118 if ( cMsAge2 > cMsAge
1119 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1120 {
1121 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1122 pTb = pTb2;
1123 idxChunk = idxChunk2;
1124 idxInChunk = idxInChunk2;
1125 cMsAge = cMsAge2;
1126 }
1127 }
1128
1129 /* Free the TB. */
1130 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1131 cFreedTbs++; /* paranoia */
1132 }
1133 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1134 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1135
1136 /*
1137 * Allocate a TB from the ones we've pruned.
1138 */
1139 if (cFreedTbs)
1140 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1141 return NULL;
1142}
1143
1144
1145/**
1146 * Allocate a translation block.
1147 *
1148 * @returns Pointer to block on success, NULL if we're out and is unable to
1149 * free up an existing one (very unlikely once implemented).
1150 * @param pVCpu The cross context virtual CPU structure of the calling
1151 * thread.
1152 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1153 * For statistics.
1154 */
1155DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1156{
1157 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1158 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1159
1160 /* Free any pending TBs before we proceed. */
1161 if (!pTbAllocator->pDelayedFreeHead)
1162 { /* probably likely */ }
1163 else
1164 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1165
1166 /* If the allocator is full, take slow code path.*/
1167 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1168 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1169 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1170}
1171
1172
1173
1174/*********************************************************************************************************************************
1175* Threaded Recompiler Core *
1176*********************************************************************************************************************************/
1177
1178/**
1179 * Allocate a translation block for threadeded recompilation.
1180 *
1181 * This is allocated with maxed out call table and storage for opcode bytes,
1182 * because it's only supposed to be called once per EMT to allocate the TB
1183 * pointed to by IEMCPU::pThrdCompileTbR3.
1184 *
1185 * @returns Pointer to the translation block on success, NULL on failure.
1186 * @param pVM The cross context virtual machine structure.
1187 * @param pVCpu The cross context virtual CPU structure of the calling
1188 * thread.
1189 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1190 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1191 */
1192static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1193{
1194 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1195 if (pTb)
1196 {
1197 unsigned const cCalls = 256;
1198 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1199 if (pTb->Thrd.paCalls)
1200 {
1201 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1202 if (pTb->pabOpcodes)
1203 {
1204 pTb->Thrd.cAllocated = cCalls;
1205 pTb->cbOpcodesAllocated = cCalls * 16;
1206 pTb->Thrd.cCalls = 0;
1207 pTb->cbOpcodes = 0;
1208 pTb->pNext = NULL;
1209 pTb->cUsed = 0;
1210 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1211 pTb->idxAllocChunk = UINT8_MAX;
1212 pTb->GCPhysPc = GCPhysPc;
1213 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1214 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1215 pTb->cInstructions = 0;
1216
1217 /* Init the first opcode range. */
1218 pTb->cRanges = 1;
1219 pTb->aRanges[0].cbOpcodes = 0;
1220 pTb->aRanges[0].offOpcodes = 0;
1221 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1222 pTb->aRanges[0].u2Unused = 0;
1223 pTb->aRanges[0].idxPhysPage = 0;
1224 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1225 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1226
1227 return pTb;
1228 }
1229 RTMemFree(pTb->Thrd.paCalls);
1230 }
1231 RTMemFree(pTb);
1232 }
1233 RT_NOREF(pVM);
1234 return NULL;
1235}
1236
1237
1238/**
1239 * Called on the TB that are dedicated for recompilation before it's reused.
1240 *
1241 * @param pVCpu The cross context virtual CPU structure of the calling
1242 * thread.
1243 * @param pTb The translation block to reuse.
1244 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1245 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1246 */
1247static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1248{
1249 pTb->GCPhysPc = GCPhysPc;
1250 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1251 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1252 pTb->Thrd.cCalls = 0;
1253 pTb->cbOpcodes = 0;
1254 pTb->cInstructions = 0;
1255
1256 /* Init the first opcode range. */
1257 pTb->cRanges = 1;
1258 pTb->aRanges[0].cbOpcodes = 0;
1259 pTb->aRanges[0].offOpcodes = 0;
1260 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1261 pTb->aRanges[0].u2Unused = 0;
1262 pTb->aRanges[0].idxPhysPage = 0;
1263 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1264 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1265}
1266
1267
1268/**
1269 * Used to duplicate a threded translation block after recompilation is done.
1270 *
1271 * @returns Pointer to the translation block on success, NULL on failure.
1272 * @param pVM The cross context virtual machine structure.
1273 * @param pVCpu The cross context virtual CPU structure of the calling
1274 * thread.
1275 * @param pTbSrc The TB to duplicate.
1276 */
1277static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1278{
1279 /*
1280 * Just using the heap for now. Will make this more efficient and
1281 * complicated later, don't worry. :-)
1282 */
1283 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1284 if (pTb)
1285 {
1286 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1287 memcpy(pTb, pTbSrc, sizeof(*pTb));
1288 pTb->idxAllocChunk = idxAllocChunk;
1289
1290 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1291 Assert(cCalls > 0);
1292 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1293 if (pTb->Thrd.paCalls)
1294 {
1295 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1296 Assert(cbOpcodes > 0);
1297 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1298 if (pTb->pabOpcodes)
1299 {
1300 pTb->Thrd.cAllocated = cCalls;
1301 pTb->cbOpcodesAllocated = cbOpcodes;
1302 pTb->pNext = NULL;
1303 pTb->cUsed = 0;
1304 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1305 pTb->fFlags = pTbSrc->fFlags;
1306
1307 return pTb;
1308 }
1309 RTMemFree(pTb->Thrd.paCalls);
1310 }
1311 iemTbAllocatorFree(pVCpu, pTb);
1312 }
1313 RT_NOREF(pVM);
1314 return NULL;
1315
1316}
1317
1318
1319/**
1320 * Adds the given TB to the hash table.
1321 *
1322 * @param pVCpu The cross context virtual CPU structure of the calling
1323 * thread.
1324 * @param pTbCache The cache to add it to.
1325 * @param pTb The translation block to add.
1326 */
1327static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1328{
1329 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1330
1331 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
1332 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1333 if (LogIs12Enabled())
1334 {
1335 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1336 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1337 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1338 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1339 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1340 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1341 pTb->aRanges[idxRange].idxPhysPage == 0
1342 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1343 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1344 }
1345}
1346
1347
1348/**
1349 * Called by opcode verifier functions when they detect a problem.
1350 */
1351void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1352{
1353 /* Unless it's safe, we can only immediately free threaded TB, as we will
1354 have more code left to execute in native TBs when fSafeToFree == false. */
1355 if (fSafeToFree || (pTb->fFlags & IEMTB_F_TYPE_THREADED))
1356 iemTbAllocatorFree(pVCpu, pTb);
1357 else
1358 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1359}
1360
1361
1362/*
1363 * Real code.
1364 */
1365
#ifdef LOG_ENABLED
/**
 * Logs the current instruction.
 *
 * In ring-3 with level 2 logging enabled this disassembles the current guest
 * instruction and logs it together with the general registers, segment
 * selectors, flags and x87/MXCSR state; with level 3 enabled the verbose
 * "cpumguest" info is dumped as well.  In all other cases a single flow-level
 * line with CS:RIP, SS:RSP and EFLAGS is logged.
 *
 * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
 * @param   pszFunction The IEM function doing the execution.
 */
static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction) RT_NOEXCEPT
{
# ifdef IN_RING3
    if (LogIs2Enabled())
    {
        /* Disassemble the instruction at the current guest position. */
        char     szDisas[256];
        uint32_t cbDisas = 0;
        DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
                           DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
                           szDisas, sizeof(szDisas), &cbDisas);

        PCX86FXSTATE pX87 = &pVCpu->cpum.GstCtx.XState.x87;
        Log2(("**** %s fExec=%x pTb=%p\n"
              " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
              " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
              " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
              " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
              " %s\n"
              , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3,
              pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
              pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
              pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
              pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
              pX87->FSW, pX87->FCW, pX87->FTW, pX87->MXCSR, pX87->MXCSR_MASK,
              szDisas));

        if (LogIs3Enabled())
            DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL);
        return;
    }
# endif

    /* Brief variant: outside ring-3, or when level 2 logging is off. */
    LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
             pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
}
#endif /* LOG_ENABLED */
1407
1408
#if 0
/**
 * Disabled placeholder: passes the given strict status code straight through.
 *
 * @returns @a rcStrict, unmodified.
 * @param   pVM         The cross context virtual machine structure.  Unused.
 * @param   pVCpu       The cross context virtual CPU structure.  Unused.
 * @param   rcStrict    The status code to return.
 */
static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
{
    RT_NOREF(pVM);
    RT_NOREF(pVCpu);
    return rcStrict;
}
#endif
1416
1417
1418/**
1419 * Initializes the decoder state when compiling TBs.
1420 *
1421 * This presumes that fExec has already be initialized.
1422 *
1423 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1424 * to apply fixes to them as well.
1425 *
1426 * @param pVCpu The cross context virtual CPU structure of the calling
1427 * thread.
1428 * @param fReInit Clear for the first call for a TB, set for subsequent
1429 * calls from inside the compile loop where we can skip a
1430 * couple of things.
1431 * @param fExtraFlags The extra translation block flags when @a fReInit is
1432 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1433 * checked.
1434 */
1435DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1436{
1437 /* ASSUMES: That iemInitExec was already called and that anyone changing
1438 CPU state affecting the fExec bits since then will have updated fExec! */
1439 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1440 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1441
1442 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1443
1444 /* Decoder state: */
1445 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1446 pVCpu->iem.s.enmEffAddrMode = enmMode;
1447 if (enmMode != IEMMODE_64BIT)
1448 {
1449 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1450 pVCpu->iem.s.enmEffOpSize = enmMode;
1451 }
1452 else
1453 {
1454 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1455 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1456 }
1457 pVCpu->iem.s.fPrefixes = 0;
1458 pVCpu->iem.s.uRexReg = 0;
1459 pVCpu->iem.s.uRexB = 0;
1460 pVCpu->iem.s.uRexIndex = 0;
1461 pVCpu->iem.s.idxPrefix = 0;
1462 pVCpu->iem.s.uVex3rdReg = 0;
1463 pVCpu->iem.s.uVexLength = 0;
1464 pVCpu->iem.s.fEvexStuff = 0;
1465 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1466 pVCpu->iem.s.offModRm = 0;
1467 pVCpu->iem.s.iNextMapping = 0;
1468
1469 if (!fReInit)
1470 {
1471 pVCpu->iem.s.cActiveMappings = 0;
1472 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1473 pVCpu->iem.s.fEndTb = false;
1474 pVCpu->iem.s.fTbCheckOpcodes = false;
1475 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1476 pVCpu->iem.s.fTbCrossedPage = false;
1477 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1478 pVCpu->iem.s.fTbCurInstrIsSti = false;
1479 }
1480 else
1481 {
1482 Assert(pVCpu->iem.s.cActiveMappings == 0);
1483 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1484 Assert(pVCpu->iem.s.fEndTb == false);
1485 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1486 }
1487
1488#ifdef DBGFTRACE_ENABLED
1489 switch (IEM_GET_CPU_MODE(pVCpu))
1490 {
1491 case IEMMODE_64BIT:
1492 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1493 break;
1494 case IEMMODE_32BIT:
1495 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1496 break;
1497 case IEMMODE_16BIT:
1498 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1499 break;
1500 }
1501#endif
1502}
1503
1504
1505/**
1506 * Initializes the opcode fetcher when starting the compilation.
1507 *
1508 * @param pVCpu The cross context virtual CPU structure of the calling
1509 * thread.
1510 */
1511DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1512{
1513 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1514#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1515 pVCpu->iem.s.offOpcode = 0;
1516#else
1517 RT_NOREF(pVCpu);
1518#endif
1519}
1520
1521
1522/**
1523 * Re-initializes the opcode fetcher between instructions while compiling.
1524 *
1525 * @param pVCpu The cross context virtual CPU structure of the calling
1526 * thread.
1527 */
1528DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1529{
1530 if (pVCpu->iem.s.pbInstrBuf)
1531 {
1532 uint64_t off = pVCpu->cpum.GstCtx.rip;
1533 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1534 off += pVCpu->cpum.GstCtx.cs.u64Base;
1535 off -= pVCpu->iem.s.uInstrBufPc;
1536 if (off < pVCpu->iem.s.cbInstrBufTotal)
1537 {
1538 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1539 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1540 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1541 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1542 else
1543 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1544 }
1545 else
1546 {
1547 pVCpu->iem.s.pbInstrBuf = NULL;
1548 pVCpu->iem.s.offInstrNextByte = 0;
1549 pVCpu->iem.s.offCurInstrStart = 0;
1550 pVCpu->iem.s.cbInstrBuf = 0;
1551 pVCpu->iem.s.cbInstrBufTotal = 0;
1552 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1553 }
1554 }
1555 else
1556 {
1557 pVCpu->iem.s.offInstrNextByte = 0;
1558 pVCpu->iem.s.offCurInstrStart = 0;
1559 pVCpu->iem.s.cbInstrBuf = 0;
1560 pVCpu->iem.s.cbInstrBufTotal = 0;
1561#ifdef VBOX_STRICT
1562 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1563#endif
1564 }
1565#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1566 pVCpu->iem.s.offOpcode = 0;
1567#endif
1568}
1569
1570
1571DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1572{
1573 switch (cbInstr)
1574 {
1575 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1576 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1577 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1578 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1579 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1580 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1581 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1582 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1583 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1584 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1585 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1586 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1587 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1588 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1589 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1590 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1591 }
1592}
1593
1594
1595/**
1596 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1597 *
1598 * - CS LIM check required.
1599 * - Must recheck opcode bytes.
1600 * - Previous instruction branched.
1601 * - TLB load detected, probably due to page crossing.
1602 *
1603 * @returns true if everything went well, false if we're out of space in the TB
1604 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
1605 * @param pVCpu The cross context virtual CPU structure of the calling
1606 * thread.
1607 * @param pTb The translation block being compiled.
1608 */
1609bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1610{
1611 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1612#if 0
1613 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1614 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1615#endif
1616
1617 /*
1618 * If we're not in 64-bit mode and not already checking CS.LIM we need to
1619 * see if it's needed to start checking.
1620 */
1621 bool fConsiderCsLimChecking;
1622 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
1623 if ( fMode == IEM_F_MODE_X86_64BIT
1624 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
1625 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
1626 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
1627 fConsiderCsLimChecking = false; /* already enabled or not needed */
1628 else
1629 {
1630 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
1631 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
1632 fConsiderCsLimChecking = true; /* likely */
1633 else
1634 {
1635 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
1636 return false;
1637 }
1638 }
1639
1640 /*
1641 * Prepare call now, even before we know if can accept the instruction in this TB.
1642 * This allows us amending parameters w/o making every case suffer.
1643 */
1644 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
1645 uint16_t const offOpcode = pTb->cbOpcodes;
1646 uint8_t idxRange = pTb->cRanges - 1;
1647
1648 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
1649 pCall->idxInstr = pTb->cInstructions;
1650 pCall->offOpcode = offOpcode;
1651 pCall->idxRange = idxRange;
1652 pCall->cbOpcode = cbInstr;
1653 pCall->auParams[0] = cbInstr;
1654 pCall->auParams[1] = idxRange;
1655 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
1656
1657/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
1658 * gotten onto. If we do, stop */
1659
1660 /*
1661 * Case 1: We've branched (RIP changed).
1662 *
1663 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
1664 * Req: 1 extra range, no extra phys.
1665 *
1666 * Sub-case 1b: Different page but no page boundrary crossing, so TLB load
1667 * necessary (fTbCrossedPage is true).
1668 * Req: 1 extra range, probably 1 extra phys page entry.
1669 *
1670 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
1671 * but in addition we cross into the following page and require
1672 * another TLB load.
1673 * Req: 2 extra ranges, probably 2 extra phys page entries.
1674 *
1675 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
1676 * the following page (thus fTbCrossedPage is true).
1677 * Req: 2 extra ranges, probably 1 extra phys page entry.
1678 *
1679 * Note! The setting fTbCrossedPage is done by the iemOpcodeFetchBytesJmp, but
1680 * it may trigger "spuriously" from the CPU point of view because of
1681 * physical page changes that'll invalid the physical TLB and trigger a
1682 * call to the function. In theory this be a big deal, just a bit
1683 * performance loss as we'll pick the LoadingTlb variants.
1684 *
1685 * Note! We do not currently optimize branching to the next instruction (sorry
1686 * 32-bit PIC code). We could maybe do that in the branching code that
1687 * sets (or not) fTbBranched.
1688 */
1689 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
1690 * variant in win 3.1 code and the call variant in 32-bit linux PIC
1691 * code. This'll require filtering out far jmps and calls, as they
1692 * load CS which should technically be considered indirect since the
1693 * GDT/LDT entry's base address can be modified independently from
1694 * the code. */
1695 if (pVCpu->iem.s.fTbBranched != 0)
1696 {
1697 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
1698 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
1699 {
1700 /* 1a + 1b - instruction fully within the branched to page. */
1701 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
1702 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
1703
1704 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
1705 {
1706 /* Check that we've got a free range. */
1707 idxRange += 1;
1708 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1709 { /* likely */ }
1710 else
1711 {
1712 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1713 return false;
1714 }
1715 pCall->idxRange = idxRange;
1716 pCall->auParams[1] = idxRange;
1717 pCall->auParams[2] = 0;
1718
1719 /* Check that we've got a free page slot. */
1720 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1721 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1722 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1723 pTb->aRanges[idxRange].idxPhysPage = 0;
1724 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1725 || pTb->aGCPhysPages[0] == GCPhysNew)
1726 {
1727 pTb->aGCPhysPages[0] = GCPhysNew;
1728 pTb->aRanges[idxRange].idxPhysPage = 1;
1729 }
1730 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1731 || pTb->aGCPhysPages[1] == GCPhysNew)
1732 {
1733 pTb->aGCPhysPages[1] = GCPhysNew;
1734 pTb->aRanges[idxRange].idxPhysPage = 2;
1735 }
1736 else
1737 {
1738 Log8(("%04x:%08RX64: out of aGCPhysPages entires after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1739 return false;
1740 }
1741
1742 /* Finish setting up the new range. */
1743 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1744 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1745 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1746 pTb->aRanges[idxRange].u2Unused = 0;
1747 pTb->cRanges++;
1748 }
1749 else
1750 {
1751 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1752 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1753 }
1754
1755 /* Determin which function we need to load & check.
1756 Note! For jumps to a new page, we'll set both fTbBranched and
1757 fTbCrossedPage to avoid unnecessary TLB work for intra
1758 page branching */
1759 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
1760 || pVCpu->iem.s.fTbCrossedPage)
1761 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1762 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1763 : !fConsiderCsLimChecking
1764 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1765 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
1766 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
1767 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1768 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1769 : !fConsiderCsLimChecking
1770 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
1771 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
1772 else
1773 {
1774 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
1775 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1776 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1777 : !fConsiderCsLimChecking
1778 ? kIemThreadedFunc_BltIn_CheckOpcodes
1779 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
1780 }
1781 }
1782 else
1783 {
1784 /* 1c + 1d - instruction crosses pages. */
1785 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1786 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1787
1788 /* Lazy bird: Check that this isn't case 1c, since we've already
1789 load the first physical address. End the TB and
1790 make it a case 2b instead.
1791
1792 Hmm. Too much bother to detect, so just do the same
1793 with case 1d as well. */
1794#if 0 /** @todo get back to this later when we've got the actual branch code in
1795 * place. */
1796 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1797
1798 /* Check that we've got two free ranges. */
1799 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
1800 { /* likely */ }
1801 else
1802 return false;
1803 idxRange += 1;
1804 pCall->idxRange = idxRange;
1805 pCall->auParams[1] = idxRange;
1806 pCall->auParams[2] = 0;
1807
1808 /* ... */
1809
1810#else
1811 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1812 return false;
1813#endif
1814 }
1815 }
1816
1817 /*
1818 * Case 2: Page crossing.
1819 *
1820 * Sub-case 2a: The instruction starts on the first byte in the next page.
1821 *
1822 * Sub-case 2b: The instruction has opcode bytes in both the current and
1823 * following page.
1824 *
1825 * Both cases requires a new range table entry and probably a new physical
1826 * page entry. The difference is in which functions to emit and whether to
1827 * add bytes to the current range.
1828 */
1829 else if (pVCpu->iem.s.fTbCrossedPage)
1830 {
1831 /* Check that we've got a free range. */
1832 idxRange += 1;
1833 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1834 { /* likely */ }
1835 else
1836 {
1837 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1838 return false;
1839 }
1840
1841 /* Check that we've got a free page slot. */
1842 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1843 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1844 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1845 pTb->aRanges[idxRange].idxPhysPage = 0;
1846 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1847 || pTb->aGCPhysPages[0] == GCPhysNew)
1848 {
1849 pTb->aGCPhysPages[0] = GCPhysNew;
1850 pTb->aRanges[idxRange].idxPhysPage = 1;
1851 }
1852 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1853 || pTb->aGCPhysPages[1] == GCPhysNew)
1854 {
1855 pTb->aGCPhysPages[1] = GCPhysNew;
1856 pTb->aRanges[idxRange].idxPhysPage = 2;
1857 }
1858 else
1859 {
1860 Log8(("%04x:%08RX64: out of aGCPhysPages entires while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1861 return false;
1862 }
1863
1864 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
1865 {
1866 Assert(pVCpu->iem.s.offCurInstrStart == 0);
1867 pCall->idxRange = idxRange;
1868 pCall->auParams[1] = idxRange;
1869 pCall->auParams[2] = 0;
1870
1871 /* Finish setting up the new range. */
1872 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1873 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1874 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1875 pTb->aRanges[idxRange].u2Unused = 0;
1876 pTb->cRanges++;
1877
1878 /* Determin which function we need to load & check. */
1879 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1880 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1881 : !fConsiderCsLimChecking
1882 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1883 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
1884 }
1885 else
1886 {
1887 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1888 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1889 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1890 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
1891
1892 /* We've good. Split the instruction over the old and new range table entries. */
1893 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
1894
1895 pTb->aRanges[idxRange].offPhysPage = 0;
1896 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
1897 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
1898 pTb->aRanges[idxRange].u2Unused = 0;
1899 pTb->cRanges++;
1900
1901 /* Determin which function we need to load & check. */
1902 if (pVCpu->iem.s.fTbCheckOpcodes)
1903 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1904 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1905 : !fConsiderCsLimChecking
1906 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1907 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
1908 else
1909 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1910 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1911 : !fConsiderCsLimChecking
1912 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1913 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
1914 }
1915 }
1916
1917 /*
1918 * Regular case: No new range required.
1919 */
1920 else
1921 {
1922 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
1923 if (pVCpu->iem.s.fTbCheckOpcodes)
1924 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1925 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1926 : kIemThreadedFunc_BltIn_CheckOpcodes;
1927 else
1928 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
1929
1930 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1931 pTb->cbOpcodes = offOpcode + cbInstr;
1932 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1933 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1934 }
1935
1936 /*
1937 * Commit the call.
1938 */
1939 pTb->Thrd.cCalls++;
1940
1941 /*
1942 * Clear state.
1943 */
1944 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1945 pVCpu->iem.s.fTbCrossedPage = false;
1946 pVCpu->iem.s.fTbCheckOpcodes = false;
1947
1948 /*
1949 * Copy opcode bytes.
1950 */
1951 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1952 pTb->cbOpcodes = offOpcode + cbInstr;
1953 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1954
1955 return true;
1956}
1957
1958
1959/**
1960 * Worker for iemThreadedCompileBeginEmitCallsComplications and
1961 * iemThreadedCompileCheckIrq that checks for pending delivarable events.
1962 *
1963 * @returns true if anything is pending, false if not.
1964 * @param pVCpu The cross context virtual CPU structure of the calling
1965 * thread.
1966 */
1967DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
1968{
1969 uint64_t fCpu = pVCpu->fLocalForcedActions;
1970 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
1971#if 1
1972 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
1973 if (RT_LIKELY( !fCpu
1974 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
1975 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
1976 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
1977 return false;
1978 return true;
1979#else
1980 return false;
1981#endif
1982
1983}
1984
1985
1986/**
1987 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
1988 * set.
1989 *
1990 * @returns true if we should continue, false if an IRQ is deliverable or a
1991 * relevant force flag is pending.
1992 * @param pVCpu The cross context virtual CPU structure of the calling
1993 * thread.
1994 * @param pTb The translation block being compiled.
1995 * @sa iemThreadedCompileCheckIrq
1996 */
1997bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
1998{
1999 /*
2000 * Skip this we've already emitted a call after the previous instruction
2001 * or if it's the first call, as we're always checking FFs between blocks.
2002 */
2003 uint32_t const idxCall = pTb->Thrd.cCalls;
2004 if ( idxCall > 0
2005 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2006 {
2007 /* Emit the call. */
2008 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2009 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2010 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2011 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2012 pCall->idxInstr = pTb->cInstructions;
2013 pCall->uUnused0 = 0;
2014 pCall->offOpcode = 0;
2015 pCall->cbOpcode = 0;
2016 pCall->idxRange = 0;
2017 pCall->auParams[0] = 0;
2018 pCall->auParams[1] = 0;
2019 pCall->auParams[2] = 0;
2020 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2021
2022 /* Reset the IRQ check value. */
2023 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2024
2025 /*
2026 * Check for deliverable IRQs and pending force flags.
2027 */
2028 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2029 }
2030 return true; /* continue */
2031}
2032
2033
2034/**
2035 * Emits an IRQ check call and checks for pending IRQs.
2036 *
2037 * @returns true if we should continue, false if an IRQ is deliverable or a
2038 * relevant force flag is pending.
2039 * @param pVCpu The cross context virtual CPU structure of the calling
2040 * thread.
2041 * @param pTb The transation block.
2042 * @sa iemThreadedCompileBeginEmitCallsComplications
2043 */
2044static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2045{
2046 /* Check again in a little bit, unless it is immediately following an STI
2047 in which case we *must* check immediately after the next instruction
2048 as well in case it's executed with interrupt inhibition. We could
2049 otherwise miss the interrupt window. See the irq2 wait2 varaiant in
2050 bs3-timers-1 which is doing sti + sti + cli. */
2051 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2052 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2053 else
2054 {
2055 pVCpu->iem.s.fTbCurInstrIsSti = false;
2056 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2057 }
2058 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2059
2060 /*
2061 * Emit the call.
2062 */
2063 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2064 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2065 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2066 pCall->idxInstr = pTb->cInstructions;
2067 pCall->uUnused0 = 0;
2068 pCall->offOpcode = 0;
2069 pCall->cbOpcode = 0;
2070 pCall->idxRange = 0;
2071 pCall->auParams[0] = 0;
2072 pCall->auParams[1] = 0;
2073 pCall->auParams[2] = 0;
2074
2075 /*
2076 * Check for deliverable IRQs and pending force flags.
2077 */
2078 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2079}
2080
2081
2082/**
2083 * Compiles a new TB and executes it.
2084 *
2085 * We combine compilation and execution here as it makes it simpler code flow
2086 * in the main loop and it allows interpreting while compiling if we want to
2087 * explore that option.
2088 *
2089 * @returns Strict VBox status code.
2090 * @param pVM The cross context virtual machine structure.
2091 * @param pVCpu The cross context virtual CPU structure of the calling
2092 * thread.
2093 * @param GCPhysPc The physical address corresponding to the current
2094 * RIP+CS.BASE.
2095 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2096 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2097 */
2098static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2099{
2100 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2101 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2102
2103 /*
2104 * Get the TB we use for the recompiling. This is a maxed-out TB so
2105 * that'll we'll make a more efficient copy of when we're done compiling.
2106 */
2107 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2108 if (pTb)
2109 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2110 else
2111 {
2112 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2113 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2114 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2115 }
2116
2117 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2118 functions may get at it. */
2119 pVCpu->iem.s.pCurTbR3 = pTb;
2120
2121#if 0
2122 /* Make sure the CheckIrq condition matches the one in EM. */
2123 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2124 const uint32_t cZeroCalls = 1;
2125#else
2126 const uint32_t cZeroCalls = 0;
2127#endif
2128
2129 /*
2130 * Now for the recomplication. (This mimicks IEMExecLots in many ways.)
2131 */
2132 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2133 iemThreadedCompileInitOpcodeFetching(pVCpu);
2134 VBOXSTRICTRC rcStrict;
2135 for (;;)
2136 {
2137 /* Process the next instruction. */
2138#ifdef LOG_ENABLED
2139 iemThreadedLogCurInstr(pVCpu, "CC");
2140 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2141 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2142#endif
2143 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2144 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2145
2146 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2147 if ( rcStrict == VINF_SUCCESS
2148 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2149 && !pVCpu->iem.s.fEndTb)
2150 {
2151 Assert(pTb->Thrd.cCalls > cCallsPrev);
2152 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
2153
2154 pVCpu->iem.s.cInstructions++;
2155 }
2156 else
2157 {
2158 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2159 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2160 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2161 rcStrict = VINF_SUCCESS;
2162
2163 if (pTb->Thrd.cCalls > cZeroCalls)
2164 {
2165 if (cCallsPrev != pTb->Thrd.cCalls)
2166 pVCpu->iem.s.cInstructions++;
2167 break;
2168 }
2169
2170 pVCpu->iem.s.pCurTbR3 = NULL;
2171 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2172 }
2173
2174 /* Check for IRQs? */
2175 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2176 pVCpu->iem.s.cInstrTillIrqCheck--;
2177 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2178 break;
2179
2180 /* Still space in the TB? */
2181 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2182 && pTb->cbOpcodes + 16 <= pTb->cbOpcodesAllocated)
2183 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2184 else
2185 {
2186 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
2187 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
2188 break;
2189 }
2190 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2191 }
2192
2193 /*
2194 * Duplicate the TB into a completed one and link it.
2195 */
2196 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2197 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2198
2199 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2200
2201#ifdef IEM_COMPILE_ONLY_MODE
2202 /*
2203 * Execute the translation block.
2204 */
2205#endif
2206
2207 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2208}
2209
2210
2211
2212/*********************************************************************************************************************************
2213* Recompiled Execution Core *
2214*********************************************************************************************************************************/
2215
2216/**
2217 * Executes a translation block.
2218 *
2219 * @returns Strict VBox status code.
2220 * @param pVCpu The cross context virtual CPU structure of the calling
2221 * thread.
2222 * @param pTb The translation block to execute.
2223 */
2224static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2225{
2226 /*
2227 * Check the opcodes in the first page before starting execution.
2228 */
2229 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2230 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
2231 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
2232 { /* likely */ }
2233 else
2234 {
2235 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
2236 iemThreadedTbObsolete(pVCpu, pTb, true /*fSafeToFree*/);
2237 return VINF_SUCCESS;
2238 }
2239
2240 /*
2241 * Set the current TB so CIMPL functions may get at it.
2242 */
2243 pVCpu->iem.s.pCurTbR3 = pTb;
2244
2245 /*
2246 * Execute the block.
2247 */
2248#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2249 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2250 {
2251 pVCpu->iem.s.cTbExecNative++;
2252 typedef IEM_DECL_IMPL_TYPE(int, FNIEMNATIVETB, (PVMCPUCC pVCpu, PIEMTB pTb));
2253# ifdef LOG_ENABLED
2254 iemThreadedLogCurInstr(pVCpu, "EXn");
2255# endif
2256 VBOXSTRICTRC const rcStrict = ((FNIEMNATIVETB *)pTb->Native.paInstructions)(pVCpu, pTb);
2257 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2258 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2259 { /* likely */ }
2260 else
2261 {
2262 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2263 pVCpu->iem.s.pCurTbR3 = NULL;
2264 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2265
2266 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2267 only to break out of TB execution early. */
2268 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2269 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2270 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2271 }
2272 }
2273 else
2274#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2275 {
2276 /*
2277 * The threaded execution loop.
2278 */
2279 pVCpu->iem.s.cTbExecThreaded++;
2280#ifdef LOG_ENABLED
2281 uint64_t uRipPrev = UINT64_MAX;
2282#endif
2283 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2284 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2285 while (cCallsLeft-- > 0)
2286 {
2287#ifdef LOG_ENABLED
2288 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2289 {
2290 uRipPrev = pVCpu->cpum.GstCtx.rip;
2291 iemThreadedLogCurInstr(pVCpu, "EXt");
2292 }
2293 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2294 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2295 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2296#endif
2297 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2298 pCallEntry->auParams[0],
2299 pCallEntry->auParams[1],
2300 pCallEntry->auParams[2]);
2301 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2302 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2303 pCallEntry++;
2304 else
2305 {
2306 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2307 pVCpu->iem.s.pCurTbR3 = NULL;
2308 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2309
2310 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2311 only to break out of TB execution early. */
2312 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2313 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2314 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2315 }
2316 }
2317 }
2318
2319 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2320 pVCpu->iem.s.pCurTbR3 = NULL;
2321 return VINF_SUCCESS;
2322}
2323
2324
2325/**
2326 * This is called when the PC doesn't match the current pbInstrBuf.
2327 *
2328 * Upon return, we're ready for opcode fetching. But please note that
2329 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2330 * MMIO or unassigned).
2331 */
2332static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2333{
2334 pVCpu->iem.s.pbInstrBuf = NULL;
2335 pVCpu->iem.s.offCurInstrStart = 0;
2336 pVCpu->iem.s.offInstrNextByte = 0;
2337 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2338 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2339}
2340
2341
2342/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
2343DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2344{
2345 /*
2346 * Set uCurTbStartPc to RIP and calc the effective PC.
2347 */
2348 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2349 pVCpu->iem.s.uCurTbStartPc = uPc;
2350 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2351 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2352
2353 /*
2354 * Advance within the current buffer (PAGE) when possible.
2355 */
2356 if (pVCpu->iem.s.pbInstrBuf)
2357 {
2358 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2359 if (off < pVCpu->iem.s.cbInstrBufTotal)
2360 {
2361 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2362 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2363 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2364 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2365 else
2366 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2367
2368 return pVCpu->iem.s.GCPhysInstrBuf + off;
2369 }
2370 }
2371 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2372}
2373
2374
2375/**
2376 * Determines the extra IEMTB_F_XXX flags.
2377 *
2378 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2379 * IEMTB_F_CS_LIM_CHECKS (or zero).
2380 * @param pVCpu The cross context virtual CPU structure of the calling
2381 * thread.
2382 */
2383DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2384{
2385 uint32_t fRet = 0;
2386
2387 /*
2388 * Determine the inhibit bits.
2389 */
2390 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2391 { /* typical */ }
2392 else
2393 {
2394 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2395 fRet |= IEMTB_F_INHIBIT_SHADOW;
2396 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2397 fRet |= IEMTB_F_INHIBIT_NMI;
2398 }
2399
2400 /*
2401 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2402 * likely to go invalid before the end of the translation block.
2403 */
2404 if (IEM_IS_64BIT_CODE(pVCpu))
2405 return fRet;
2406
2407 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2408 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2409 return fRet;
2410 return fRet | IEMTB_F_CS_LIM_CHECKS;
2411}
2412
2413
2414VMMDECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
2415{
2416 /*
2417 * See if there is an interrupt pending in TRPM, inject it if we can.
2418 */
2419 if (!TRPMHasTrap(pVCpu))
2420 { /* likely */ }
2421 else
2422 {
2423 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
2424 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
2425 { /*likely */ }
2426 else
2427 return rcStrict;
2428 }
2429
2430 /*
2431 * Init the execution environment.
2432 */
2433 iemInitExec(pVCpu, 0 /*fExecOpts*/);
2434 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
2435 { }
2436 else
2437 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
2438
2439 /*
2440 * Run-loop.
2441 *
2442 * If we're using setjmp/longjmp we combine all the catching here to avoid
2443 * having to call setjmp for each block we're executing.
2444 */
2445 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
2446 for (;;)
2447 {
2448 PIEMTB pTb = NULL;
2449 VBOXSTRICTRC rcStrict;
2450 IEM_TRY_SETJMP(pVCpu, rcStrict)
2451 {
2452 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eigth of that seems reasonable for now. */
2453 for (uint32_t iIterations = 0; ; iIterations++)
2454 {
2455 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
2456 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
2457 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
2458
2459 pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
2460 if (pTb)
2461 rcStrict = iemTbExec(pVCpu, pTb);
2462 else
2463 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
2464 if (rcStrict == VINF_SUCCESS)
2465 {
2466 Assert(pVCpu->iem.s.cActiveMappings == 0);
2467
2468 uint64_t fCpu = pVCpu->fLocalForcedActions;
2469 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
2470 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2471 | VMCPU_FF_TLB_FLUSH
2472 | VMCPU_FF_UNHALT );
2473 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2474 if (RT_LIKELY( ( !fCpu
2475 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2476 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2477 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
2478 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
2479 {
2480 if (RT_LIKELY( (iIterations & cPollRate) != 0
2481 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
2482 pTb = NULL; /* Clear it before looping so iemTbCacheLookup can safely do native recompilation. */
2483 else
2484 return VINF_SUCCESS;
2485 }
2486 else
2487 return VINF_SUCCESS;
2488 }
2489 else
2490 return rcStrict;
2491 }
2492 }
2493 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
2494 {
2495 pVCpu->iem.s.cLongJumps++;
2496 if (pVCpu->iem.s.cActiveMappings > 0)
2497 iemMemRollback(pVCpu);
2498
2499#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
2500 /* If pTb isn't NULL we're in iemTbExec. */
2501 if (!pTb)
2502 {
2503 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
2504 pTb = pVCpu->iem.s.pCurTbR3;
2505 if (pTb)
2506 {
2507 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
2508 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
2509 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
2510 }
2511 }
2512#endif
2513 return rcStrict;
2514 }
2515 IEM_CATCH_LONGJMP_END(pVCpu);
2516 }
2517}
2518
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette