VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@ 101626

Last change on this file since 101626 was 101538, checked in by vboxsync, 14 months ago

VMM/IEM: Freed up some space in IEMTB for a pointer to debug info or something. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 100.8 KB
Line 
1/* $Id: IEMAllThrdRecompiler.cpp 101538 2023-10-21 22:30:50Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) :
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but its all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95
96
97/*
98 * Narrow down configs here to avoid wasting time on unused configs here.
99 */
100
101#ifndef IEM_WITH_CODE_TLB
102# error The code TLB must be enabled for the recompiler.
103#endif
104
105#ifndef IEM_WITH_DATA_TLB
106# error The data TLB must be enabled for the recompiler.
107#endif
108
109#ifndef IEM_WITH_SETJMP
110# error The setjmp approach must be enabled for the recompiler.
111#endif
112
113
114/*********************************************************************************************************************************
115* Internal Functions *
116*********************************************************************************************************************************/
117static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb);
118
119
120/**
121 * Calculates the effective address of a ModR/M memory operand, extended version
122 * for use in the recompilers.
123 *
124 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
125 *
126 * May longjmp on internal error.
127 *
128 * @return The effective address.
129 * @param pVCpu The cross context virtual CPU structure of the calling thread.
130 * @param bRm The ModRM byte.
131 * @param cbImmAndRspOffset - First byte: The size of any immediate
132 * following the effective address opcode bytes
133 * (only for RIP relative addressing).
134 * - Second byte: RSP displacement (for POP [ESP]).
135 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
136 * SIB byte (bits 39:32).
137 *
138 * @note This must be defined in a source file with matching
139 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
140 * or implemented differently...
141 */
RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
{
    Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
    /* Switch the default segment to SS for BP/SP relative addressing, unless a
       segment override prefix is already in effect. */
# define SET_SS_DEF() \
    do \
    { \
        if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
            pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
    } while (0)

    if (!IEM_IS_64BIT_CODE(pVCpu))
    {
/** @todo Check the effective address size crap! */
        /*
         * 16-bit addressing: fixed base/index register combinations per the
         * classic ModR/M table, plus an optional disp8/disp16.
         */
        if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
        {
            uint16_t u16EffAddr;

            /* Handle the disp16 form with no registers first. */
            if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
            {
                IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
                *puInfo = u16EffAddr;
            }
            else
            {
                /* Get the displacement. */
                switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
                {
                    case 0:  u16EffAddr = 0;                             break;
                    case 1:  IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
                    case 2:  IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);       break;
                    default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
                }
                *puInfo = u16EffAddr;

                /* Add the base and index registers to the disp. */
                switch (bRm & X86_MODRM_RM_MASK)
                {
                    case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
                    case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
                    case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
                    case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
                    case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
                    case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
                    case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
                    case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
                }
            }

            Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
            return u16EffAddr;
        }

        /*
         * 32-bit addressing: register or SIB base/index, optional disp8/disp32.
         */
        Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
        uint32_t u32EffAddr;
        uint64_t uInfo;

        /* Handle the disp32 form with no registers first. */
        if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
        {
            IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
            uInfo = u32EffAddr;
        }
        else
        {
            /* Get the register (or SIB) value. */
            uInfo = 0;
            switch ((bRm & X86_MODRM_RM_MASK))
            {
                case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
                case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
                case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
                case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
                case 4: /* SIB */
                {
                    uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
                    uInfo = (uint64_t)bSib << 32; /* The SIB byte goes into bits 39:32 of the info. */

                    /* Get the index and scale it. */
                    switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
                    {
                        case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
                        case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
                        case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
                        case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
                        case 4: u32EffAddr = 0; /*none */ break;
                        case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
                        case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
                        case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
                        IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                    }
                    u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;

                    /* add base */
                    switch (bSib & X86_SIB_BASE_MASK)
                    {
                        case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
                        case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
                        case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
                        case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
                        case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
                        case 5:
                            if ((bRm & X86_MODRM_MOD_MASK) != 0)
                            {
                                u32EffAddr += pVCpu->cpum.GstCtx.ebp;
                                SET_SS_DEF();
                            }
                            else
                            {
                                /* mod=0 with base=5 means disp32 instead of EBP. */
                                uint32_t u32Disp;
                                IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                                u32EffAddr += u32Disp;
                                uInfo      |= u32Disp;
                            }
                            break;
                        case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
                        case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
                        IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                    }
                    break;
                }
                case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
                case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
                case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
                IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
            }

            /* Get and add the displacement. */
            switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
            {
                case 0:
                    break;
                case 1:
                {
                    int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
                    u32EffAddr += i8Disp;
                    uInfo      |= (uint32_t)(int32_t)i8Disp;
                    break;
                }
                case 2:
                {
                    uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                    u32EffAddr += u32Disp;
                    uInfo      |= u32Disp;
                    break;
                }
                default:
                    AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
            }
        }

        *puInfo = uInfo;
        Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
        return u32EffAddr;
    }

    /*
     * 64-bit addressing: REX-extended registers, SIB, and RIP-relative disp32.
     */
    uint64_t u64EffAddr;
    uint64_t uInfo;

    /* Handle the rip+disp32 form with no registers first. */
    if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
    {
        IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
        uInfo = (uint32_t)u64EffAddr;
        /* RIP-relative is from the end of the instruction; the first byte of
           cbImmAndRspOffset carries the size of any trailing immediate. */
        u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
    }
    else
    {
        /* Get the register (or SIB) value. */
        uInfo = 0;
        switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
        {
            case  0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
            case  1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
            case  2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
            case  3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
            case  5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
            case  6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
            case  7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
            case  8: u64EffAddr = pVCpu->cpum.GstCtx.r8;  break;
            case  9: u64EffAddr = pVCpu->cpum.GstCtx.r9;  break;
            case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
            case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
            case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
            case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
            case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
            /* SIB */
            case 4:
            case 12:
            {
                uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
                uInfo = (uint64_t)bSib << 32; /* The SIB byte goes into bits 39:32 of the info. */

                /* Get the index and scale it. */
                switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
                {
                    case  0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
                    case  1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
                    case  2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
                    case  3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
                    case  4: u64EffAddr = 0; /*none */ break;
                    case  5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
                    case  6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
                    case  7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
                    case  8: u64EffAddr = pVCpu->cpum.GstCtx.r8;  break;
                    case  9: u64EffAddr = pVCpu->cpum.GstCtx.r9;  break;
                    case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
                    case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
                    case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
                    case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
                    case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
                    case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
                    IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                }
                u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;

                /* add base */
                switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
                {
                    case  0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
                    case  1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
                    case  2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
                    case  3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
                    case  4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
                    case  6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
                    case  7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
                    case  8: u64EffAddr += pVCpu->cpum.GstCtx.r8;  break;
                    case  9: u64EffAddr += pVCpu->cpum.GstCtx.r9;  break;
                    case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
                    case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
                    case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
                    case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
                    case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
                    /* complicated encodings */
                    case 5:
                    case 13:
                        if ((bRm & X86_MODRM_MOD_MASK) != 0)
                        {
                            if (!pVCpu->iem.s.uRexB)
                            {
                                u64EffAddr += pVCpu->cpum.GstCtx.rbp;
                                SET_SS_DEF();
                            }
                            else
                                u64EffAddr += pVCpu->cpum.GstCtx.r13;
                        }
                        else
                        {
                            /* mod=0 with base=5/13 means disp32 instead of RBP/R13. */
                            uint32_t u32Disp;
                            IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                            u64EffAddr += (int32_t)u32Disp;
                            uInfo      |= u32Disp;
                        }
                        break;
                    IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                }
                break;
            }
            IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
        }

        /* Get and add the displacement. */
        switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
        {
            case 0:
                break;
            case 1:
            {
                int8_t i8Disp;
                IEM_OPCODE_GET_NEXT_S8(&i8Disp);
                u64EffAddr += i8Disp;
                uInfo      |= (uint32_t)(int32_t)i8Disp;
                break;
            }
            case 2:
            {
                uint32_t u32Disp;
                IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                u64EffAddr += (int32_t)u32Disp;
                uInfo      |= u32Disp;
                break;
            }
            IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
        }

    }

    *puInfo = uInfo;
    /* A 32-bit effective address size in 64-bit mode truncates the result. */
    if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
    {
        Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
        return u64EffAddr;
    }
    Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
    Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
    return u64EffAddr & UINT32_MAX;
}
439
440
441/*********************************************************************************************************************************
442* Translation Block Cache. *
443*********************************************************************************************************************************/
444
445/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
446static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
447{
448 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
449 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
450 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
451 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
452 if (cMsSinceUse1 != cMsSinceUse2)
453 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
454 if (pTb1->cUsed != pTb2->cUsed)
455 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
456 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
457 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
458 return 0;
459}
460
461#ifdef VBOX_STRICT
462/**
463 * Assertion helper that checks a collisions list count.
464 */
465static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
466{
467 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
468 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
469 while (pTb)
470 {
471 pTb = pTb->pNext;
472 cLeft--;
473 }
474 AssertMsg(cLeft == 0,
475 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
476 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
477}
478#endif
479
480
/**
 * Adds @a pTb to an overflowing collision list at @a idxHash, pruning roughly
 * half of the existing entries (least recently / least frequently used first)
 * to make room.  Slow path of iemTbCacheAdd, hence DECL_NO_INLINE.
 *
 * @param   pVCpu       The cross context virtual CPU structure of the calling thread.
 * @param   pTbCache    The translation block cache.
 * @param   pTb         The new translation block to insert.
 * @param   idxHash     The hash table slot the TB belongs in.
 */
DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
{
    STAM_PROFILE_START(&pTbCache->StatPrune, a);

    /*
     * First convert the collision list to an array.
     */
    PIEMTB    apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
    uintptr_t cInserted    = 0;
    PIEMTB    pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);

    pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */

    while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
    {
        apSortedTbs[cInserted++] = pTbCollision;
        pTbCollision = pTbCollision->pNext;
    }

    /* Free any excess (impossible: the list is only ever grown to the array
       size, so the loop above should always consume it entirely). */
    if (RT_LIKELY(!pTbCollision))
        Assert(cInserted == RT_ELEMENTS(apSortedTbs));
    else
        do
        {
            PIEMTB pTbToFree = pTbCollision;
            pTbCollision = pTbToFree->pNext;
            iemTbAllocatorFree(pVCpu, pTbToFree);
        } while (pTbCollision);

    /*
     * Sort it by most recently used and usage count.
     */
    RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);

    /* We keep half the list for now. Perhaps a bit aggressive... */
    uintptr_t const cKeep = cInserted / 2;

    /* First free up the TBs we don't wish to keep (before creating the new
       list because otherwise the free code will scan the list for each one
       without ever finding it). */
    for (uintptr_t idx = cKeep; idx < cInserted; idx++)
        iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);

    /* Then chain the new TB together with the ones we like to keep of the
       existing ones and insert this list into the hash table. */
    pTbCollision = pTb;
    for (uintptr_t idx = 0; idx < cKeep; idx++)
        pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
    pTbCollision->pNext = NULL;

    pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
#ifdef VBOX_STRICT
    iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
#endif

    STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
}
539
540
541static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
542{
543 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
544 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
545 if (!pTbOldHead)
546 {
547 pTb->pNext = NULL;
548 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
549 }
550 else
551 {
552 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
553 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
554 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
555 {
556 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
557 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
558#ifdef VBOX_STRICT
559 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
560#endif
561 }
562 else
563 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
564 }
565}
566
567
568/**
569 * Unlinks @a pTb from the hash table if found in it.
570 *
571 * @returns true if unlinked, false if not present.
572 * @param pTbCache The hash table.
573 * @param pTb The TB to remove.
574 */
575static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
576{
577 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
578 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
579 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
580
581 /*
582 * At the head of the collision list?
583 */
584 if (pTbHash == pTb)
585 {
586 if (!pTb->pNext)
587 pTbCache->apHash[idxHash] = NULL;
588 else
589 {
590 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
591 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
592#ifdef VBOX_STRICT
593 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
594#endif
595 }
596 return true;
597 }
598
599 /*
600 * Search the collision list.
601 */
602 PIEMTB const pTbHead = pTbHash;
603 while (pTbHash)
604 {
605 PIEMTB const pNextTb = pTbHash->pNext;
606 if (pNextTb == pTb)
607 {
608 pTbHash->pNext = pTb->pNext;
609 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
610#ifdef VBOX_STRICT
611 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
612#endif
613 return true;
614 }
615 pTbHash = pNextTb;
616 }
617 return false;
618}
619
620
621/**
622 * Looks up a TB for the given PC and flags in the cache.
623 *
624 * @returns Pointer to TB on success, NULL if not found.
625 * @param pVCpu The cross context virtual CPU structure of the
626 * calling thread.
627 * @param pTbCache The translation block cache.
628 * @param GCPhysPc The PC to look up a TB for.
629 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
630 * the lookup.
631 * @thread EMT(pVCpu)
632 */
633static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
634 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
635{
636 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
637 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
638 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
639#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
640 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
641#endif
642 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p L %d\n", fFlags, GCPhysPc, idxHash, pTb, cLeft));
643 while (pTb)
644 {
645 if (pTb->GCPhysPc == GCPhysPc)
646 {
647 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
648 {
649 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
650 {
651 STAM_COUNTER_INC(&pTbCache->cLookupHits);
652 AssertMsg(cLeft > 0, ("%d\n", cLeft));
653
654 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
655 pTb->cUsed++;
656#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
657 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != 16)
658 return pTb;
659 return iemNativeRecompile(pVCpu, pTb);
660#else
661 return pTb;
662#endif
663 }
664 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
665 }
666 else
667 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
668 }
669 else
670 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
671
672 pTb = pTb->pNext;
673#ifdef VBOX_STRICT
674 cLeft--;
675#endif
676 }
677 AssertMsg(cLeft == 0, ("%d\n", cLeft));
678 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
679 return pTb;
680}
681
682
683/*********************************************************************************************************************************
684* Translation Block Allocator.
685*********************************************************************************************************************************/
/*
 * Translation block allocation management.
 */
689
#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
/** Maps a global TB index to the chunk it lives in (power-of-two IEMTB size: shift). */
# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
    ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
/** Maps a global TB index to its index within chunk @a a_idxChunk (power-of-two IEMTB size: mask). */
# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
    ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
/** Gets the global TB index of the first TB in chunk @a a_idxChunk (power-of-two IEMTB size: shift). */
# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
    ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
#else
/** Maps a global TB index to the chunk it lives in (generic: division). */
# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
    ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
/** Maps a global TB index to its index within chunk @a a_idxChunk (generic: subtraction). */
# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
    ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
/** Gets the global TB index of the first TB in chunk @a a_idxChunk (generic: multiplication). */
# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
    ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
#endif
/** Makes a TB index from a chunk index and TB index within that chunk. */
#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
    (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
708
709
/**
 * Initializes the TB allocator and cache for an EMT.
 *
 * @returns VBox status code.
 * @param   pVM             The VM handle.
 * @param   cInitialTbs     The initial number of translation blocks to
 *                          preallocator.
 * @param   cMaxTbs         The max number of translation blocks allowed.
 * @param   cbInitialExec   The initial size of the executable memory allocator.
 * @param   cbMaxExec       The max size of the executable memory allocator.
 * @param   cbChunkExec     The chunk size for executable memory allocator. Zero
 *                          or UINT32_MAX for automatically determining this.
 * @thread  EMT
 */
DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
                            uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
{
    PVMCPUCC pVCpu = VMMGetCpu(pVM);
    Assert(!pVCpu->iem.s.pTbCacheR3);      /* Must only be initialized once per EMT. */
    Assert(!pVCpu->iem.s.pTbAllocatorR3);

    /*
     * Calculate the chunk size of the TB allocator.
     * The minimum chunk size is 2MiB.
     */
    AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
    uint32_t cbPerChunk   = _2M;
    uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
    uint8_t const cTbShift    = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
    uint8_t       cChunkShift = 21 - cTbShift;
    AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
#endif
    /* Double the chunk size until cMaxTbs fits within the fixed-size chunk table. */
    for (;;)
    {
        if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
            break;
        cbPerChunk *= 2;
        cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
        cChunkShift += 1;
#endif
    }

    uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
    Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
    Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));

    cMaxTbs = cMaxChunks * cTbsPerChunk; /* Round up to whole-chunk granularity. */

    /*
     * Allocate and initialize it.
     */
    uint32_t const        c64BitWords   = RT_ALIGN_32(cMaxTbs, 64) / 64; /* allocation bitmap size, one bit per TB */
    size_t const          cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
    PIEMTBALLOCATOR const pTbAllocator  = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
    if (!pTbAllocator)
        return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
                          "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
                          cbTbAllocator, cMaxTbs, pVCpu->idCpu);
    pTbAllocator->uMagic       = IEMTBALLOCATOR_MAGIC;
    pTbAllocator->cMaxChunks   = (uint8_t)cMaxChunks;
    pTbAllocator->cTbsPerChunk = cTbsPerChunk;
    pTbAllocator->cbPerChunk   = cbPerChunk;
    pTbAllocator->cMaxTbs      = cMaxTbs;
#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
    pTbAllocator->fChunkMask   = cTbsPerChunk - 1;
    pTbAllocator->cChunkShift  = cChunkShift;
    Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
#endif

    memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
    pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;

    /*
     * Allocate the initial chunks.
     */
    for (uint32_t idxChunk = 0; ; idxChunk++)
    {
        PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
        if (!paTbs)
            return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
                              "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
                              cbPerChunk, idxChunk, pVCpu->idCpu);

        for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
            paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
        /* Flip the chunk's bitmap bits from "allocated" to "free". */
        ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
        pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
        pTbAllocator->cTotalTbs       += cTbsPerChunk;

        /* Stop once enough TBs have been preallocated. */
        if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
            break;
    }

    /*
     * Calculate the size of the hash table. We double the max TB count and
     * round it up to the nearest power of two.
     */
    uint32_t cCacheEntries = cMaxTbs * 2;
    if (!RT_IS_POWER_OF_TWO(cCacheEntries))
    {
        /* NOTE(review): ASMBitFirstSetU32 yields the 1-based index of the least
           significant set bit; rounding up to a power of two would normally key
           off the most significant bit (ASMBitLastSetU32).  The Assert below
           should trip if the result ever comes out too small -- TODO confirm. */
        uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
        cCacheEntries = RT_BIT_32(iBitTop);
        Assert(cCacheEntries >= cMaxTbs * 2);
    }

    size_t const      cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
    PIEMTBCACHE const pTbCache  = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
    if (!pTbCache)
        return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
                          "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
                          cbTbCache, cCacheEntries, pVCpu->idCpu);

    /*
     * Initialize it (assumes zeroed by the allocator).
     */
    pTbCache->uMagic    = IEMTBCACHE_MAGIC;
    pTbCache->cHash     = cCacheEntries;
    pTbCache->uHashMask = cCacheEntries - 1; /* valid mask because cHash is a power of two */
    Assert(pTbCache->cHash > pTbCache->uHashMask);
    pVCpu->iem.s.pTbCacheR3 = pTbCache;

    /*
     * Initialize the native executable memory allocator.
     */
#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
    int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
    AssertLogRelRCReturn(rc, rc);
#else
    RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
#endif

    return VINF_SUCCESS;
}
845
846
/**
 * Inner free worker.
 *
 * The caller has already translated @a pTb into @a idxChunk / @a idxInChunk
 * and validated both against the allocator state.
 *
 * @param   pVCpu           The cross context virtual CPU structure of the
 *                          calling thread.
 * @param   pTbAllocator    The TB allocator.
 * @param   pTb             The translation block to free.
 * @param   idxChunk        The chunk @a pTb belongs to.
 * @param   idxInChunk      The index of @a pTb within that chunk.
 */
static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
                                    PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
{
    Assert(idxChunk < pTbAllocator->cAllocatedChunks);
    Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
    Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
    Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));

    /*
     * Unlink the TB from the hash table.
     */
    iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);

    /*
     * Free the TB itself.
     */
    /* Release the type specific payload first. */
    switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
    {
        case IEMTB_F_TYPE_THREADED:
            pTbAllocator->cThreadedTbs -= 1;
            RTMemFree(pTb->Thrd.paCalls);
            break;
#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
        case IEMTB_F_TYPE_NATIVE:
            pTbAllocator->cNativeTbs -= 1;
            iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
                                    pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
            break;
#endif
        default:
            AssertFailed();
    }
    RTMemFree(pTb->pabOpcodes);

    /* Scrub the TB fields so a stale reference cannot be mistaken for a live TB. */
    pTb->pNext      = NULL;
    pTb->fFlags     = 0;
    pTb->GCPhysPc   = UINT64_MAX;
    pTb->Gen.uPtr   = 0;
    pTb->Gen.uData  = 0;
    pTb->cbOpcodes  = 0;
    pTb->pabOpcodes = NULL;

    /* Finally return the slot to the allocator bitmap and update the stats. */
    ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
    Assert(pTbAllocator->cInUseTbs > 0);

    pTbAllocator->cInUseTbs -= 1;
    STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
}
898
899
900/**
901 * Frees the given TB.
902 *
903 * @param pVCpu The cross context virtual CPU structure of the calling
904 * thread.
905 * @param pTb The translation block to free.
906 * @thread EMT(pVCpu)
907 */
908static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
909{
910 /*
911 * Validate state.
912 */
913 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
914 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
915 uint8_t const idxChunk = pTb->idxAllocChunk;
916 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
917 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
918 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
919
920 /*
921 * Call inner worker.
922 */
923 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
924}
925
926
927/**
928 * Schedules a native TB for freeing when it's not longer being executed and
929 * part of the caller's call stack.
930 *
931 * The TB will be removed from the translation block cache, though, so it isn't
932 * possible to executed it again and the IEMTB::pNext member can be used to link
933 * it together with other TBs awaiting freeing.
934 *
935 * @param pVCpu The cross context virtual CPU structure of the calling
936 * thread.
937 * @param pTb The translation block to schedule for freeing.
938 */
939static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
940{
941 /*
942 * Validate state.
943 */
944 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
945 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
946 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
947 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
948 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
949 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
950 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
951 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
952
953 /*
954 * Remove it from the cache and prepend it to the allocator's todo list.
955 */
956 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
957
958 pTb->pNext = pTbAllocator->pDelayedFreeHead;
959 pTbAllocator->pDelayedFreeHead = pTb;
960}
961
962
963/**
964 * Processes the delayed frees.
965 *
966 * This is called by the allocator function as well as the native recompile
967 * function before making any TB or executable memory allocations respectively.
968 */
969void iemTbAllocatorProcessDelayedFrees(PVMCPU pVCpu, PIEMTBALLOCATOR pTbAllocator)
970{
971 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
972 pTbAllocator->pDelayedFreeHead = NULL;
973 while (pTb)
974 {
975 PIEMTB const pTbNext = pTb->pNext;
976 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
977 iemTbAlloctorScheduleForFree(pVCpu, pTb);
978 pTb = pTbNext;
979 }
980}
981
982
983/**
984 * Grow the translation block allocator with another chunk.
985 */
986static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
987{
988 /*
989 * Validate state.
990 */
991 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
992 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
993 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
994 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
995 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
996
997 /*
998 * Allocate a new chunk and add it to the allocator.
999 */
1000 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1001 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1002 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1003
1004 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1005 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1006 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1007 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1008 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1009 pTbAllocator->cTotalTbs += cTbsPerChunk;
1010 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1011
1012 return VINF_SUCCESS;
1013}
1014
1015
1016/**
1017 * Allocates a TB from allocator with free block.
1018 *
1019 * This is common code to both the fast and slow allocator code paths.
1020 */
1021DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1022{
1023 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1024
1025 int idxTb;
1026 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1027 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1028 pTbAllocator->cTotalTbs,
1029 pTbAllocator->iStartHint & ~(uint32_t)63);
1030 else
1031 idxTb = -1;
1032 if (idxTb < 0)
1033 {
1034 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1035 AssertLogRelReturn(idxTb >= 0, NULL);
1036 }
1037 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1038 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1039
1040 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1041 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1042 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1043 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1044 Assert(pTb->idxAllocChunk == idxChunk);
1045
1046 pTbAllocator->cInUseTbs += 1;
1047 if (fThreaded)
1048 pTbAllocator->cThreadedTbs += 1;
1049 else
1050 pTbAllocator->cNativeTbs += 1;
1051 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1052 return pTb;
1053}
1054
1055
/**
 * Slow path for iemTbAllocatorAlloc.
 *
 * Called when all currently allocated TB slots are in use.  First tries to
 * grow the allocator by another chunk; if it is already at cMaxChunks (or
 * growing fails), a batch of least-recently-used TBs is pruned to make room.
 *
 * @returns Pointer to the allocated TB on success, NULL if no TB could be
 *          freed up.
 * @param   pVCpu           The cross context virtual CPU structure of the
 *                          calling thread.
 * @param   pTbAllocator    The TB allocator (pVCpu->iem.s.pTbAllocatorR3).
 * @param   fThreaded       Set if threaded TB, clear if native.  Statistics.
 */
static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
{
    /*
     * With some luck we can add another chunk.
     */
    if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
    {
        int rc = iemTbAllocatorGrow(pVCpu);
        if (RT_SUCCESS(rc))
            return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
    }

    /*
     * We have to prune stuff. Sigh.
     *
     * This requires scanning for older TBs and kick them out.  Not sure how to
     * best do this as we don't want to maintain any list of TBs ordered by last
     * usage time. But one reasonably simple approach would be that each time we
     * get here we continue a sequential scan of the allocation chunks,
     * considering just a smallish number of TBs and freeing a fixed portion of
     * them.  Say, we consider the next 128 TBs, freeing the least recently used
     * in out of groups of 4 TBs, resulting in 32 free TBs.
     */
    STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
    uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
    uint32_t const cTbsToPrune = 128;
    uint32_t const cTbsPerGroup = 4;
    uint32_t cFreedTbs = 0;
#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
    /* Aligning the scan window to cTbsToPrune keeps the whole batch inside a
       single chunk, so the inner loop needs no chunk-wrap handling. */
    uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
#else
    uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
#endif
    if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
        idxTbPruneFrom = 0;
    for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
    {
        /* Start with the first TB of the group as the eviction candidate. */
        uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
        uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
        PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
        uint32_t cMsAge = msNow - pTb->msLastUsed;
        Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);

        /* Pick the oldest (or least used on an age tie) TB of the group. */
        for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
        {
#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
            /* Wrap into the next chunk (or back to the first) when the group
               straddles a chunk boundary. */
            if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
            { /* likely */ }
            else
            {
                idxInChunk2 = 0;
                idxChunk2 += 1;
                if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
                    idxChunk2 = 0;
            }
#endif
            PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
            uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
            if ( cMsAge2 > cMsAge
                || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
            {
                Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
                pTb = pTb2;
                idxChunk = idxChunk2;
                idxInChunk = idxInChunk2;
                cMsAge = cMsAge2;
            }
        }

        /* Free the TB. */
        iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
        cFreedTbs++; /* paranoia */
    }
    pTbAllocator->iPruneFrom = idxTbPruneFrom;
    STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);

    /*
     * Allocate a TB from the ones we've pruned.
     */
    if (cFreedTbs)
        return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
    return NULL;
}
1142
1143
1144/**
1145 * Allocate a translation block.
1146 *
1147 * @returns Pointer to block on success, NULL if we're out and is unable to
1148 * free up an existing one (very unlikely once implemented).
1149 * @param pVCpu The cross context virtual CPU structure of the calling
1150 * thread.
1151 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1152 * For statistics.
1153 */
1154DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1155{
1156 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1157 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1158
1159 /* Free any pending TBs before we proceed. */
1160 if (!pTbAllocator->pDelayedFreeHead)
1161 { /* probably likely */ }
1162 else
1163 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1164
1165 /* If the allocator is full, take slow code path.*/
1166 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1167 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1168 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1169}
1170
1171
1172
1173/*********************************************************************************************************************************
1174* Threaded Recompiler Core *
1175*********************************************************************************************************************************/
1176
1177/**
1178 * Allocate a translation block for threadeded recompilation.
1179 *
1180 * This is allocated with maxed out call table and storage for opcode bytes,
1181 * because it's only supposed to be called once per EMT to allocate the TB
1182 * pointed to by IEMCPU::pThrdCompileTbR3.
1183 *
1184 * @returns Pointer to the translation block on success, NULL on failure.
1185 * @param pVM The cross context virtual machine structure.
1186 * @param pVCpu The cross context virtual CPU structure of the calling
1187 * thread.
1188 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1189 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1190 */
1191static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1192{
1193 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1194 if (pTb)
1195 {
1196 unsigned const cCalls = 256;
1197 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1198 if (pTb->Thrd.paCalls)
1199 {
1200 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1201 if (pTb->pabOpcodes)
1202 {
1203 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1204 pTb->Thrd.cAllocated = cCalls;
1205 pTb->Thrd.cCalls = 0;
1206 pTb->cbOpcodes = 0;
1207 pTb->pNext = NULL;
1208 pTb->cUsed = 0;
1209 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1210 pTb->idxAllocChunk = UINT8_MAX;
1211 pTb->GCPhysPc = GCPhysPc;
1212 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1213 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1214 pTb->cInstructions = 0;
1215
1216 /* Init the first opcode range. */
1217 pTb->cRanges = 1;
1218 pTb->aRanges[0].cbOpcodes = 0;
1219 pTb->aRanges[0].offOpcodes = 0;
1220 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1221 pTb->aRanges[0].u2Unused = 0;
1222 pTb->aRanges[0].idxPhysPage = 0;
1223 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1224 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1225
1226 return pTb;
1227 }
1228 RTMemFree(pTb->Thrd.paCalls);
1229 }
1230 RTMemFree(pTb);
1231 }
1232 RT_NOREF(pVM);
1233 return NULL;
1234}
1235
1236
1237/**
1238 * Called on the TB that are dedicated for recompilation before it's reused.
1239 *
1240 * @param pVCpu The cross context virtual CPU structure of the calling
1241 * thread.
1242 * @param pTb The translation block to reuse.
1243 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1244 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1245 */
1246static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1247{
1248 pTb->GCPhysPc = GCPhysPc;
1249 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1250 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1251 pTb->Thrd.cCalls = 0;
1252 pTb->cbOpcodes = 0;
1253 pTb->cInstructions = 0;
1254
1255 /* Init the first opcode range. */
1256 pTb->cRanges = 1;
1257 pTb->aRanges[0].cbOpcodes = 0;
1258 pTb->aRanges[0].offOpcodes = 0;
1259 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1260 pTb->aRanges[0].u2Unused = 0;
1261 pTb->aRanges[0].idxPhysPage = 0;
1262 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1263 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1264}
1265
1266
1267/**
1268 * Used to duplicate a threded translation block after recompilation is done.
1269 *
1270 * @returns Pointer to the translation block on success, NULL on failure.
1271 * @param pVM The cross context virtual machine structure.
1272 * @param pVCpu The cross context virtual CPU structure of the calling
1273 * thread.
1274 * @param pTbSrc The TB to duplicate.
1275 */
1276static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1277{
1278 /*
1279 * Just using the heap for now. Will make this more efficient and
1280 * complicated later, don't worry. :-)
1281 */
1282 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1283 if (pTb)
1284 {
1285 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1286 memcpy(pTb, pTbSrc, sizeof(*pTb));
1287 pTb->idxAllocChunk = idxAllocChunk;
1288
1289 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1290 Assert(cCalls > 0);
1291 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1292 if (pTb->Thrd.paCalls)
1293 {
1294 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1295 Assert(cbOpcodes > 0);
1296 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1297 if (pTb->pabOpcodes)
1298 {
1299 pTb->Thrd.cAllocated = cCalls;
1300 pTb->pNext = NULL;
1301 pTb->cUsed = 0;
1302 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1303 pTb->fFlags = pTbSrc->fFlags;
1304
1305 return pTb;
1306 }
1307 RTMemFree(pTb->Thrd.paCalls);
1308 }
1309 iemTbAllocatorFree(pVCpu, pTb);
1310 }
1311 RT_NOREF(pVM);
1312 return NULL;
1313
1314}
1315
1316
/**
 * Adds the given TB to the hash table.
 *
 * Also updates the per-CPU threaded-TB statistics and, at log level 12,
 * dumps the TB summary and its opcode ranges.
 *
 * @param   pVCpu       The cross context virtual CPU structure of the calling
 *                      thread.
 * @param   pTbCache    The cache to add it to.
 * @param   pTb         The translation block to add.
 */
static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
{
    iemTbCacheAdd(pVCpu, pTbCache, pTb);

    /* Statistics: instructions and threaded calls per TB. */
    STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
    STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
    if (LogIs12Enabled())
    {
        Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
               pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
               pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
        /* idxPhysPage 0 means the TB's own first page; 1 and 2 index into
           aGCPhysPages (off by one). */
        for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
            Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
                   pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
                   pTb->aRanges[idxRange].idxPhysPage == 0
                   ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
                   : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
    }
}
1344
1345
1346/**
1347 * Called by opcode verifier functions when they detect a problem.
1348 */
1349void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1350{
1351 /* Unless it's safe, we can only immediately free threaded TB, as we will
1352 have more code left to execute in native TBs when fSafeToFree == false. */
1353 if (fSafeToFree || (pTb->fFlags & IEMTB_F_TYPE_THREADED))
1354 iemTbAllocatorFree(pVCpu, pTb);
1355 else
1356 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1357}
1358
1359
1360/*
1361 * Real code.
1362 */
1363
1364#ifdef LOG_ENABLED
/**
 * Logs the current instruction.
 *
 * At log level 2 (ring-3 only) the guest instruction is disassembled and the
 * full register state is dumped; at level 3 the verbose cpumguest info item
 * is appended.  Otherwise only a one-line cs:rip/ss:rsp/EFLAGS flow entry is
 * produced.
 *
 * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
 * @param   pszFunction The IEM function doing the execution.
 * @param   idxInstr    The instruction number in the block.
 */
static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
{
# ifdef IN_RING3
    if (LogIs2Enabled())
    {
        /* Disassemble the current guest instruction for the log entry. */
        char szInstr[256];
        uint32_t cbInstr = 0;
        DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
                           DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
                           szInstr, sizeof(szInstr), &cbInstr);

        PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
        Log2(("**** %s fExec=%x pTb=%p #%u\n"
              " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
              " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
              " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
              " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
              " %s\n"
              , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, idxInstr,
              pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
              pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
              pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
              pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
              pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
              szInstr));

        if (LogIs3Enabled())
            DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL);
    }
    else
# endif
        LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
                 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
}
1405#endif /* LOG_ENABLED */
1406
1407
1408#if 0
1409static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1410{
1411 RT_NOREF(pVM, pVCpu);
1412 return rcStrict;
1413}
1414#endif
1415
1416
/**
 * Initializes the decoder state when compiling TBs.
 *
 * This presumes that fExec has already be initialized.
 *
 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
 * to apply fixes to them as well.
 *
 * @param   pVCpu       The cross context virtual CPU structure of the calling
 *                      thread.
 * @param   fReInit     Clear for the first call for a TB, set for subsequent
 *                      calls from inside the compile loop where we can skip a
 *                      couple of things.
 * @param   fExtraFlags The extra translation block flags when @a fReInit is
 *                      true, otherwise ignored.  Only IEMTB_F_INHIBIT_SHADOW is
 *                      checked.
 */
DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
{
    /* ASSUMES: That iemInitExec was already called and that anyone changing
       CPU state affecting the fExec bits since then will have updated fExec! */
    AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
              ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));

    IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);

    /* Decoder state: */
    pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
    pVCpu->iem.s.enmEffAddrMode = enmMode;
    if (enmMode != IEMMODE_64BIT)
    {
        pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
        pVCpu->iem.s.enmEffOpSize = enmMode;
    }
    else
    {
        /* 64-bit mode: default operand size is 32-bit (REX.W promotes). */
        pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
        pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
    }
    /* Per-instruction prefix/ModRM decoding state, reset for every instruction: */
    pVCpu->iem.s.fPrefixes = 0;
    pVCpu->iem.s.uRexReg = 0;
    pVCpu->iem.s.uRexB = 0;
    pVCpu->iem.s.uRexIndex = 0;
    pVCpu->iem.s.idxPrefix = 0;
    pVCpu->iem.s.uVex3rdReg = 0;
    pVCpu->iem.s.uVexLength = 0;
    pVCpu->iem.s.fEvexStuff = 0;
    pVCpu->iem.s.iEffSeg = X86_SREG_DS;
    pVCpu->iem.s.offModRm = 0;
    pVCpu->iem.s.iNextMapping = 0;

    if (!fReInit)
    {
        /* First instruction of a new TB: reset the per-TB compile state. */
        pVCpu->iem.s.cActiveMappings = 0;
        pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
        pVCpu->iem.s.fEndTb = false;
        pVCpu->iem.s.fTbCheckOpcodes = false;
        pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
        pVCpu->iem.s.fTbCrossedPage = false;
        /* With interrupt shadowing, check for IRQs immediately (count 0). */
        pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
        pVCpu->iem.s.fTbCurInstrIsSti = false;
        /* Force RF clearing and TF checking on first instruction in the block
           as we don't really know what came before and should assume the worst: */
        pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
    }
    else
    {
        /* Subsequent instruction in the same TB: carry the flags forward. */
        Assert(pVCpu->iem.s.cActiveMappings == 0);
        Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
        Assert(pVCpu->iem.s.fEndTb == false);
        Assert(pVCpu->iem.s.fTbCrossedPage == false);
        pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
    }
    pVCpu->iem.s.fTbCurInstr = 0;

#ifdef DBGFTRACE_ENABLED
    switch (IEM_GET_CPU_MODE(pVCpu))
    {
        case IEMMODE_64BIT:
            RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
            break;
        case IEMMODE_32BIT:
            RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
            break;
        case IEMMODE_16BIT:
            RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
            break;
    }
#endif
}
1507
1508
/**
 * Initializes the opcode fetcher when starting the compilation.
 *
 * @param   pVCpu   The cross context virtual CPU structure of the calling
 *                  thread.
 */
DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
{
    /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
    pVCpu->iem.s.offOpcode = 0;
#else
    RT_NOREF(pVCpu);
#endif
}
1524
1525
/**
 * Re-initializes the opcode fetcher between instructions while compiling.
 *
 * If the flat PC still falls inside the current instruction buffer, only the
 * buffer offsets are recalculated; otherwise the buffer state is reset so the
 * next fetch triggers a (TLB) reload.
 *
 * @param   pVCpu   The cross context virtual CPU structure of the calling
 *                  thread.
 */
DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
{
    if (pVCpu->iem.s.pbInstrBuf)
    {
        /* Translate the flat PC (rip + CS base) into an offset within the
           currently mapped instruction buffer. */
        uint64_t off = pVCpu->cpum.GstCtx.rip;
        Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
        off += pVCpu->cpum.GstCtx.cs.u64Base;
        off -= pVCpu->iem.s.uInstrBufPc;
        if (off < pVCpu->iem.s.cbInstrBufTotal)
        {
            pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
            pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
            /* Cap the readable window at 15 bytes (max x86 instruction
               length) or at the end of the buffer, whichever is smaller. */
            if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
                pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
            else
                pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
        }
        else
        {
            /* PC left the buffered page: drop the buffer to force a reload. */
            pVCpu->iem.s.pbInstrBuf = NULL;
            pVCpu->iem.s.offInstrNextByte = 0;
            pVCpu->iem.s.offCurInstrStart = 0;
            pVCpu->iem.s.cbInstrBuf = 0;
            pVCpu->iem.s.cbInstrBufTotal = 0;
            pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
        }
    }
    else
    {
        /* No buffer mapped at all; just clear the offsets. */
        pVCpu->iem.s.offInstrNextByte = 0;
        pVCpu->iem.s.offCurInstrStart = 0;
        pVCpu->iem.s.cbInstrBuf = 0;
        pVCpu->iem.s.cbInstrBufTotal = 0;
#ifdef VBOX_STRICT
        pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
#endif
    }
#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
    pVCpu->iem.s.offOpcode = 0;
#endif
}
1573
1574
1575DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1576{
1577 switch (cbInstr)
1578 {
1579 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1580 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1581 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1582 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1583 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1584 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1585 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1586 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1587 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1588 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1589 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1590 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1591 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1592 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1593 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1594 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1595 }
1596}
1597
1598
1599/**
1600 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1601 *
1602 * - CS LIM check required.
1603 * - Must recheck opcode bytes.
1604 * - Previous instruction branched.
1605 * - TLB load detected, probably due to page crossing.
1606 *
1607 * @returns true if everything went well, false if we're out of space in the TB
1608 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
1609 * @param pVCpu The cross context virtual CPU structure of the calling
1610 * thread.
1611 * @param pTb The translation block being compiled.
1612 */
1613bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1614{
1615 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1616 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1617#if 0
1618 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1619 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1620#endif
1621
1622 /*
1623 * If we're not in 64-bit mode and not already checking CS.LIM we need to
1624 * see if it's needed to start checking.
1625 */
1626 bool fConsiderCsLimChecking;
1627 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
1628 if ( fMode == IEM_F_MODE_X86_64BIT
1629 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
1630 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
1631 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
1632 fConsiderCsLimChecking = false; /* already enabled or not needed */
1633 else
1634 {
1635 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
1636 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
1637 fConsiderCsLimChecking = true; /* likely */
1638 else
1639 {
1640 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
1641 return false;
1642 }
1643 }
1644
1645 /*
1646 * Prepare call now, even before we know if can accept the instruction in this TB.
1647 * This allows us amending parameters w/o making every case suffer.
1648 */
1649 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
1650 uint16_t const offOpcode = pTb->cbOpcodes;
1651 uint8_t idxRange = pTb->cRanges - 1;
1652
1653 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
1654 pCall->idxInstr = pTb->cInstructions;
1655 pCall->offOpcode = offOpcode;
1656 pCall->idxRange = idxRange;
1657 pCall->cbOpcode = cbInstr;
1658 pCall->auParams[0] = cbInstr;
1659 pCall->auParams[1] = idxRange;
1660 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
1661
1662/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
1663 * gotten onto. If we do, stop */
1664
1665 /*
1666 * Case 1: We've branched (RIP changed).
1667 *
1668 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
1669 * Req: 1 extra range, no extra phys.
1670 *
1671 * Sub-case 1b: Different page but no page boundrary crossing, so TLB load
1672 * necessary (fTbCrossedPage is true).
1673 * Req: 1 extra range, probably 1 extra phys page entry.
1674 *
1675 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
1676 * but in addition we cross into the following page and require
1677 * another TLB load.
1678 * Req: 2 extra ranges, probably 2 extra phys page entries.
1679 *
1680 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
1681 * the following page (thus fTbCrossedPage is true).
1682 * Req: 2 extra ranges, probably 1 extra phys page entry.
1683 *
1684 * Note! The setting fTbCrossedPage is done by the iemOpcodeFetchBytesJmp, but
1685 * it may trigger "spuriously" from the CPU point of view because of
1686 * physical page changes that'll invalid the physical TLB and trigger a
1687 * call to the function. In theory this be a big deal, just a bit
1688 * performance loss as we'll pick the LoadingTlb variants.
1689 *
1690 * Note! We do not currently optimize branching to the next instruction (sorry
1691 * 32-bit PIC code). We could maybe do that in the branching code that
1692 * sets (or not) fTbBranched.
1693 */
1694 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
1695 * variant in win 3.1 code and the call variant in 32-bit linux PIC
1696 * code. This'll require filtering out far jmps and calls, as they
1697 * load CS which should technically be considered indirect since the
1698 * GDT/LDT entry's base address can be modified independently from
1699 * the code. */
1700 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
1701 {
1702 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
1703 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
1704 {
1705 /* 1a + 1b - instruction fully within the branched to page. */
1706 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
1707 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
1708
1709 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
1710 {
1711 /* Check that we've got a free range. */
1712 idxRange += 1;
1713 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1714 { /* likely */ }
1715 else
1716 {
1717 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1718 return false;
1719 }
1720 pCall->idxRange = idxRange;
1721 pCall->auParams[1] = idxRange;
1722 pCall->auParams[2] = 0;
1723
1724 /* Check that we've got a free page slot. */
1725 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1726 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1727 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1728 pTb->aRanges[idxRange].idxPhysPage = 0;
1729 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1730 || pTb->aGCPhysPages[0] == GCPhysNew)
1731 {
1732 pTb->aGCPhysPages[0] = GCPhysNew;
1733 pTb->aRanges[idxRange].idxPhysPage = 1;
1734 }
1735 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1736 || pTb->aGCPhysPages[1] == GCPhysNew)
1737 {
1738 pTb->aGCPhysPages[1] = GCPhysNew;
1739 pTb->aRanges[idxRange].idxPhysPage = 2;
1740 }
1741 else
1742 {
1743 Log8(("%04x:%08RX64: out of aGCPhysPages entires after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1744 return false;
1745 }
1746
1747 /* Finish setting up the new range. */
1748 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1749 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1750 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1751 pTb->aRanges[idxRange].u2Unused = 0;
1752 pTb->cRanges++;
1753 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
1754 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
1755 pTb->aRanges[idxRange].offOpcodes));
1756 }
1757 else
1758 {
1759 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1760 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1761 }
1762
1763 /* Determin which function we need to load & check.
1764 Note! For jumps to a new page, we'll set both fTbBranched and
1765 fTbCrossedPage to avoid unnecessary TLB work for intra
1766 page branching */
1767 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
1768 || pVCpu->iem.s.fTbCrossedPage)
1769 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1770 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1771 : !fConsiderCsLimChecking
1772 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1773 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
1774 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
1775 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1776 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1777 : !fConsiderCsLimChecking
1778 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
1779 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
1780 else
1781 {
1782 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
1783 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1784 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1785 : !fConsiderCsLimChecking
1786 ? kIemThreadedFunc_BltIn_CheckOpcodes
1787 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
1788 }
1789 }
1790 else
1791 {
1792 /* 1c + 1d - instruction crosses pages. */
1793 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1794 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1795
1796 /* Lazy bird: Check that this isn't case 1c, since we've already
1797 load the first physical address. End the TB and
1798 make it a case 2b instead.
1799
1800 Hmm. Too much bother to detect, so just do the same
1801 with case 1d as well. */
1802#if 0 /** @todo get back to this later when we've got the actual branch code in
1803 * place. */
1804 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1805
1806 /* Check that we've got two free ranges. */
1807 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
1808 { /* likely */ }
1809 else
1810 return false;
1811 idxRange += 1;
1812 pCall->idxRange = idxRange;
1813 pCall->auParams[1] = idxRange;
1814 pCall->auParams[2] = 0;
1815
1816 /* ... */
1817
1818#else
1819 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1820 return false;
1821#endif
1822 }
1823 }
1824
1825 /*
1826 * Case 2: Page crossing.
1827 *
1828 * Sub-case 2a: The instruction starts on the first byte in the next page.
1829 *
1830 * Sub-case 2b: The instruction has opcode bytes in both the current and
1831 * following page.
1832 *
1833 * Both cases requires a new range table entry and probably a new physical
1834 * page entry. The difference is in which functions to emit and whether to
1835 * add bytes to the current range.
1836 */
1837 else if (pVCpu->iem.s.fTbCrossedPage)
1838 {
1839 /* Check that we've got a free range. */
1840 idxRange += 1;
1841 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1842 { /* likely */ }
1843 else
1844 {
1845 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1846 return false;
1847 }
1848
1849 /* Check that we've got a free page slot. */
1850 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1851 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1852 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1853 pTb->aRanges[idxRange].idxPhysPage = 0;
1854 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1855 || pTb->aGCPhysPages[0] == GCPhysNew)
1856 {
1857 pTb->aGCPhysPages[0] = GCPhysNew;
1858 pTb->aRanges[idxRange].idxPhysPage = 1;
1859 }
1860 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1861 || pTb->aGCPhysPages[1] == GCPhysNew)
1862 {
1863 pTb->aGCPhysPages[1] = GCPhysNew;
1864 pTb->aRanges[idxRange].idxPhysPage = 2;
1865 }
1866 else
1867 {
1868 Log8(("%04x:%08RX64: out of aGCPhysPages entires while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1869 return false;
1870 }
1871
1872 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
1873 {
1874 Assert(pVCpu->iem.s.offCurInstrStart == 0);
1875 pCall->idxRange = idxRange;
1876 pCall->auParams[1] = idxRange;
1877 pCall->auParams[2] = 0;
1878
1879 /* Finish setting up the new range. */
1880 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1881 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1882 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1883 pTb->aRanges[idxRange].u2Unused = 0;
1884 pTb->cRanges++;
1885 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
1886 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
1887 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
1888
1889 /* Determin which function we need to load & check. */
1890 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1891 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1892 : !fConsiderCsLimChecking
1893 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1894 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
1895 }
1896 else
1897 {
1898 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1899 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1900 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1901 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
1902
1903 /* We've good. Split the instruction over the old and new range table entries. */
1904 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
1905
1906 pTb->aRanges[idxRange].offPhysPage = 0;
1907 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
1908 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
1909 pTb->aRanges[idxRange].u2Unused = 0;
1910 pTb->cRanges++;
1911 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
1912 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
1913 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
1914
1915 /* Determin which function we need to load & check. */
1916 if (pVCpu->iem.s.fTbCheckOpcodes)
1917 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1918 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1919 : !fConsiderCsLimChecking
1920 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1921 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
1922 else
1923 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1924 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1925 : !fConsiderCsLimChecking
1926 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1927 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
1928 }
1929 }
1930
1931 /*
1932 * Regular case: No new range required.
1933 */
1934 else
1935 {
1936 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
1937 if (pVCpu->iem.s.fTbCheckOpcodes)
1938 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1939 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1940 : kIemThreadedFunc_BltIn_CheckOpcodes;
1941 else
1942 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
1943
1944 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1945 pTb->cbOpcodes = offOpcode + cbInstr;
1946 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1947 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
1948 }
1949
1950 /*
1951 * Commit the call.
1952 */
1953 pTb->Thrd.cCalls++;
1954
1955 /*
1956 * Clear state.
1957 */
1958 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1959 pVCpu->iem.s.fTbCrossedPage = false;
1960 pVCpu->iem.s.fTbCheckOpcodes = false;
1961
1962 /*
1963 * Copy opcode bytes.
1964 */
1965 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1966 pTb->cbOpcodes = offOpcode + cbInstr;
1967 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
1968
1969 return true;
1970}
1971
1972
1973/**
1974 * Worker for iemThreadedCompileBeginEmitCallsComplications and
1975 * iemThreadedCompileCheckIrq that checks for pending delivarable events.
1976 *
1977 * @returns true if anything is pending, false if not.
1978 * @param pVCpu The cross context virtual CPU structure of the calling
1979 * thread.
1980 */
1981DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
1982{
1983 uint64_t fCpu = pVCpu->fLocalForcedActions;
1984 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
1985#if 1
1986 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
1987 if (RT_LIKELY( !fCpu
1988 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
1989 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
1990 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
1991 return false;
1992 return true;
1993#else
1994 return false;
1995#endif
1996
1997}
1998
1999
2000/**
2001 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2002 * set.
2003 *
2004 * @returns true if we should continue, false if an IRQ is deliverable or a
2005 * relevant force flag is pending.
2006 * @param pVCpu The cross context virtual CPU structure of the calling
2007 * thread.
2008 * @param pTb The translation block being compiled.
2009 * @sa iemThreadedCompileCheckIrq
2010 */
2011bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2012{
2013 /*
2014 * Skip this we've already emitted a call after the previous instruction
2015 * or if it's the first call, as we're always checking FFs between blocks.
2016 */
2017 uint32_t const idxCall = pTb->Thrd.cCalls;
2018 if ( idxCall > 0
2019 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2020 {
2021 /* Emit the call. */
2022 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2023 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2024 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2025 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2026 pCall->idxInstr = pTb->cInstructions;
2027 pCall->uUnused0 = 0;
2028 pCall->offOpcode = 0;
2029 pCall->cbOpcode = 0;
2030 pCall->idxRange = 0;
2031 pCall->auParams[0] = 0;
2032 pCall->auParams[1] = 0;
2033 pCall->auParams[2] = 0;
2034 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2035
2036 /* Reset the IRQ check value. */
2037 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2038
2039 /*
2040 * Check for deliverable IRQs and pending force flags.
2041 */
2042 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2043 }
2044 return true; /* continue */
2045}
2046
2047
2048/**
2049 * Emits an IRQ check call and checks for pending IRQs.
2050 *
2051 * @returns true if we should continue, false if an IRQ is deliverable or a
2052 * relevant force flag is pending.
2053 * @param pVCpu The cross context virtual CPU structure of the calling
2054 * thread.
2055 * @param pTb The transation block.
2056 * @sa iemThreadedCompileBeginEmitCallsComplications
2057 */
2058static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2059{
2060 /* Check again in a little bit, unless it is immediately following an STI
2061 in which case we *must* check immediately after the next instruction
2062 as well in case it's executed with interrupt inhibition. We could
2063 otherwise miss the interrupt window. See the irq2 wait2 varaiant in
2064 bs3-timers-1 which is doing sti + sti + cli. */
2065 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2066 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2067 else
2068 {
2069 pVCpu->iem.s.fTbCurInstrIsSti = false;
2070 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2071 }
2072 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2073
2074 /*
2075 * Emit the call.
2076 */
2077 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2078 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2079 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2080 pCall->idxInstr = pTb->cInstructions;
2081 pCall->uUnused0 = 0;
2082 pCall->offOpcode = 0;
2083 pCall->cbOpcode = 0;
2084 pCall->idxRange = 0;
2085 pCall->auParams[0] = 0;
2086 pCall->auParams[1] = 0;
2087 pCall->auParams[2] = 0;
2088
2089 /*
2090 * Check for deliverable IRQs and pending force flags.
2091 */
2092 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2093}
2094
2095
2096/**
2097 * Compiles a new TB and executes it.
2098 *
2099 * We combine compilation and execution here as it makes it simpler code flow
2100 * in the main loop and it allows interpreting while compiling if we want to
2101 * explore that option.
2102 *
2103 * @returns Strict VBox status code.
2104 * @param pVM The cross context virtual machine structure.
2105 * @param pVCpu The cross context virtual CPU structure of the calling
2106 * thread.
2107 * @param GCPhysPc The physical address corresponding to the current
2108 * RIP+CS.BASE.
2109 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2110 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2111 */
static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
{
    /* Caller must not have picked a TB type; we always compile threaded here. */
    Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
    fExtraFlags |= IEMTB_F_TYPE_THREADED;

    /*
     * Get the TB we use for the recompiling.  This is a maxed-out TB so
     * that we'll make a more efficient copy of it when we're done compiling.
     */
    PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
    if (pTb)
        iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
    else
    {
        /* First compilation on this vCPU: allocate the scratch TB and cache it. */
        pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
        AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
        pVCpu->iem.s.pThrdCompileTbR3 = pTb;
    }

    /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
       functions may get at it. */
    pVCpu->iem.s.pCurTbR3 = pTb;

#if 0
    /* Make sure the CheckIrq condition matches the one in EM. */
    iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
    const uint32_t cZeroCalls = 1;
#else
    /* Number of calls a TB has before any instruction was compiled into it. */
    const uint32_t cZeroCalls = 0;
#endif

    /*
     * Now for the recompilation.  (This mimics IEMExecLots in many ways.)
     */
    iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
    iemThreadedCompileInitOpcodeFetching(pVCpu);
    VBOXSTRICTRC rcStrict;
    for (;;)
    {
        /* Process the next instruction. */
#ifdef LOG_ENABLED
        iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
        /* Capture CS:RIP before decoding advances them, for the end-of-TB logging. */
        uint16_t const uCsLog  = pVCpu->cpum.GstCtx.cs.Sel;
        uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
#endif
        uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
        uint16_t const cCallsPrev = pTb->Thrd.cCalls;

        /* Dispatch to the recompiler function for this opcode; it appends
           threaded call entries to pTb rather than executing the instruction. */
        rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
        if (   rcStrict == VINF_SUCCESS
            && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
            && !pVCpu->iem.s.fEndTb)
        {
            Assert(pTb->Thrd.cCalls > cCallsPrev);
            /* NOTE(review): given the assertion above, cCallsPrev - cCalls is
               negative after integer promotion, so this can never fire.  The
               intended check was presumably pTb->Thrd.cCalls - cCallsPrev < 5
               (matching the +5 headroom test below) -- confirm upstream. */
            Assert(cCallsPrev - pTb->Thrd.cCalls < 5);

            pVCpu->iem.s.cInstructions++;
        }
        else
        {
            /* The instruction ended the TB (branch, CIMPL, error, ...). */
            Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
                  uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
            if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
                rcStrict = VINF_SUCCESS;

            if (pTb->Thrd.cCalls > cZeroCalls)
            {
                /* The terminating instruction may still have emitted calls. */
                if (cCallsPrev != pTb->Thrd.cCalls)
                    pVCpu->iem.s.cInstructions++;
                break;
            }

            /* Nothing was compiled at all; don't commit an empty TB. */
            pVCpu->iem.s.pCurTbR3 = NULL;
            return iemExecStatusCodeFiddling(pVCpu, rcStrict);
        }

        /* Check for IRQs? */
        if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
            pVCpu->iem.s.cInstrTillIrqCheck--;
        else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
            break;

        /* Still space in the TB?  (Worst case per instruction: 5 calls and
           16 opcode bytes, judging by the margins used here.) */
        if (   pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
            && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated)
            iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
        else
        {
            Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
                  uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
            break;
        }
        iemThreadedCompileReInitOpcodeFetching(pVCpu);
    }

    /*
     * Duplicate the TB into a completed one and link it.
     */
    pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
    AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);

    iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);

#ifdef IEM_COMPILE_ONLY_MODE
    /*
     * Execute the translation block.
     */
#endif

    return iemExecStatusCodeFiddling(pVCpu, rcStrict);
}
2223
2224
2225
2226/*********************************************************************************************************************************
2227* Recompiled Execution Core *
2228*********************************************************************************************************************************/
2229
2230/**
2231 * Executes a translation block.
2232 *
2233 * @returns Strict VBox status code.
2234 * @param pVCpu The cross context virtual CPU structure of the calling
2235 * thread.
2236 * @param pTb The translation block to execute.
2237 */
static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
{
    /*
     * Check the opcodes in the first page before starting execution.
     * If the guest code was modified since compilation, retire the TB and
     * return success so the caller recompiles at the current PC.
     */
    Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
    Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
    if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
    { /* likely */ }
    else
    {
        Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
        iemThreadedTbObsolete(pVCpu, pTb, true /*fSafeToFree*/);
        return VINF_SUCCESS;
    }

    /*
     * Set the current TB so CIMPL functions may get at it.
     */
    pVCpu->iem.s.pCurTbR3 = pTb;

    /*
     * Execute the block.
     */
#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
    if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
    {
        /* Natively recompiled TB: a single call into the generated code. */
        pVCpu->iem.s.cTbExecNative++;
# ifdef LOG_ENABLED
        iemThreadedLogCurInstr(pVCpu, "EXn", 0);
# endif
# ifdef RT_ARCH_AMD64
        VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
# else
        /* Non-AMD64 native code takes the guest context as a 2nd argument. */
        VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
# endif
        if (RT_LIKELY(   rcStrict == VINF_SUCCESS
                      && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
        { /* likely */ }
        else
        {
            /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
            pVCpu->iem.s.pCurTbR3 = NULL;
            STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);

            /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
               only to break out of TB execution early. */
            if (rcStrict == VINF_IEM_REEXEC_BREAK)
                return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
            return iemExecStatusCodeFiddling(pVCpu, rcStrict);
        }
    }
    else
#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
    {
        /*
         * The threaded execution loop: dispatch each recorded call entry to
         * its threaded function until done or until one breaks out.
         */
        pVCpu->iem.s.cTbExecThreaded++;
#ifdef LOG_ENABLED
        uint64_t uRipPrev = UINT64_MAX;
#endif
        PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
        uint32_t             cCallsLeft = pTb->Thrd.cCalls;
        while (cCallsLeft-- > 0)
        {
#ifdef LOG_ENABLED
            /* Only log the instruction state when RIP changed, i.e. once per
               guest instruction rather than once per call entry. */
            if (pVCpu->cpum.GstCtx.rip != uRipPrev)
            {
                uRipPrev = pVCpu->cpum.GstCtx.rip;
                iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
            }
            Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
                  pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
                  g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
#endif
            VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
                                                                                             pCallEntry->auParams[0],
                                                                                             pCallEntry->auParams[1],
                                                                                             pCallEntry->auParams[2]);
            if (RT_LIKELY(   rcStrict == VINF_SUCCESS
                          && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
                pCallEntry++;
            else
            {
                pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
                pVCpu->iem.s.pCurTbR3 = NULL;
                STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);

                /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
                   only to break out of TB execution early. */
                if (rcStrict == VINF_IEM_REEXEC_BREAK)
                    return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
                return iemExecStatusCodeFiddling(pVCpu, rcStrict);
            }
        }
    }

    /* Whole TB executed: account all its instructions in one go. */
    pVCpu->iem.s.cInstructions += pTb->cInstructions;
    pVCpu->iem.s.pCurTbR3 = NULL;
    return VINF_SUCCESS;
}
2340
2341
2342/**
2343 * This is called when the PC doesn't match the current pbInstrBuf.
2344 *
2345 * Upon return, we're ready for opcode fetching. But please note that
2346 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2347 * MMIO or unassigned).
2348 */
2349static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2350{
2351 pVCpu->iem.s.pbInstrBuf = NULL;
2352 pVCpu->iem.s.offCurInstrStart = 0;
2353 pVCpu->iem.s.offInstrNextByte = 0;
2354 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2355 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2356}
2357
2358
2359/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
2360DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2361{
2362 /*
2363 * Set uCurTbStartPc to RIP and calc the effective PC.
2364 */
2365 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2366 pVCpu->iem.s.uCurTbStartPc = uPc;
2367 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2368 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2369
2370 /*
2371 * Advance within the current buffer (PAGE) when possible.
2372 */
2373 if (pVCpu->iem.s.pbInstrBuf)
2374 {
2375 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2376 if (off < pVCpu->iem.s.cbInstrBufTotal)
2377 {
2378 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2379 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2380 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2381 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2382 else
2383 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2384
2385 return pVCpu->iem.s.GCPhysInstrBuf + off;
2386 }
2387 }
2388 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2389}
2390
2391
2392/**
2393 * Determines the extra IEMTB_F_XXX flags.
2394 *
2395 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2396 * IEMTB_F_CS_LIM_CHECKS (or zero).
2397 * @param pVCpu The cross context virtual CPU structure of the calling
2398 * thread.
2399 */
2400DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2401{
2402 uint32_t fRet = 0;
2403
2404 /*
2405 * Determine the inhibit bits.
2406 */
2407 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2408 { /* typical */ }
2409 else
2410 {
2411 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2412 fRet |= IEMTB_F_INHIBIT_SHADOW;
2413 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2414 fRet |= IEMTB_F_INHIBIT_NMI;
2415 }
2416
2417 /*
2418 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2419 * likely to go invalid before the end of the translation block.
2420 */
2421 if (IEM_IS_64BIT_CODE(pVCpu))
2422 return fRet;
2423
2424 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2425 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2426 return fRet;
2427 return fRet | IEMTB_F_CS_LIM_CHECKS;
2428}
2429
2430
/**
 * The recompiler run loop: looks up or compiles a TB for the current PC and
 * executes it, repeating until a force flag, timer poll or status code stops
 * the loop.
 *
 * @returns Strict VBox status code.
 * @param   pVM     The cross context VM structure.
 * @param   pVCpu   The cross context virtual CPU structure of the calling
 *                  thread.
 */
VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
{
    /*
     * See if there is an interrupt pending in TRPM, inject it if we can.
     */
    if (!TRPMHasTrap(pVCpu))
    { /* likely */ }
    else
    {
        VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
        if (RT_LIKELY(rcStrict == VINF_SUCCESS))
        { /* likely */ }
        else
            return rcStrict;
    }

    /*
     * Init the execution environment.
     */
    iemInitExec(pVCpu, 0 /*fExecOpts*/);
    /* Seed the millisecond timestamp used by the timer polling below on first use. */
    if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
    { }
    else
        pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);

    /*
     * Run-loop.
     *
     * If we're using setjmp/longjmp we combine all the catching here to avoid
     * having to call setjmp for each block we're executing.
     */
    PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
    for (;;)
    {
        PIEMTB       pTb = NULL;
        VBOXSTRICTRC rcStrict;
        IEM_TRY_SETJMP(pVCpu, rcStrict)
        {
            uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eigth of that seems reasonable for now. */
            for (uint32_t iIterations = 0; ; iIterations++)
            {
                /* Translate PC to physical address, we'll need this for both lookup and compilation. */
                RTGCPHYS const GCPhysPc    = iemGetPcWithPhysAndCode(pVCpu);
                uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);

                /* Execute an existing TB for this PC+flags, or compile a new one. */
                pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
                if (pTb)
                    rcStrict = iemTbExec(pVCpu, pTb);
                else
                    rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
                if (rcStrict == VINF_SUCCESS)
                {
                    Assert(pVCpu->iem.s.cActiveMappings == 0);

                    /* Mask out the force flags we can safely keep running with. */
                    uint64_t fCpu = pVCpu->fLocalForcedActions;
                    fCpu &= VMCPU_FF_ALL_MASK & ~(  VMCPU_FF_PGM_SYNC_CR3
                                                  | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
                                                  | VMCPU_FF_TLB_FLUSH
                                                  | VMCPU_FF_UNHALT );
                    /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
                    if (RT_LIKELY(   (   !fCpu
                                      || (   !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
                                          && (   !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
                                              || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
                                  && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
                    {
                        /* Poll the timers only every cPollRate+1 iterations. */
                        if (RT_LIKELY(   (iIterations & cPollRate) != 0
                                      || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
                            pTb = NULL; /* Clear it before looping so iemTbCacheLookup can safely do native recompilation. */
                        else
                            return VINF_SUCCESS;
                    }
                    else
                        return VINF_SUCCESS;
                }
                else
                    return rcStrict;
            }
        }
        IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
        {
            /* A longjmp got us here; roll back any active memory mappings and
               hand the status code to the caller. */
            pVCpu->iem.s.cLongJumps++;
            if (pVCpu->iem.s.cActiveMappings > 0)
                iemMemRollback(pVCpu);

#if 0 /** @todo do we need to clean up anything?  If not, we can drop the pTb = NULL some lines up and change the scope. */
            /* If pTb isn't NULL we're in iemTbExec. */
            if (!pTb)
            {
                /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
                pTb = pVCpu->iem.s.pCurTbR3;
                if (pTb)
                {
                    if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
                        return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
                    Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
                }
            }
#endif
            return rcStrict;
        }
        IEM_CATCH_LONGJMP_END(pVCpu);
    }
}
2535
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette