VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@ 102448

Last change on this file since 102448 was 102394, checked in by vboxsync, 17 months ago

VMM/IEM: Refactored the native IEM_MC_FETCH_MEM_Uxx code so it can be shared with the store MCs and is a little bit more efficient. Found 2 instructions that weren't getting FLAT variants because they were using hardcoded addresses (Ov/Ob). Implemented a simple solution for the instruction counting over longjmp/throw. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 103.7 KB
1/* $Id: IEMAllThrdRecompiler.cpp 102394 2023-11-30 13:28:53Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95
96
97/*
98 * Narrow down configs here to avoid wasting time on unused configs.
99 */
100
101#ifndef IEM_WITH_CODE_TLB
102# error The code TLB must be enabled for the recompiler.
103#endif
104
105#ifndef IEM_WITH_DATA_TLB
106# error The data TLB must be enabled for the recompiler.
107#endif
108
109#ifndef IEM_WITH_SETJMP
110# error The setjmp approach must be enabled for the recompiler.
111#endif
112
113
114/*********************************************************************************************************************************
115* Internal Functions *
116*********************************************************************************************************************************/
117static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb);
118
119
120/**
121 * Calculates the effective address of a ModR/M memory operand, extended version
122 * for use in the recompilers.
123 *
124 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
125 *
126 * May longjmp on internal error.
127 *
128 * @return The effective address.
129 * @param pVCpu The cross context virtual CPU structure of the calling thread.
130 * @param bRm The ModRM byte.
131 * @param cbImmAndRspOffset - First byte: The size of any immediate
132 * following the effective address opcode bytes
133 * (only for RIP relative addressing).
134 * - Second byte: RSP displacement (for POP [ESP]).
135 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
136 * SIB byte (bits 39:32).
137 *
138 * @note This must be defined in a source file with matching
139 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
140 * or implemented differently...
141 */
142RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
143{
144 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
145# define SET_SS_DEF() \
146 do \
147 { \
148 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
149 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
150 } while (0)
151
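/* Note: BP/EBP- and ESP/RSP-based effective addresses default to the SS segment
   on x86, which is what the SET_SS_DEF() invocations below encode when no
   explicit segment prefix overrides it. */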
152 if (!IEM_IS_64BIT_CODE(pVCpu))
153 {
154/** @todo Check the effective address size crap! */
155 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
156 {
157 uint16_t u16EffAddr;
158
159 /* Handle the disp16 form with no registers first. */
160 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
161 {
162 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
163 *puInfo = u16EffAddr;
164 }
165 else
166 {
167 /* Get the displacement. */
168 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
169 {
170 case 0: u16EffAddr = 0; break;
171 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
172 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
173 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
174 }
175 *puInfo = u16EffAddr;
176
177 /* Add the base and index registers to the disp. */
178 switch (bRm & X86_MODRM_RM_MASK)
179 {
180 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
181 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
182 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
183 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
184 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
185 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
186 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
187 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
188 }
189 }
190
191 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
192 return u16EffAddr;
193 }
194
195 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
196 uint32_t u32EffAddr;
197 uint64_t uInfo;
198
199 /* Handle the disp32 form with no registers first. */
200 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
201 {
202 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
203 uInfo = u32EffAddr;
204 }
205 else
206 {
207 /* Get the register (or SIB) value. */
208 uInfo = 0;
209 switch ((bRm & X86_MODRM_RM_MASK))
210 {
211 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
212 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
213 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
214 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
215 case 4: /* SIB */
216 {
217 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
218 uInfo = (uint64_t)bSib << 32;
219
220 /* Get the index and scale it. */
221 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
222 {
223 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
224 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
225 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
226 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
227 case 4: u32EffAddr = 0; /*none */ break;
228 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
229 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
230 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
231 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
232 }
233 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
234
235 /* add base */
236 switch (bSib & X86_SIB_BASE_MASK)
237 {
238 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
239 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
240 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
241 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
242 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
243 case 5:
244 if ((bRm & X86_MODRM_MOD_MASK) != 0)
245 {
246 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
247 SET_SS_DEF();
248 }
249 else
250 {
251 uint32_t u32Disp;
252 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
253 u32EffAddr += u32Disp;
254 uInfo |= u32Disp;
255 }
256 break;
257 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
258 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
259 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
260 }
261 break;
262 }
263 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
264 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
265 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
266 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
267 }
268
269 /* Get and add the displacement. */
270 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
271 {
272 case 0:
273 break;
274 case 1:
275 {
276 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
277 u32EffAddr += i8Disp;
278 uInfo |= (uint32_t)(int32_t)i8Disp;
279 break;
280 }
281 case 2:
282 {
283 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
284 u32EffAddr += u32Disp;
285 uInfo |= u32Disp;
286 break;
287 }
288 default:
289 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
290 }
291 }
292
293 *puInfo = uInfo;
294 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
295 return u32EffAddr;
296 }
297
298 uint64_t u64EffAddr;
299 uint64_t uInfo;
300
301 /* Handle the rip+disp32 form with no registers first. */
302 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
303 {
304 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
305 uInfo = (uint32_t)u64EffAddr;
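 /* RIP-relative: the disp32 is relative to the end of the instruction, so add
    the instruction length plus the size of any immediate still to come (the
    low byte of cbImmAndRspOffset). */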
306 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
307 }
308 else
309 {
310 /* Get the register (or SIB) value. */
311 uInfo = 0;
312 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
313 {
314 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
315 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
316 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
317 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
318 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
319 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
320 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
321 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
322 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
323 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
324 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
325 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
326 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
327 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
328 /* SIB */
329 case 4:
330 case 12:
331 {
332 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
333 uInfo = (uint64_t)bSib << 32;
334
335 /* Get the index and scale it. */
336 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
337 {
338 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
339 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
340 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
341 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
342 case 4: u64EffAddr = 0; /*none */ break;
343 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
344 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
345 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
346 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
347 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
348 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
349 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
350 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
351 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
352 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
353 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
354 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
355 }
356 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
357
358 /* add base */
359 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
360 {
361 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
362 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
363 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
364 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
365 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
366 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
367 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
368 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
369 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
370 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
371 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
372 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
373 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
374 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
375 /* complicated encodings */
376 case 5:
377 case 13:
378 if ((bRm & X86_MODRM_MOD_MASK) != 0)
379 {
380 if (!pVCpu->iem.s.uRexB)
381 {
382 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
383 SET_SS_DEF();
384 }
385 else
386 u64EffAddr += pVCpu->cpum.GstCtx.r13;
387 }
388 else
389 {
390 uint32_t u32Disp;
391 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
392 u64EffAddr += (int32_t)u32Disp;
393 uInfo |= u32Disp;
394 }
395 break;
396 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
397 }
398 break;
399 }
400 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
401 }
402
403 /* Get and add the displacement. */
404 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
405 {
406 case 0:
407 break;
408 case 1:
409 {
410 int8_t i8Disp;
411 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
412 u64EffAddr += i8Disp;
413 uInfo |= (uint32_t)(int32_t)i8Disp;
414 break;
415 }
416 case 2:
417 {
418 uint32_t u32Disp;
419 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
420 u64EffAddr += (int32_t)u32Disp;
421 uInfo |= u32Disp;
422 break;
423 }
424 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
425 }
426
427 }
428
429 *puInfo = uInfo;
430 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
431 {
432 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
433 return u64EffAddr;
434 }
435 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
436 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
437 return u64EffAddr & UINT32_MAX;
438}
439
440
441/*********************************************************************************************************************************
442* Translation Block Cache. *
443*********************************************************************************************************************************/
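/* Note: each hash table entry packs the head TB pointer together with the length
   of its collision list (see IEMTBCACHE_PTR_MAKE, IEMTBCACHE_PTR_GET_TB and
   IEMTBCACHE_PTR_GET_COUNT), so the chain length is known without walking it. */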
444
445/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
446static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
447{
448 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
449 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
450 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
451 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
452 if (cMsSinceUse1 != cMsSinceUse2)
453 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
454 if (pTb1->cUsed != pTb2->cUsed)
455 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
456 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
457 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
458 return 0;
459}
460
461#ifdef VBOX_STRICT
462/**
463 * Assertion helper that checks a collisions list count.
464 */
465static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
466{
467 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
468 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
469 while (pTb)
470 {
471 pTb = pTb->pNext;
472 cLeft--;
473 }
474 AssertMsg(cLeft == 0,
475 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
476 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
477}
478#endif
479
480
481DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
482{
483 STAM_PROFILE_START(&pTbCache->StatPrune, a);
484
485 /*
486 * First convert the collision list to an array.
487 */
488 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
489 uintptr_t cInserted = 0;
490 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
491
492 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
493
494 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
495 {
496 apSortedTbs[cInserted++] = pTbCollision;
497 pTbCollision = pTbCollision->pNext;
498 }
499
500 /* Free any excess (impossible). */
501 if (RT_LIKELY(!pTbCollision))
502 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
503 else
504 do
505 {
506 PIEMTB pTbToFree = pTbCollision;
507 pTbCollision = pTbToFree->pNext;
508 iemTbAllocatorFree(pVCpu, pTbToFree);
509 } while (pTbCollision);
510
511 /*
512 * Sort it by most recently used and usage count.
513 */
514 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
515
516 /* We keep half the list for now. Perhaps a bit aggressive... */
517 uintptr_t const cKeep = cInserted / 2;
518
519 /* First free up the TBs we don't wish to keep (before creating the new
520 list because otherwise the free code will scan the list for each one
521 without ever finding it). */
522 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
523 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
524
525 /* Then chain the new TB together with the ones we like to keep of the
526 existing ones and insert this list into the hash table. */
527 pTbCollision = pTb;
528 for (uintptr_t idx = 0; idx < cKeep; idx++)
529 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
530 pTbCollision->pNext = NULL;
531
532 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
533#ifdef VBOX_STRICT
534 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
535#endif
536
537 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
538}
539
540
541static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
542{
543 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
544 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
545 if (!pTbOldHead)
546 {
547 pTb->pNext = NULL;
548 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
549 }
550 else
551 {
552 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
553 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
554 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
555 {
556 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
557 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
558#ifdef VBOX_STRICT
559 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
560#endif
561 }
562 else
563 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
564 }
565}
566
567
568/**
569 * Unlinks @a pTb from the hash table if found in it.
570 *
571 * @returns true if unlinked, false if not present.
572 * @param pTbCache The hash table.
573 * @param pTb The TB to remove.
574 */
575static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
576{
577 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
578 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
579 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
580
581 /*
582 * At the head of the collision list?
583 */
584 if (pTbHash == pTb)
585 {
586 if (!pTb->pNext)
587 pTbCache->apHash[idxHash] = NULL;
588 else
589 {
590 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
591 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
592#ifdef VBOX_STRICT
593 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
594#endif
595 }
596 return true;
597 }
598
599 /*
600 * Search the collision list.
601 */
602 PIEMTB const pTbHead = pTbHash;
603 while (pTbHash)
604 {
605 PIEMTB const pNextTb = pTbHash->pNext;
606 if (pNextTb == pTb)
607 {
608 pTbHash->pNext = pTb->pNext;
609 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
610#ifdef VBOX_STRICT
611 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
612#endif
613 return true;
614 }
615 pTbHash = pNextTb;
616 }
617 return false;
618}
619
620
621/**
622 * Looks up a TB for the given PC and flags in the cache.
623 *
624 * @returns Pointer to TB on success, NULL if not found.
625 * @param pVCpu The cross context virtual CPU structure of the
626 * calling thread.
627 * @param pTbCache The translation block cache.
628 * @param GCPhysPc The PC to look up a TB for.
629 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
630 * the lookup.
631 * @thread EMT(pVCpu)
632 */
633static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
634 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
635{
636 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
637 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
638 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
639#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
640 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
641#endif
642 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p L %d\n", fFlags, GCPhysPc, idxHash, pTb, cLeft));
643 while (pTb)
644 {
645 if (pTb->GCPhysPc == GCPhysPc)
646 {
647 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
648 {
649 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
650 {
651 STAM_COUNTER_INC(&pTbCache->cLookupHits);
652 AssertMsg(cLeft > 0, ("%d\n", cLeft));
653
654 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
655 pTb->cUsed++;
656#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
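 /* Hand a threaded TB to the native recompiler once it has been used 16 times;
    otherwise (or if it is already native) just return it. */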
657 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != 16)
658 return pTb;
659 return iemNativeRecompile(pVCpu, pTb);
660#else
661 return pTb;
662#endif
663 }
664 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
665 }
666 else
667 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
668 }
669 else
670 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
671
672 pTb = pTb->pNext;
673#ifdef VBOX_STRICT
674 cLeft--;
675#endif
676 }
677 AssertMsg(cLeft == 0, ("%d\n", cLeft));
678 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
679 return pTb;
680}
681
682
683/*********************************************************************************************************************************
684* Translation Block Allocator. *
685*********************************************************************************************************************************/
686/*
687 * Translation block allocation management.
688 */
689
690#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
691# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
692 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
693# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
694 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
695# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
696 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
697#else
698# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
699 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
700# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
701 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
702# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
703 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
704#endif
705/** Makes a TB index from a chunk index and TB index within that chunk. */
706#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
707 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
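
/* Illustrative only: with e.g. cTbsPerChunk = 2048, TB index 5000 maps to chunk
   5000 / 2048 = 2 at in-chunk index 5000 - 2 * 2048 = 904, and
   IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 904) recombines them into 5000 again. */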
708
709
710/**
711 * Initializes the TB allocator and cache for an EMT.
712 *
713 * @returns VBox status code.
714 * @param pVM The VM handle.
715 * @param cInitialTbs The initial number of translation blocks to
716 * preallocate.
717 * @param cMaxTbs The max number of translation blocks allowed.
718 * @param cbInitialExec The initial size of the executable memory allocator.
719 * @param cbMaxExec The max size of the executable memory allocator.
720 * @param cbChunkExec The chunk size for executable memory allocator. Zero
721 * or UINT32_MAX for automatically determining this.
722 * @thread EMT
723 */
724DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
725 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
726{
727 PVMCPUCC pVCpu = VMMGetCpu(pVM);
728 Assert(!pVCpu->iem.s.pTbCacheR3);
729 Assert(!pVCpu->iem.s.pTbAllocatorR3);
730
731 /*
732 * Calculate the chunk size of the TB allocator.
733 * The minimum chunk size is 2MiB.
734 */
735 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
736 uint32_t cbPerChunk = _2M;
737 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
738#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
739 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
740 uint8_t cChunkShift = 21 - cTbShift;
741 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
742#endif
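 /* Double the chunk size until cMaxTbs TBs fit within the fixed number of chunk
    slots (RT_ELEMENTS(aChunks)); cTbsPerChunk scales along with it. */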
743 for (;;)
744 {
745 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
746 break;
747 cbPerChunk *= 2;
748 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
749#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
750 cChunkShift += 1;
751#endif
752 }
753
754 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
755 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
756 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
757
758 cMaxTbs = cMaxChunks * cTbsPerChunk;
759
760 /*
761 * Allocate and initialize it.
762 */
763 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
764 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
765 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
766 if (!pTbAllocator)
767 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
768 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
769 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
770 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
771 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
772 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
773 pTbAllocator->cbPerChunk = cbPerChunk;
774 pTbAllocator->cMaxTbs = cMaxTbs;
775#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
776 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
777 pTbAllocator->cChunkShift = cChunkShift;
778 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
779#endif
780
781 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
782 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
783
784 /*
785 * Allocate the initial chunks.
786 */
787 for (uint32_t idxChunk = 0; ; idxChunk++)
788 {
789 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
790 if (!paTbs)
791 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
792 "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
793 cbPerChunk, idxChunk, pVCpu->idCpu);
794
795 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
796 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
797 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
798 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
799 pTbAllocator->cTotalTbs += cTbsPerChunk;
800
801 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
802 break;
803 }
804
805 /*
806 * Calculate the size of the hash table. We double the max TB count and
807 * round it up to the nearest power of two.
808 */
809 uint32_t cCacheEntries = cMaxTbs * 2;
810 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
811 {
812 uint8_t const iBitTop = ASMBitLastSetU32(cCacheEntries);
813 cCacheEntries = RT_BIT_32(iBitTop);
814 Assert(cCacheEntries >= cMaxTbs * 2);
815 }
816
817 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
818 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
819 if (!pTbCache)
820 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
821 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
822 cbTbCache, cCacheEntries, pVCpu->idCpu);
823
824 /*
825 * Initialize it (assumes zeroed by the allocator).
826 */
827 pTbCache->uMagic = IEMTBCACHE_MAGIC;
828 pTbCache->cHash = cCacheEntries;
829 pTbCache->uHashMask = cCacheEntries - 1;
830 Assert(pTbCache->cHash > pTbCache->uHashMask);
831 pVCpu->iem.s.pTbCacheR3 = pTbCache;
832
833 /*
834 * Initialize the native executable memory allocator.
835 */
836#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
837 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
838 AssertLogRelRCReturn(rc, rc);
839#else
840 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
841#endif
842
843 return VINF_SUCCESS;
844}
845
846
847/**
848 * Inner free worker.
849 */
850static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
851 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
852{
853 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
854 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
855 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
856 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
857
858 /*
859 * Unlink the TB from the hash table.
860 */
861 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
862
863 /*
864 * Free the TB itself.
865 */
866 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
867 {
868 case IEMTB_F_TYPE_THREADED:
869 pTbAllocator->cThreadedTbs -= 1;
870 RTMemFree(pTb->Thrd.paCalls);
871 break;
872#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
873 case IEMTB_F_TYPE_NATIVE:
874 pTbAllocator->cNativeTbs -= 1;
875 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
876 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
877 break;
878#endif
879 default:
880 AssertFailed();
881 }
882 RTMemFree(pTb->pabOpcodes);
883
884 pTb->pNext = NULL;
885 pTb->fFlags = 0;
886 pTb->GCPhysPc = UINT64_MAX;
887 pTb->Gen.uPtr = 0;
888 pTb->Gen.uData = 0;
889 pTb->cbOpcodes = 0;
890 pTb->pabOpcodes = NULL;
891
892 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
893 Assert(pTbAllocator->cInUseTbs > 0);
894
895 pTbAllocator->cInUseTbs -= 1;
896 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
897}
898
899
900/**
901 * Frees the given TB.
902 *
903 * @param pVCpu The cross context virtual CPU structure of the calling
904 * thread.
905 * @param pTb The translation block to free.
906 * @thread EMT(pVCpu)
907 */
908static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
909{
910 /*
911 * Validate state.
912 */
913 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
914 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
915 uint8_t const idxChunk = pTb->idxAllocChunk;
916 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
917 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
918 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
919
920 /*
921 * Call inner worker.
922 */
923 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
924}
925
926
927/**
928 * Schedules a native TB for freeing when it's no longer being executed and
929 * part of the caller's call stack.
930 *
931 * The TB will be removed from the translation block cache, though, so it isn't
932 * possible to execute it again and the IEMTB::pNext member can be used to link
933 * it together with other TBs awaiting freeing.
934 *
935 * @param pVCpu The cross context virtual CPU structure of the calling
936 * thread.
937 * @param pTb The translation block to schedule for freeing.
938 */
939static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
940{
941 /*
942 * Validate state.
943 */
944 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
945 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
946 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
947 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
948 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
949 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
950 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
951 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
952
953 /*
954 * Remove it from the cache and prepend it to the allocator's todo list.
955 */
956 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
957
958 pTb->pNext = pTbAllocator->pDelayedFreeHead;
959 pTbAllocator->pDelayedFreeHead = pTb;
960}
961
962
963/**
964 * Processes the delayed frees.
965 *
966 * This is called by the allocator function as well as the native recompile
967 * function before making any TB or executable memory allocations respectively.
968 */
969void iemTbAllocatorProcessDelayedFrees(PVMCPU pVCpu, PIEMTBALLOCATOR pTbAllocator)
970{
971 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
972 pTbAllocator->pDelayedFreeHead = NULL;
973 while (pTb)
974 {
975 PIEMTB const pTbNext = pTb->pNext;
976 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
977 iemTbAllocatorFree(pVCpu, pTb);
978 pTb = pTbNext;
979 }
980}
981
982
983/**
984 * Grow the translation block allocator with another chunk.
985 */
986static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
987{
988 /*
989 * Validate state.
990 */
991 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
992 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
993 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
994 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
995 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
996
997 /*
998 * Allocate a new chunk and add it to the allocator.
999 */
1000 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1001 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1002 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1003
1004 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1005 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1006 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1007 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1008 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1009 pTbAllocator->cTotalTbs += cTbsPerChunk;
1010 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1011
1012 return VINF_SUCCESS;
1013}
1014
1015
1016/**
1017 * Allocates a TB from an allocator that has a free block.
1018 *
1019 * This is common code to both the fast and slow allocator code paths.
1020 */
1021DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1022{
1023 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1024
1025 int idxTb;
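 /* Scan the allocation bitmap for a free entry, starting at the hint rounded
    down to a 64-bit word boundary; fall back to a full scan from the start. */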
1026 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1027 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1028 pTbAllocator->cTotalTbs,
1029 pTbAllocator->iStartHint & ~(uint32_t)63);
1030 else
1031 idxTb = -1;
1032 if (idxTb < 0)
1033 {
1034 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1035 AssertLogRelReturn(idxTb >= 0, NULL);
1036 }
1037 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1038 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1039
1040 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1041 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1042 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1043 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1044 Assert(pTb->idxAllocChunk == idxChunk);
1045
1046 pTbAllocator->cInUseTbs += 1;
1047 if (fThreaded)
1048 pTbAllocator->cThreadedTbs += 1;
1049 else
1050 pTbAllocator->cNativeTbs += 1;
1051 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1052 return pTb;
1053}
1054
1055
1056/**
1057 * Slow path for iemTbAllocatorAlloc.
1058 */
1059static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1060{
1061 /*
1062 * With some luck we can add another chunk.
1063 */
1064 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1065 {
1066 int rc = iemTbAllocatorGrow(pVCpu);
1067 if (RT_SUCCESS(rc))
1068 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1069 }
1070
1071 /*
1072 * We have to prune stuff. Sigh.
1073 *
1074 * This requires scanning for older TBs and kicking them out. Not sure how to
1075 * best do this as we don't want to maintain any list of TBs ordered by last
1076 * usage time. But one reasonably simple approach would be that each time we
1077 * get here we continue a sequential scan of the allocation chunks,
1078 * considering just a smallish number of TBs and freeing a fixed portion of
1079 * them. Say, we consider the next 128 TBs, freeing the least recently used
1080 * out of each group of 4 TBs, resulting in 32 freed TBs.
1081 */
1082 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1083 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1084 uint32_t const cTbsToPrune = 128;
1085 uint32_t const cTbsPerGroup = 4;
1086 uint32_t cFreedTbs = 0;
1087#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1088 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1089#else
1090 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1091#endif
1092 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1093 idxTbPruneFrom = 0;
1094 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1095 {
1096 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1097 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1098 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1099 uint32_t cMsAge = msNow - pTb->msLastUsed;
1100 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1101
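 /* Within this group of cTbsPerGroup TBs, pick the oldest one (ties broken by
    the lower use count) as the victim to free. */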
1102 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1103 {
1104#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1105 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1106 { /* likely */ }
1107 else
1108 {
1109 idxInChunk2 = 0;
1110 idxChunk2 += 1;
1111 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1112 idxChunk2 = 0;
1113 }
1114#endif
1115 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1116 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1117 if ( cMsAge2 > cMsAge
1118 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1119 {
1120 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1121 pTb = pTb2;
1122 idxChunk = idxChunk2;
1123 idxInChunk = idxInChunk2;
1124 cMsAge = cMsAge2;
1125 }
1126 }
1127
1128 /* Free the TB. */
1129 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1130 cFreedTbs++; /* paranoia */
1131 }
1132 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1133 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1134
1135 /*
1136 * Allocate a TB from the ones we've pruned.
1137 */
1138 if (cFreedTbs)
1139 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1140 return NULL;
1141}
1142
1143
1144/**
1145 * Allocate a translation block.
1146 *
1147 * @returns Pointer to block on success, NULL if we're out and are unable to
1148 * free up an existing one (very unlikely once implemented).
1149 * @param pVCpu The cross context virtual CPU structure of the calling
1150 * thread.
1151 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1152 * For statistics.
1153 */
1154DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1155{
1156 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1157 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1158
1159 /* Free any pending TBs before we proceed. */
1160 if (!pTbAllocator->pDelayedFreeHead)
1161 { /* probably likely */ }
1162 else
1163 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1164
1165 /* If the allocator is full, take the slow code path. */
1166 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1167 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1168 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1169}
1170
1171
1172
1173/*********************************************************************************************************************************
1174* Threaded Recompiler Core *
1175*********************************************************************************************************************************/
1176
1177/**
1178 * Allocate a translation block for threaded recompilation.
1179 *
1180 * This is allocated with a maxed-out call table and storage for opcode bytes,
1181 * because it's only supposed to be called once per EMT to allocate the TB
1182 * pointed to by IEMCPU::pThrdCompileTbR3.
1183 *
1184 * @returns Pointer to the translation block on success, NULL on failure.
1185 * @param pVM The cross context virtual machine structure.
1186 * @param pVCpu The cross context virtual CPU structure of the calling
1187 * thread.
1188 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1189 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1190 */
1191static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1192{
1193 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1194 if (pTb)
1195 {
1196 unsigned const cCalls = 256;
1197 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1198 if (pTb->Thrd.paCalls)
1199 {
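 /* Opcode storage is sized at 16 bytes per call entry, comfortably above the
    15 byte x86 maximum instruction length. */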
1200 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1201 if (pTb->pabOpcodes)
1202 {
1203 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1204 pTb->Thrd.cAllocated = cCalls;
1205 pTb->Thrd.cCalls = 0;
1206 pTb->cbOpcodes = 0;
1207 pTb->pNext = NULL;
1208 pTb->cUsed = 0;
1209 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1210 pTb->idxAllocChunk = UINT8_MAX;
1211 pTb->GCPhysPc = GCPhysPc;
1212 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1213 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1214 pTb->cInstructions = 0;
1215
1216 /* Init the first opcode range. */
1217 pTb->cRanges = 1;
1218 pTb->aRanges[0].cbOpcodes = 0;
1219 pTb->aRanges[0].offOpcodes = 0;
1220 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1221 pTb->aRanges[0].u2Unused = 0;
1222 pTb->aRanges[0].idxPhysPage = 0;
1223 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1224 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1225
1226 return pTb;
1227 }
1228 RTMemFree(pTb->Thrd.paCalls);
1229 }
1230 RTMemFree(pTb);
1231 }
1232 RT_NOREF(pVM);
1233 return NULL;
1234}
1235
1236
1237/**
1238 * Called on the TB that is dedicated to recompilation before it's reused.
1239 *
1240 * @param pVCpu The cross context virtual CPU structure of the calling
1241 * thread.
1242 * @param pTb The translation block to reuse.
1243 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1244 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1245 */
1246static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1247{
1248 pTb->GCPhysPc = GCPhysPc;
1249 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1250 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1251 pTb->Thrd.cCalls = 0;
1252 pTb->cbOpcodes = 0;
1253 pTb->cInstructions = 0;
1254
1255 /* Init the first opcode range. */
1256 pTb->cRanges = 1;
1257 pTb->aRanges[0].cbOpcodes = 0;
1258 pTb->aRanges[0].offOpcodes = 0;
1259 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1260 pTb->aRanges[0].u2Unused = 0;
1261 pTb->aRanges[0].idxPhysPage = 0;
1262 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1263 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1264}
1265
1266
1267/**
1268 * Used to duplicate a threaded translation block after recompilation is done.
1269 *
1270 * @returns Pointer to the translation block on success, NULL on failure.
1271 * @param pVM The cross context virtual machine structure.
1272 * @param pVCpu The cross context virtual CPU structure of the calling
1273 * thread.
1274 * @param pTbSrc The TB to duplicate.
1275 */
1276static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1277{
1278 /*
1279 * Just using the heap for now. Will make this more efficient and
1280 * complicated later, don't worry. :-)
1281 */
1282 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1283 if (pTb)
1284 {
1285 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1286 memcpy(pTb, pTbSrc, sizeof(*pTb));
1287 pTb->idxAllocChunk = idxAllocChunk;
1288
1289 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1290 Assert(cCalls > 0);
1291 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1292 if (pTb->Thrd.paCalls)
1293 {
1294 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1295 Assert(cbOpcodes > 0);
1296 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1297 if (pTb->pabOpcodes)
1298 {
1299 pTb->Thrd.cAllocated = cCalls;
1300 pTb->pNext = NULL;
1301 pTb->cUsed = 0;
1302 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1303 pTb->fFlags = pTbSrc->fFlags;
1304
1305 return pTb;
1306 }
1307 RTMemFree(pTb->Thrd.paCalls);
1308 }
1309 iemTbAllocatorFree(pVCpu, pTb);
1310 }
1311 RT_NOREF(pVM);
1312 return NULL;
1313
1314}
1315
1316
1317/**
1318 * Adds the given TB to the hash table.
1319 *
1320 * @param pVCpu The cross context virtual CPU structure of the calling
1321 * thread.
1322 * @param pTbCache The cache to add it to.
1323 * @param pTb The translation block to add.
1324 */
1325static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1326{
1327 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1328
1329 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
1330 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1331 if (LogIs12Enabled())
1332 {
1333 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1334 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1335 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1336 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1337 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1338 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1339 pTb->aRanges[idxRange].idxPhysPage == 0
1340 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1341 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1342 }
1343}
1344
1345
1346/**
1347 * Called by opcode verifier functions when they detect a problem.
1348 */
1349void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1350{
1351 /* Unless it's safe, we can only immediately free a threaded TB, as we will
1352 have more code left to execute in native TBs when fSafeToFree == false. */
1353 if (fSafeToFree || (pTb->fFlags & IEMTB_F_TYPE_THREADED))
1354 iemTbAllocatorFree(pVCpu, pTb);
1355 else
1356 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1357}
1358
1359
1360/*
1361 * Real code.
1362 */
1363
1364#ifdef LOG_ENABLED
1365/**
1366 * Logs the current instruction.
1367 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1368 * @param pszFunction The IEM function doing the execution.
1369 * @param idxInstr The instruction number in the block.
1370 */
1371static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1372{
1373# ifdef IN_RING3
1374 if (LogIs2Enabled())
1375 {
1376 char szInstr[256];
1377 uint32_t cbInstr = 0;
1378 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1379 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1380 szInstr, sizeof(szInstr), &cbInstr);
1381
1382 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1383 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1384 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1385 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1386 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1387 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1388 " %s\n"
1389 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1390 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1391 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1392 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1393 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1394 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1395 szInstr));
1396
1397 if (LogIs3Enabled())
1398 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL);
1399 }
1400 else
1401# endif
1402 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1403 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1404}
1405#endif /* LOG_ENABLED */
1406
1407
1408#if 0
1409static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1410{
1411 RT_NOREF(pVM, pVCpu);
1412 return rcStrict;
1413}
1414#endif
1415
1416
1417/**
1418 * Initializes the decoder state when compiling TBs.
1419 *
1420 * This presumes that fExec has already been initialized.
1421 *
1422 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1423 * to apply fixes to them as well.
1424 *
1425 * @param pVCpu The cross context virtual CPU structure of the calling
1426 * thread.
1427 * @param fReInit Clear for the first call for a TB, set for subsequent
1428 * calls from inside the compile loop where we can skip a
1429 * couple of things.
1430 * @param fExtraFlags The extra translation block flags when @a fReInit is
1431 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1432 * checked.
1433 */
1434DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1435{
1436 /* ASSUMES: That iemInitExec was already called and that anyone changing
1437 CPU state affecting the fExec bits since then will have updated fExec! */
1438 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1439 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1440
1441 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1442
1443 /* Decoder state: */
1444 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1445 pVCpu->iem.s.enmEffAddrMode = enmMode;
1446 if (enmMode != IEMMODE_64BIT)
1447 {
1448 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1449 pVCpu->iem.s.enmEffOpSize = enmMode;
1450 }
1451 else
1452 {
1453 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1454 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1455 }
1456 pVCpu->iem.s.fPrefixes = 0;
1457 pVCpu->iem.s.uRexReg = 0;
1458 pVCpu->iem.s.uRexB = 0;
1459 pVCpu->iem.s.uRexIndex = 0;
1460 pVCpu->iem.s.idxPrefix = 0;
1461 pVCpu->iem.s.uVex3rdReg = 0;
1462 pVCpu->iem.s.uVexLength = 0;
1463 pVCpu->iem.s.fEvexStuff = 0;
1464 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1465 pVCpu->iem.s.offModRm = 0;
1466 pVCpu->iem.s.iNextMapping = 0;
1467
1468 if (!fReInit)
1469 {
1470 pVCpu->iem.s.cActiveMappings = 0;
1471 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1472 pVCpu->iem.s.fEndTb = false;
1473 pVCpu->iem.s.fTbCheckOpcodes = false;
1474 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1475 pVCpu->iem.s.fTbCrossedPage = false;
1476 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1477 pVCpu->iem.s.fTbCurInstrIsSti = false;
1478 /* Force RF clearing and TF checking on first instruction in the block
1479 as we don't really know what came before and should assume the worst: */
1480 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1481 }
1482 else
1483 {
1484 Assert(pVCpu->iem.s.cActiveMappings == 0);
1485 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1486 Assert(pVCpu->iem.s.fEndTb == false);
1487 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1488 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1489 }
1490 pVCpu->iem.s.fTbCurInstr = 0;
1491
1492#ifdef DBGFTRACE_ENABLED
1493 switch (IEM_GET_CPU_MODE(pVCpu))
1494 {
1495 case IEMMODE_64BIT:
1496 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1497 break;
1498 case IEMMODE_32BIT:
1499 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1500 break;
1501 case IEMMODE_16BIT:
1502 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1503 break;
1504 }
1505#endif
1506}
1507
1508
1509/**
1510 * Initializes the opcode fetcher when starting the compilation.
1511 *
1512 * @param pVCpu The cross context virtual CPU structure of the calling
1513 * thread.
1514 */
1515DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1516{
1517 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1518#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1519 pVCpu->iem.s.offOpcode = 0;
1520#else
1521 RT_NOREF(pVCpu);
1522#endif
1523}
1524
1525
1526/**
1527 * Re-initializes the opcode fetcher between instructions while compiling.
1528 *
1529 * @param pVCpu The cross context virtual CPU structure of the calling
1530 * thread.
1531 */
1532DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1533{
1534 if (pVCpu->iem.s.pbInstrBuf)
1535 {
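 /* Check whether the current RIP still falls inside the mapped instruction
    buffer; if it does, just re-derive the offsets, otherwise clear the buffer
    state so the next fetch reloads it via the code TLB. */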
1536 uint64_t off = pVCpu->cpum.GstCtx.rip;
1537 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1538 off += pVCpu->cpum.GstCtx.cs.u64Base;
1539 off -= pVCpu->iem.s.uInstrBufPc;
1540 if (off < pVCpu->iem.s.cbInstrBufTotal)
1541 {
1542 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1543 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1544 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1545 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1546 else
1547 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1548 }
1549 else
1550 {
1551 pVCpu->iem.s.pbInstrBuf = NULL;
1552 pVCpu->iem.s.offInstrNextByte = 0;
1553 pVCpu->iem.s.offCurInstrStart = 0;
1554 pVCpu->iem.s.cbInstrBuf = 0;
1555 pVCpu->iem.s.cbInstrBufTotal = 0;
1556 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1557 }
1558 }
1559 else
1560 {
1561 pVCpu->iem.s.offInstrNextByte = 0;
1562 pVCpu->iem.s.offCurInstrStart = 0;
1563 pVCpu->iem.s.cbInstrBuf = 0;
1564 pVCpu->iem.s.cbInstrBufTotal = 0;
1565#ifdef VBOX_STRICT
1566 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1567#endif
1568 }
1569#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1570 pVCpu->iem.s.offOpcode = 0;
1571#endif
1572}
1573
1574
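/**
 * Copies the opcode bytes of the current instruction from the decoder's
 * abOpcode buffer to the given destination.
 *
 * This is an unrolled, falling-through byte copy of at most 15 bytes (the
 * maximum x86 instruction length), presumably to avoid a memcpy call for
 * these short, variable-length copies.
 *
 * @param   pVCpu       The cross context virtual CPU structure of the calling
 *                      thread.
 * @param   pbDst       Where to copy the opcode bytes to.
 * @param   cbInstr     The number of opcode bytes to copy (1 to 15).
 */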
1575DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1576{
1577 switch (cbInstr)
1578 {
1579 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1580 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1581 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1582 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1583 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1584 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1585 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1586 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1587 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1588 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1589 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1590 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1591 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1592 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1593 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1594 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1595 }
1596}
1597
1598
1599/**
1600 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1601 *
1602 * - CS LIM check required.
1603 * - Must recheck opcode bytes.
1604 * - Previous instruction branched.
1605 * - TLB load detected, probably due to page crossing.
1606 *
1607 * @returns true if everything went well, false if we're out of space in the TB
1608 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
1609 * @param pVCpu The cross context virtual CPU structure of the calling
1610 * thread.
1611 * @param pTb The translation block being compiled.
1612 */
1613bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1614{
1615 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1616 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1617#if 0
1618 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1619 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1620#endif
1621
1622 /*
1623 * If we're not in 64-bit mode and not already checking CS.LIM we need to
1624 * see if it's needed to start checking.
1625 */
1626 bool fConsiderCsLimChecking;
1627 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
1628 if ( fMode == IEM_F_MODE_X86_64BIT
1629 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
1630 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
1631 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
1632 fConsiderCsLimChecking = false; /* already enabled or not needed */
1633 else
1634 {
1635 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
1636 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
1637 fConsiderCsLimChecking = true; /* likely */
1638 else
1639 {
1640 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
1641 return false;
1642 }
1643 }
1644
1645 /*
1646     * Prepare the call now, even before we know if we can accept the instruction in this TB.
1647     * This allows us to amend parameters w/o making every case suffer.
1648 */
1649 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
1650 uint16_t const offOpcode = pTb->cbOpcodes;
1651 uint8_t idxRange = pTb->cRanges - 1;
1652
1653 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
1654 pCall->idxInstr = pTb->cInstructions;
1655 pCall->offOpcode = offOpcode;
1656 pCall->idxRange = idxRange;
1657 pCall->cbOpcode = cbInstr;
1658 pCall->auParams[0] = cbInstr;
1659 pCall->auParams[1] = idxRange;
1660 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
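    /* At this point auParams describe the instruction as [0]=cbInstr, [1]=idxRange and
       [2]=the offset of its opcode bytes within that range; the cases below adjust these
       (and pCall->idxRange) whenever a new range has to be opened. */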
1661
1662/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
1663 * gotten onto. If we do, stop */
1664
1665 /*
1666 * Case 1: We've branched (RIP changed).
1667 *
1668 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
1669 * Req: 1 extra range, no extra phys.
1670 *
1671     * Sub-case 1b: Different page but no page boundary crossing, so TLB load
1672 * necessary (fTbCrossedPage is true).
1673 * Req: 1 extra range, probably 1 extra phys page entry.
1674 *
1675 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
1676 * but in addition we cross into the following page and require
1677 * another TLB load.
1678 * Req: 2 extra ranges, probably 2 extra phys page entries.
1679 *
1680 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
1681 * the following page (thus fTbCrossedPage is true).
1682 * Req: 2 extra ranges, probably 1 extra phys page entry.
1683 *
1684     * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
1685     *       it may trigger "spuriously" from the CPU's point of view because of
1686     *       physical page changes that'll invalidate the physical TLB and trigger a
1687     *       call to the function.  In theory this shouldn't be a big deal, just a bit
1688     *       of a performance loss as we'll pick the LoadingTlb variants.
1689 *
1690 * Note! We do not currently optimize branching to the next instruction (sorry
1691 * 32-bit PIC code). We could maybe do that in the branching code that
1692 * sets (or not) fTbBranched.
1693 */
1694 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
1695 * variant in win 3.1 code and the call variant in 32-bit linux PIC
1696 * code. This'll require filtering out far jmps and calls, as they
1697 * load CS which should technically be considered indirect since the
1698 * GDT/LDT entry's base address can be modified independently from
1699 * the code. */
1700 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
1701 {
1702 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
1703 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
1704 {
1705            /* 1a + 1b - instruction fully within the branched-to page. */
1706 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
1707 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
1708
1709 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
1710 {
1711 /* Check that we've got a free range. */
1712 idxRange += 1;
1713 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1714 { /* likely */ }
1715 else
1716 {
1717 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1718 return false;
1719 }
1720 pCall->idxRange = idxRange;
1721 pCall->auParams[1] = idxRange;
1722 pCall->auParams[2] = 0;
1723
1724 /* Check that we've got a free page slot. */
1725 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1726 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1727 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1728 pTb->aRanges[idxRange].idxPhysPage = 0;
1729 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1730 || pTb->aGCPhysPages[0] == GCPhysNew)
1731 {
1732 pTb->aGCPhysPages[0] = GCPhysNew;
1733 pTb->aRanges[idxRange].idxPhysPage = 1;
1734 }
1735 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1736 || pTb->aGCPhysPages[1] == GCPhysNew)
1737 {
1738 pTb->aGCPhysPages[1] = GCPhysNew;
1739 pTb->aRanges[idxRange].idxPhysPage = 2;
1740 }
1741 else
1742 {
1743                    Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1744 return false;
1745 }
1746
1747 /* Finish setting up the new range. */
1748 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1749 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1750 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1751 pTb->aRanges[idxRange].u2Unused = 0;
1752 pTb->cRanges++;
1753 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
1754 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
1755 pTb->aRanges[idxRange].offOpcodes));
1756 }
1757 else
1758 {
1759 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1760 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1761 }
1762
1763            /* Determine which function we need to load & check.
1764               Note! For jumps to a new page, we'll set both fTbBranched and
1765                     fTbCrossedPage to avoid unnecessary TLB work for intra-page
1766                     branching. */
1767 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
1768 || pVCpu->iem.s.fTbCrossedPage)
1769 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1770 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1771 : !fConsiderCsLimChecking
1772 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1773 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
1774 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
1775 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1776 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1777 : !fConsiderCsLimChecking
1778 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
1779 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
1780 else
1781 {
1782 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
1783 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1784 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1785 : !fConsiderCsLimChecking
1786 ? kIemThreadedFunc_BltIn_CheckOpcodes
1787 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
1788 }
1789 }
1790 else
1791 {
1792 /* 1c + 1d - instruction crosses pages. */
1793 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1794 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1795
1796            /* Lazy bird: Check that this isn't case 1c, since we've already
1797                          loaded the first physical address.  End the TB and
1798 make it a case 2b instead.
1799
1800 Hmm. Too much bother to detect, so just do the same
1801 with case 1d as well. */
1802#if 0 /** @todo get back to this later when we've got the actual branch code in
1803 * place. */
1804 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1805
1806 /* Check that we've got two free ranges. */
1807 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
1808 { /* likely */ }
1809 else
1810 return false;
1811 idxRange += 1;
1812 pCall->idxRange = idxRange;
1813 pCall->auParams[1] = idxRange;
1814 pCall->auParams[2] = 0;
1815
1816 /* ... */
1817
1818#else
1819 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1820 return false;
1821#endif
1822 }
1823 }
1824
1825 /*
1826 * Case 2: Page crossing.
1827 *
1828 * Sub-case 2a: The instruction starts on the first byte in the next page.
1829 *
1830 * Sub-case 2b: The instruction has opcode bytes in both the current and
1831 * following page.
1832 *
1833     * Both cases require a new range table entry and probably a new physical
1834 * page entry. The difference is in which functions to emit and whether to
1835 * add bytes to the current range.
1836 */
1837 else if (pVCpu->iem.s.fTbCrossedPage)
1838 {
1839 /* Check that we've got a free range. */
1840 idxRange += 1;
1841 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1842 { /* likely */ }
1843 else
1844 {
1845 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1846 return false;
1847 }
1848
1849 /* Check that we've got a free page slot. */
1850 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1851 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1852 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1853 pTb->aRanges[idxRange].idxPhysPage = 0;
1854 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1855 || pTb->aGCPhysPages[0] == GCPhysNew)
1856 {
1857 pTb->aGCPhysPages[0] = GCPhysNew;
1858 pTb->aRanges[idxRange].idxPhysPage = 1;
1859 }
1860 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1861 || pTb->aGCPhysPages[1] == GCPhysNew)
1862 {
1863 pTb->aGCPhysPages[1] = GCPhysNew;
1864 pTb->aRanges[idxRange].idxPhysPage = 2;
1865 }
1866 else
1867 {
1868            Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1869 return false;
1870 }
1871
1872 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
1873 {
1874 Assert(pVCpu->iem.s.offCurInstrStart == 0);
1875 pCall->idxRange = idxRange;
1876 pCall->auParams[1] = idxRange;
1877 pCall->auParams[2] = 0;
1878
1879 /* Finish setting up the new range. */
1880 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1881 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1882 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1883 pTb->aRanges[idxRange].u2Unused = 0;
1884 pTb->cRanges++;
1885 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
1886 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
1887 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
1888
1889            /* Determine which function we need to load & check. */
1890 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1891 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1892 : !fConsiderCsLimChecking
1893 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1894 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
1895 }
1896 else
1897 {
1898 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1899 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1900 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1901 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
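            /* auParams[0] now carries cbInstr in the low bits and cbStartPage (the number of
               opcode bytes on the first page) in bits 32 and up for the checker picked below. */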
1902
1903            /* We're good. Split the instruction over the old and new range table entries. */
1904 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
1905
1906 pTb->aRanges[idxRange].offPhysPage = 0;
1907 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
1908 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
1909 pTb->aRanges[idxRange].u2Unused = 0;
1910 pTb->cRanges++;
1911 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
1912 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
1913 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
1914
1915            /* Determine which function we need to load & check. */
1916 if (pVCpu->iem.s.fTbCheckOpcodes)
1917 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1918 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1919 : !fConsiderCsLimChecking
1920 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1921 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
1922 else
1923 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1924 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1925 : !fConsiderCsLimChecking
1926 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1927 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
1928 }
1929 }
1930
1931 /*
1932 * Regular case: No new range required.
1933 */
1934 else
1935 {
1936 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
1937 if (pVCpu->iem.s.fTbCheckOpcodes)
1938 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1939 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1940 : kIemThreadedFunc_BltIn_CheckOpcodes;
1941 else
1942 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
1943
1944 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1945 pTb->cbOpcodes = offOpcode + cbInstr;
1946 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1947 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
1948 }
1949
1950 /*
1951 * Commit the call.
1952 */
1953 pTb->Thrd.cCalls++;
1954
1955 /*
1956 * Clear state.
1957 */
1958 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1959 pVCpu->iem.s.fTbCrossedPage = false;
1960 pVCpu->iem.s.fTbCheckOpcodes = false;
1961
1962 /*
1963 * Copy opcode bytes.
1964 */
1965 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1966 pTb->cbOpcodes = offOpcode + cbInstr;
1967 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
1968
1969 return true;
1970}
1971
1972
1973/**
1974 * Worker for iemThreadedCompileBeginEmitCallsComplications and
1975 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
1976 *
1977 * @returns true if anything is pending, false if not.
1978 * @param pVCpu The cross context virtual CPU structure of the calling
1979 * thread.
1980 */
1981DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
1982{
1983 uint64_t fCpu = pVCpu->fLocalForcedActions;
1984 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
1985#if 1
1986 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
1987 if (RT_LIKELY( !fCpu
1988 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
1989 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
1990 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
1991 return false;
1992 return true;
1993#else
1994 return false;
1995#endif
1996
1997}
1998
1999
2000/**
2001 * Called by iemThreadedCompile when a block requires a mode check.
2002 *
2003 * @returns true if we should continue, false if we're out of call entries.
2004 * @param pVCpu The cross context virtual CPU structure of the calling
2005 * thread.
2006 * @param pTb The translation block being compiled.
2007 */
2008static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2009{
2010 /* Emit the call. */
2011 uint32_t const idxCall = pTb->Thrd.cCalls;
2012 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2013 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2014 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2015 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2016 pCall->idxInstr = pTb->cInstructions - 1;
2017 pCall->uUnused0 = 0;
2018 pCall->offOpcode = 0;
2019 pCall->cbOpcode = 0;
2020 pCall->idxRange = 0;
2021 pCall->auParams[0] = pVCpu->iem.s.fExec;
2022 pCall->auParams[1] = 0;
2023 pCall->auParams[2] = 0;
2024 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2025 return true;
2026}
2027
2028
2029/**
2030 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2031 * set.
2032 *
2033 * @returns true if we should continue, false if an IRQ is deliverable or a
2034 * relevant force flag is pending.
2035 * @param pVCpu The cross context virtual CPU structure of the calling
2036 * thread.
2037 * @param pTb The translation block being compiled.
2038 * @sa iemThreadedCompileCheckIrq
2039 */
2040bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2041{
2042 /*
2043     * Skip this if we've already emitted a call after the previous instruction
2044 * or if it's the first call, as we're always checking FFs between blocks.
2045 */
2046 uint32_t const idxCall = pTb->Thrd.cCalls;
2047 if ( idxCall > 0
2048 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2049 {
2050 /* Emit the call. */
2051 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2052 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2053 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2054 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2055 pCall->idxInstr = pTb->cInstructions;
2056 pCall->uUnused0 = 0;
2057 pCall->offOpcode = 0;
2058 pCall->cbOpcode = 0;
2059 pCall->idxRange = 0;
2060 pCall->auParams[0] = 0;
2061 pCall->auParams[1] = 0;
2062 pCall->auParams[2] = 0;
2063 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2064
2065 /* Reset the IRQ check value. */
2066 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2067
2068 /*
2069 * Check for deliverable IRQs and pending force flags.
2070 */
2071 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2072 }
2073 return true; /* continue */
2074}
2075
2076
2077/**
2078 * Emits an IRQ check call and checks for pending IRQs.
2079 *
2080 * @returns true if we should continue, false if an IRQ is deliverable or a
2081 * relevant force flag is pending.
2082 * @param pVCpu The cross context virtual CPU structure of the calling
2083 * thread.
2084 * @param   pTb     The translation block.
2085 * @sa iemThreadedCompileBeginEmitCallsComplications
2086 */
2087static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2088{
2089 /* Check again in a little bit, unless it is immediately following an STI
2090 in which case we *must* check immediately after the next instruction
2091 as well in case it's executed with interrupt inhibition. We could
2092       otherwise miss the interrupt window. See the irq2 wait2 variant in
2093 bs3-timers-1 which is doing sti + sti + cli. */
2094 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2095 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2096 else
2097 {
2098 pVCpu->iem.s.fTbCurInstrIsSti = false;
2099 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2100 }
2101 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2102
2103 /*
2104 * Emit the call.
2105 */
2106 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2107 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2108 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2109 pCall->idxInstr = pTb->cInstructions;
2110 pCall->uUnused0 = 0;
2111 pCall->offOpcode = 0;
2112 pCall->cbOpcode = 0;
2113 pCall->idxRange = 0;
2114 pCall->auParams[0] = 0;
2115 pCall->auParams[1] = 0;
2116 pCall->auParams[2] = 0;
2117
2118 /*
2119 * Check for deliverable IRQs and pending force flags.
2120 */
2121 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2122}
2123
2124
2125/**
2126 * Compiles a new TB and executes it.
2127 *
2128 * We combine compilation and execution here as it makes for simpler code flow
2129 * in the main loop and it allows interpreting while compiling if we want to
2130 * explore that option.
2131 *
2132 * @returns Strict VBox status code.
2133 * @param pVM The cross context virtual machine structure.
2134 * @param pVCpu The cross context virtual CPU structure of the calling
2135 * thread.
2136 * @param GCPhysPc The physical address corresponding to the current
2137 * RIP+CS.BASE.
2138 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2139 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2140 */
2141static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2142{
2143 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2144 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2145
2146 /*
2147     * Get the TB we use for the recompiling.  This is a maxed-out TB, so
2148     * we'll make a more efficient copy of it when we're done compiling.
2149 */
2150 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2151 if (pTb)
2152 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2153 else
2154 {
2155 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2156 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2157 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2158 }
2159
2160 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2161 functions may get at it. */
2162 pVCpu->iem.s.pCurTbR3 = pTb;
2163
2164#if 0
2165 /* Make sure the CheckIrq condition matches the one in EM. */
2166 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2167 const uint32_t cZeroCalls = 1;
2168#else
2169 const uint32_t cZeroCalls = 0;
2170#endif
2171
2172 /*
2173     * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2174 */
2175 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2176 iemThreadedCompileInitOpcodeFetching(pVCpu);
2177 VBOXSTRICTRC rcStrict;
2178 for (;;)
2179 {
2180 /* Process the next instruction. */
2181#ifdef LOG_ENABLED
2182 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2183 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2184 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2185 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2186#endif
2187 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2188 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2189
2190 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2191 if ( rcStrict == VINF_SUCCESS
2192 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2193 && !pVCpu->iem.s.fEndTb)
2194 {
2195 Assert(pTb->Thrd.cCalls > cCallsPrev);
2196            Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2197
2198 pVCpu->iem.s.cInstructions++;
2199
2200            /* Certain CIMPL calls may change the mode, so after those we check that
2201               we continue executing with the same mode value. */
2202 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2203 { /* probable */ }
2204 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2205 { /* extremely likely */ }
2206 else
2207 break;
2208 }
2209 else
2210 {
2211 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2212 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2213 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2214 rcStrict = VINF_SUCCESS;
2215
2216 if (pTb->Thrd.cCalls > cZeroCalls)
2217 {
2218 if (cCallsPrev != pTb->Thrd.cCalls)
2219 pVCpu->iem.s.cInstructions++;
2220 break;
2221 }
2222
2223 pVCpu->iem.s.pCurTbR3 = NULL;
2224 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2225 }
2226
2227 /* Check for IRQs? */
2228 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2229 pVCpu->iem.s.cInstrTillIrqCheck--;
2230 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2231 break;
2232
2233 /* Still space in the TB? */
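            /* The margins below (5 call entries, 16 opcode bytes) presumably cover the worst
               case a single recompiled instruction can add to the TB. */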
2234 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2235 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated)
2236 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2237 else
2238 {
2239 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
2240 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
2241 break;
2242 }
2243 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2244 }
2245
2246 /*
2247 * Duplicate the TB into a completed one and link it.
2248 */
2249 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2250 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2251
2252 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2253
2254#ifdef IEM_COMPILE_ONLY_MODE
2255 /*
2256 * Execute the translation block.
2257 */
2258#endif
2259
2260 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2261}
2262
2263
2264
2265/*********************************************************************************************************************************
2266* Recompiled Execution Core *
2267*********************************************************************************************************************************/
2268
2269
2270/**
2271 * Executes a translation block.
2272 *
2273 * @returns Strict VBox status code.
2274 * @param pVCpu The cross context virtual CPU structure of the calling
2275 * thread.
2276 * @param pTb The translation block to execute.
2277 */
2278static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2279{
2280 /*
2281 * Check the opcodes in the first page before starting execution.
2282 */
2283/** @todo this test should take IEMTB_F_CS_LIM_CHECKS into account or something.
2284 * The 'near jmp+call' test in bs3-cpu-basic-2 triggers the 2nd assertion here by
2285 * The 'near jmp+call' test in bs3-cpu-basic-2 triggers the 2nd assertion here by
2286 * altering the CS limit such that only one or two of the instruction bytes are valid.
2287 * Since it's a CS.LIM problem, the pbInstrBuf is good for the full length, and
2287 * the test succeeds if skipped, but we assert in debug builds. */
2288 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2289 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
2290 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
2291 { /* likely */ }
2292 else
2293 {
2294 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
2295 iemThreadedTbObsolete(pVCpu, pTb, true /*fSafeToFree*/);
2296 return VINF_SUCCESS;
2297 }
2298
2299 /*
2300 * Set the current TB so CIMPL functions may get at it.
2301 */
2302 pVCpu->iem.s.pCurTbR3 = pTb;
2303
2304 /*
2305 * Execute the block.
2306 */
2307#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2308 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2309 {
2310 pVCpu->iem.s.cTbExecNative++;
2311# ifdef LOG_ENABLED
2312 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2313# endif
2314# ifdef RT_ARCH_AMD64
2315 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2316# else
2317 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2318# endif
2319 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2320 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2321 { /* likely */ }
2322 else
2323 {
2324 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2325 pVCpu->iem.s.pCurTbR3 = NULL;
2326 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2327
2328 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2329 only to break out of TB execution early. */
2330 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2331 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2332
2333            /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
2334               and be converted to VINF_SUCCESS or whatever is appropriate. */
2335 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2336 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu));
2337
2338 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2339 }
2340 }
2341 else
2342#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2343 {
2344 /*
2345 * The threaded execution loop.
2346 */
2347 pVCpu->iem.s.cTbExecThreaded++;
2348#ifdef LOG_ENABLED
2349 uint64_t uRipPrev = UINT64_MAX;
2350#endif
2351 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2352 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2353 while (cCallsLeft-- > 0)
2354 {
2355#ifdef LOG_ENABLED
2356 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2357 {
2358 uRipPrev = pVCpu->cpum.GstCtx.rip;
2359 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2360 }
2361 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2362 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2363 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2364#endif
2365 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2366 pCallEntry->auParams[0],
2367 pCallEntry->auParams[1],
2368 pCallEntry->auParams[2]);
2369 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2370 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2371 pCallEntry++;
2372 else
2373 {
2374 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2375 pVCpu->iem.s.pCurTbR3 = NULL;
2376 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2377
2378 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2379 only to break out of TB execution early. */
2380 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2381 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2382 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2383 }
2384 }
2385 }
2386
2387 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2388 pVCpu->iem.s.pCurTbR3 = NULL;
2389 return VINF_SUCCESS;
2390}
2391
2392
2393/**
2394 * This is called when the PC doesn't match the current pbInstrBuf.
2395 *
2396 * Upon return, we're ready for opcode fetching. But please note that
2397 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2398 * MMIO or unassigned).
2399 */
2400static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2401{
2402 pVCpu->iem.s.pbInstrBuf = NULL;
2403 pVCpu->iem.s.offCurInstrStart = 0;
2404 pVCpu->iem.s.offInstrNextByte = 0;
2405 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2406 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2407}
2408
2409
2410/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
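/**
 * Gets the guest physical address of the current instruction pointer, priming
 * the opcode fetching state in the process.
 *
 * Records RIP as the TB start PC, adds CS.BASE to form the effective PC, and
 * reuses the current instruction buffer if the PC still falls within it;
 * otherwise iemGetPcWithPhysAndCodeMissed reloads the buffer.
 *
 * @returns The guest physical address corresponding to RIP + CS.BASE.
 * @param   pVCpu   The cross context virtual CPU structure of the calling
 *                  thread.
 */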
2411DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2412{
2413 /*
2414 * Set uCurTbStartPc to RIP and calc the effective PC.
2415 */
2416 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2417 pVCpu->iem.s.uCurTbStartPc = uPc;
2418 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2419 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2420
2421 /*
2422 * Advance within the current buffer (PAGE) when possible.
2423 */
2424 if (pVCpu->iem.s.pbInstrBuf)
2425 {
2426 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2427 if (off < pVCpu->iem.s.cbInstrBufTotal)
2428 {
2429 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2430 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2431 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2432 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2433 else
2434 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2435
2436 return pVCpu->iem.s.GCPhysInstrBuf + off;
2437 }
2438 }
2439 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2440}
2441
2442
2443/**
2444 * Determines the extra IEMTB_F_XXX flags.
2445 *
2446 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2447 * IEMTB_F_CS_LIM_CHECKS (or zero).
2448 * @param pVCpu The cross context virtual CPU structure of the calling
2449 * thread.
2450 */
2451DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2452{
2453 uint32_t fRet = 0;
2454
2455 /*
2456 * Determine the inhibit bits.
2457 */
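    /* Note: this quick test relies on the IEMTB_F_INHIBIT_SHADOW and IEMTB_F_INHIBIT_NMI
       bits sharing positions with the corresponding inhibit bits in rflags.uBoth. */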
2458 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2459 { /* typical */ }
2460 else
2461 {
2462 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2463 fRet |= IEMTB_F_INHIBIT_SHADOW;
2464 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2465 fRet |= IEMTB_F_INHIBIT_NMI;
2466 }
2467
2468 /*
2469 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2470 * likely to go invalid before the end of the translation block.
2471 */
2472 if (IEM_IS_64BIT_CODE(pVCpu))
2473 return fRet;
2474
2475 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2476 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2477 return fRet;
2478 return fRet | IEMTB_F_CS_LIM_CHECKS;
2479}
2480
2481
2482VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
2483{
2484 /*
2485 * See if there is an interrupt pending in TRPM, inject it if we can.
2486 */
2487 if (!TRPMHasTrap(pVCpu))
2488 { /* likely */ }
2489 else
2490 {
2491 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
2492 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
2493 { /*likely */ }
2494 else
2495 return rcStrict;
2496 }
2497
2498 /*
2499 * Init the execution environment.
2500 */
2501#ifdef RT_ARCH_ARM64 /** @todo ARM64: fix unaligned locked instructions properly. @bugref{10547} */
2502 if (pVM->cCpus == 1)
2503 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
2504 else
2505#endif
2506 iemInitExec(pVCpu, 0 /*fExecOpts*/);
2507 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
2508 { }
2509 else
2510 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
2511
2512 /*
2513 * Run-loop.
2514 *
2515 * If we're using setjmp/longjmp we combine all the catching here to avoid
2516 * having to call setjmp for each block we're executing.
2517 */
2518 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
2519 for (;;)
2520 {
2521 PIEMTB pTb = NULL;
2522 VBOXSTRICTRC rcStrict;
2523 IEM_TRY_SETJMP(pVCpu, rcStrict)
2524 {
2525            uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
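            /* The mask test below only polls the timers when the low nine bits of iIterations
               are zero, i.e. roughly once every 512 iterations. */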
2526 for (uint32_t iIterations = 0; ; iIterations++)
2527 {
2528 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
2529 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
2530 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
2531
2532 pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
2533 if (pTb)
2534 rcStrict = iemTbExec(pVCpu, pTb);
2535 else
2536 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
2537 if (rcStrict == VINF_SUCCESS)
2538 {
2539 Assert(pVCpu->iem.s.cActiveMappings == 0);
2540
2541 uint64_t fCpu = pVCpu->fLocalForcedActions;
2542 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
2543 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2544 | VMCPU_FF_TLB_FLUSH
2545 | VMCPU_FF_UNHALT );
2546 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2547 if (RT_LIKELY( ( !fCpu
2548 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2549 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2550 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
2551 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
2552 {
2553 if (RT_LIKELY( (iIterations & cPollRate) != 0
2554 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
2555 pTb = NULL; /* Clear it before looping so iemTbCacheLookup can safely do native recompilation. */
2556 else
2557 return VINF_SUCCESS;
2558 }
2559 else
2560 return VINF_SUCCESS;
2561 }
2562 else
2563 return rcStrict;
2564 }
2565 }
2566 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
2567 {
2568 pVCpu->iem.s.cLongJumps++;
2569 if (pVCpu->iem.s.cActiveMappings > 0)
2570 iemMemRollback(pVCpu);
2571
2572#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2573 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
2574 {
2575 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
2576 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
2577 }
2578#endif
2579
2580#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
2581 /* If pTb isn't NULL we're in iemTbExec. */
2582 if (!pTb)
2583 {
2584 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
2585 pTb = pVCpu->iem.s.pCurTbR3;
2586 if (pTb)
2587 {
2588 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
2589 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
2590 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
2591 }
2592 }
2593#endif
2594 return rcStrict;
2595 }
2596 IEM_CATCH_LONGJMP_END(pVCpu);
2597 }
2598}
2599