VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@104117

Last change on this file since 104117 was 104114, checked in by vboxsync, 13 months ago

VMM/IEM: Implemented a very simple alternative to iemTbAllocatorFreeupNativeSpace that frees up to 4MB of memory from a chunk in a linear fashion w/o considering recent TB use and such. Fixed a heap alloc overrun issue in iemExecMemAllocatorInit (dependent on the IEMEXECMEMALLOCATOR structure size). Fixed a use-after-free problem with threaded TBs and iemThreadedTbObsolete. bugref:10370

1/* $Id: IEMAllThrdRecompiler.cpp 104114 2024-03-29 01:57:23Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116
117
118/**
119 * Calculates the effective address of a ModR/M memory operand, extended version
120 * for use in the recompilers.
121 *
122 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
123 *
124 * May longjmp on internal error.
125 *
126 * @return The effective address.
127 * @param pVCpu The cross context virtual CPU structure of the calling thread.
128 * @param bRm The ModRM byte.
129 * @param cbImmAndRspOffset - First byte: The size of any immediate
130 * following the effective address opcode bytes
131 * (only for RIP relative addressing).
132 * - Second byte: RSP displacement (for POP [ESP]).
133 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
134 * SIB byte (bits 39:32).
135 *
136 * @note This must be defined in a source file with matching
137 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
138 * or implemented differently...
139 */
140RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
141{
142 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
143# define SET_SS_DEF() \
144 do \
145 { \
146 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
147 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
148 } while (0)
149
150 if (!IEM_IS_64BIT_CODE(pVCpu))
151 {
152/** @todo Check the effective address size crap! */
153 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
154 {
155 uint16_t u16EffAddr;
156
157 /* Handle the disp16 form with no registers first. */
158 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
159 {
160 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
161 *puInfo = u16EffAddr;
162 }
163 else
164 {
165 /* Get the displacement. */
166 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
167 {
168 case 0: u16EffAddr = 0; break;
169 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
170 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
171 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
172 }
173 *puInfo = u16EffAddr;
174
175 /* Add the base and index registers to the disp. */
176 switch (bRm & X86_MODRM_RM_MASK)
177 {
178 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
179 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
180 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
181 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
182 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
183 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
184 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
185 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
186 }
187 }
188
189 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
190 return u16EffAddr;
191 }
192
193 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
194 uint32_t u32EffAddr;
195 uint64_t uInfo;
196
197 /* Handle the disp32 form with no registers first. */
198 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
199 {
200 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
201 uInfo = u32EffAddr;
202 }
203 else
204 {
205 /* Get the register (or SIB) value. */
206 uInfo = 0;
207 switch ((bRm & X86_MODRM_RM_MASK))
208 {
209 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
210 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
211 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
212 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
213 case 4: /* SIB */
214 {
215 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
216 uInfo = (uint64_t)bSib << 32;
217
218 /* Get the index and scale it. */
219 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
220 {
221 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
222 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
223 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
224 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
225 case 4: u32EffAddr = 0; /*none */ break;
226 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
227 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
228 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
229 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
230 }
231 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
232
233 /* add base */
234 switch (bSib & X86_SIB_BASE_MASK)
235 {
236 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
237 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
238 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
239 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
240 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
241 case 5:
242 if ((bRm & X86_MODRM_MOD_MASK) != 0)
243 {
244 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
245 SET_SS_DEF();
246 }
247 else
248 {
249 uint32_t u32Disp;
250 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
251 u32EffAddr += u32Disp;
252 uInfo |= u32Disp;
253 }
254 break;
255 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
256 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
257 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
258 }
259 break;
260 }
261 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
262 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
263 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
264 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
265 }
266
267 /* Get and add the displacement. */
268 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
269 {
270 case 0:
271 break;
272 case 1:
273 {
274 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
275 u32EffAddr += i8Disp;
276 uInfo |= (uint32_t)(int32_t)i8Disp;
277 break;
278 }
279 case 2:
280 {
281 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
282 u32EffAddr += u32Disp;
283 uInfo |= u32Disp;
284 break;
285 }
286 default:
287 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
288 }
289 }
290
291 *puInfo = uInfo;
292 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
293 return u32EffAddr;
294 }
295
296 uint64_t u64EffAddr;
297 uint64_t uInfo;
298
299 /* Handle the rip+disp32 form with no registers first. */
300 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
301 {
302 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
303 uInfo = (uint32_t)u64EffAddr;
304 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
305 }
306 else
307 {
308 /* Get the register (or SIB) value. */
309 uInfo = 0;
310 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
311 {
312 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
313 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
314 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
315 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
316 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
317 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
318 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
319 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
320 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
321 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
322 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
323 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
324 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
325 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
326 /* SIB */
327 case 4:
328 case 12:
329 {
330 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
331 uInfo = (uint64_t)bSib << 32;
332
333 /* Get the index and scale it. */
334 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
335 {
336 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
337 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
338 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
339 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
340 case 4: u64EffAddr = 0; /*none */ break;
341 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
342 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
343 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
344 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
345 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
346 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
347 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
348 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
349 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
350 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
351 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
352 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
353 }
354 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
355
356 /* add base */
357 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
358 {
359 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
360 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
361 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
362 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
363 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
364 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
365 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
366 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
367 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
368 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
369 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
370 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
371 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
372 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
373 /* complicated encodings */
374 case 5:
375 case 13:
376 if ((bRm & X86_MODRM_MOD_MASK) != 0)
377 {
378 if (!pVCpu->iem.s.uRexB)
379 {
380 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
381 SET_SS_DEF();
382 }
383 else
384 u64EffAddr += pVCpu->cpum.GstCtx.r13;
385 }
386 else
387 {
388 uint32_t u32Disp;
389 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
390 u64EffAddr += (int32_t)u32Disp;
391 uInfo |= u32Disp;
392 }
393 break;
394 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
395 }
396 break;
397 }
398 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
399 }
400
401 /* Get and add the displacement. */
402 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
403 {
404 case 0:
405 break;
406 case 1:
407 {
408 int8_t i8Disp;
409 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
410 u64EffAddr += i8Disp;
411 uInfo |= (uint32_t)(int32_t)i8Disp;
412 break;
413 }
414 case 2:
415 {
416 uint32_t u32Disp;
417 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
418 u64EffAddr += (int32_t)u32Disp;
419 uInfo |= u32Disp;
420 break;
421 }
422 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
423 }
424
425 }
426
427 *puInfo = uInfo;
428 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
429 {
430 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
431 return u64EffAddr;
432 }
433 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
434 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
435 return u64EffAddr & UINT32_MAX;
436}
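
/*
 * Illustrative note (not part of the original source): as the function body
 * above shows, cbImmAndRspOffset packs two values into one dword; the low
 * byte is the size of any immediate following the ModR/M bytes (only added
 * for RIP-relative addressing), and the second byte is an extra displacement
 * applied when the base register is ESP/RSP.  A caller could therefore pack
 * it roughly like this (hypothetical values):
 *
 *     uint64_t uInfo;
 *     uint32_t const cbImm  = 4;   // a 4-byte immediate follows the ModR/M bytes
 *     uint32_t const offRsp = 8;   // bias an [rsp]-based access by 8 bytes
 *     RTGCPTR const GCPtrEff = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, cbImm | (offRsp << 8), &uInfo);
 *
 * The packing actually used by IEM_MC_CALC_RM_EFF_ADDR callers lives in the
 * IEM MC headers and may differ in detail.
 */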
437
438
439/*********************************************************************************************************************************
440* Translation Block Cache. *
441*********************************************************************************************************************************/
442
443/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
444static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
445{
446 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
447 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
448 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
449 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
450 if (cMsSinceUse1 != cMsSinceUse2)
451 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
452 if (pTb1->cUsed != pTb2->cUsed)
453 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
454 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
455 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
456 return 0;
457}
458
459#ifdef VBOX_STRICT
460/**
461 * Assertion helper that checks a collisions list count.
462 */
463static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
464{
465 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
466 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
467 while (pTb)
468 {
469 pTb = pTb->pNext;
470 cLeft--;
471 }
472 AssertMsg(cLeft == 0,
473 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
474 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
475}
476#endif
477
478
479DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
480{
481 STAM_PROFILE_START(&pTbCache->StatPrune, a);
482
483 /*
484 * First convert the collision list to an array.
485 */
486 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
487 uintptr_t cInserted = 0;
488 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
489
490 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
491
492 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
493 {
494 apSortedTbs[cInserted++] = pTbCollision;
495 pTbCollision = pTbCollision->pNext;
496 }
497
498 /* Free any excess (impossible). */
499 if (RT_LIKELY(!pTbCollision))
500 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
501 else
502 do
503 {
504 PIEMTB pTbToFree = pTbCollision;
505 pTbCollision = pTbToFree->pNext;
506 iemTbAllocatorFree(pVCpu, pTbToFree);
507 } while (pTbCollision);
508
509 /*
510 * Sort it by most recently used and usage count.
511 */
512 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
513
514 /* We keep half the list for now. Perhaps a bit aggressive... */
515 uintptr_t const cKeep = cInserted / 2;
516
517 /* First free up the TBs we don't wish to keep (before creating the new
518 list because otherwise the free code will scan the list for each one
519 without ever finding it). */
520 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
521 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
522
523 /* Then chain the new TB together with the existing ones we wish to keep
524 and insert this list into the hash table. */
525 pTbCollision = pTb;
526 for (uintptr_t idx = 0; idx < cKeep; idx++)
527 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
528 pTbCollision->pNext = NULL;
529
530 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
531#ifdef VBOX_STRICT
532 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
533#endif
534
535 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
536}
537
538
539static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
540{
541 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
542 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
543 if (!pTbOldHead)
544 {
545 pTb->pNext = NULL;
546 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
547 }
548 else
549 {
550 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
551 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
552 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
553 {
554 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
555 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
556#ifdef VBOX_STRICT
557 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
558#endif
559 }
560 else
561 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
562 }
563}
564
565
566/**
567 * Unlinks @a pTb from the hash table if found in it.
568 *
569 * @returns true if unlinked, false if not present.
570 * @param pTbCache The hash table.
571 * @param pTb The TB to remove.
572 */
573static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
574{
575 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
576 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
577 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
578
579 /*
580 * At the head of the collision list?
581 */
582 if (pTbHash == pTb)
583 {
584 if (!pTb->pNext)
585 pTbCache->apHash[idxHash] = NULL;
586 else
587 {
588 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
589 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
590#ifdef VBOX_STRICT
591 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
592#endif
593 }
594 return true;
595 }
596
597 /*
598 * Search the collision list.
599 */
600 PIEMTB const pTbHead = pTbHash;
601 while (pTbHash)
602 {
603 PIEMTB const pNextTb = pTbHash->pNext;
604 if (pNextTb == pTb)
605 {
606 pTbHash->pNext = pTb->pNext;
607 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
608#ifdef VBOX_STRICT
609 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
610#endif
611 return true;
612 }
613 pTbHash = pNextTb;
614 }
615 return false;
616}
617
618
619/**
620 * Looks up a TB for the given PC and flags in the cache.
621 *
622 * @returns Pointer to TB on success, NULL if not found.
623 * @param pVCpu The cross context virtual CPU structure of the
624 * calling thread.
625 * @param pTbCache The translation block cache.
626 * @param GCPhysPc The PC to look up a TB for.
627 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
628 * the lookup.
629 * @thread EMT(pVCpu)
630 */
631static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
632 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
633{
634 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
635 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
636 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
637#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
638 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
639#endif
640 while (pTb)
641 {
642 if (pTb->GCPhysPc == GCPhysPc)
643 {
644 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
645 {
646 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
647 {
648 STAM_COUNTER_INC(&pTbCache->cLookupHits);
649 AssertMsg(cLeft > 0, ("%d\n", cLeft));
650
651 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
652 pTb->cUsed++;
653#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
654 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
655 {
656 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
657 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
658 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
659 return pTb;
660 }
661 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
662 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
663 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
664 return iemNativeRecompile(pVCpu, pTb);
665#else
666 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
667 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
668 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
669 return pTb;
670#endif
671 }
672 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
673 }
674 else
675 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
676 }
677 else
678 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
679
680 pTb = pTb->pNext;
681#ifdef VBOX_STRICT
682 cLeft--;
683#endif
684 }
685 AssertMsg(cLeft == 0, ("%d\n", cLeft));
686 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
687 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
688 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
689 return pTb;
690}
691
692
693/*********************************************************************************************************************************
694* Translation Block Allocator.
695*********************************************************************************************************************************/
696/*
697 * Translation block allocation management.
698 */
699
700#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
701# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
702 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
703# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
704 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
705# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
706 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
707#else
708# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
709 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
710# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
711 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
712# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
713 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
714#endif
715/** Makes a TB index from a chunk index and TB index within that chunk. */
716#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
717 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
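
/*
 * Worked example (illustrative, not from the original source): with the
 * non-power-of-two variants above and, say, cTbsPerChunk = 8192, the global
 * TB index 20000 maps to chunk 20000 / 8192 = 2 and in-chunk index
 * 20000 - 2 * 8192 = 3616, and IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 3616)
 * reconstructs 20000.  The power-of-two variants compute the same mapping
 * with a shift and a mask instead of a division.
 */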
718
719
720/**
721 * Initializes the TB allocator and cache for an EMT.
722 *
723 * @returns VBox status code.
724 * @param pVM The VM handle.
725 * @param cInitialTbs The initial number of translation blocks to
726 * preallocate.
727 * @param cMaxTbs The max number of translation blocks allowed.
728 * @param cbInitialExec The initial size of the executable memory allocator.
729 * @param cbMaxExec The max size of the executable memory allocator.
730 * @param cbChunkExec The chunk size for executable memory allocator. Zero
731 * or UINT32_MAX for automatically determining this.
732 * @thread EMT
733 */
734DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
735 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
736{
737 PVMCPUCC pVCpu = VMMGetCpu(pVM);
738 Assert(!pVCpu->iem.s.pTbCacheR3);
739 Assert(!pVCpu->iem.s.pTbAllocatorR3);
740
741 /*
742 * Calculate the chunk size of the TB allocator.
743 * The minimum chunk size is 2MiB.
744 */
745 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
746 uint32_t cbPerChunk = _2M;
747 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
748#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
749 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
750 uint8_t cChunkShift = 21 - cTbShift;
751 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
752#endif
753 for (;;)
754 {
755 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
756 break;
757 cbPerChunk *= 2;
758 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
759#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
760 cChunkShift += 1;
761#endif
762 }
763
764 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
765 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
766 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
767
768 cMaxTbs = cMaxChunks * cTbsPerChunk;
769
770 /*
771 * Allocate and initialize it.
772 */
773 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
774 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
775 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
776 if (!pTbAllocator)
777 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
778 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
779 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
780 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
781 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
782 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
783 pTbAllocator->cbPerChunk = cbPerChunk;
784 pTbAllocator->cMaxTbs = cMaxTbs;
785#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
786 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
787 pTbAllocator->cChunkShift = cChunkShift;
788 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
789#endif
790
791 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
792 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
793
794 /*
795 * Allocate the initial chunks.
796 */
797 for (uint32_t idxChunk = 0; ; idxChunk++)
798 {
799 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
800 if (!paTbs)
801 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
802 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
803 cbPerChunk, idxChunk, pVCpu->idCpu);
804
805 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
806 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
807 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
808 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
809 pTbAllocator->cTotalTbs += cTbsPerChunk;
810
811 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
812 break;
813 }
814
815 /*
816 * Calculate the size of the hash table. We double the max TB count and
817 * round it up to the nearest power of two.
818 */
819 uint32_t cCacheEntries = cMaxTbs * 2;
820 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
821 {
822 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
823 cCacheEntries = RT_BIT_32(iBitTop);
824 Assert(cCacheEntries >= cMaxTbs * 2);
825 }
826
827 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
828 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
829 if (!pTbCache)
830 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
831 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
832 cbTbCache, cCacheEntries, pVCpu->idCpu);
833
834 /*
835 * Initialize it (assumes zeroed by the allocator).
836 */
837 pTbCache->uMagic = IEMTBCACHE_MAGIC;
838 pTbCache->cHash = cCacheEntries;
839 pTbCache->uHashMask = cCacheEntries - 1;
840 Assert(pTbCache->cHash > pTbCache->uHashMask);
841 pVCpu->iem.s.pTbCacheR3 = pTbCache;
842
843 /*
844 * Initialize the native executable memory allocator.
845 */
846#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
847 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
848 AssertLogRelRCReturn(rc, rc);
849#else
850 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
851#endif
852
853 return VINF_SUCCESS;
854}
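
/*
 * Sizing sketch (illustrative figures; the real sizeof(IEMTB) and the
 * configured TB counts differ): if sizeof(IEMTB) were 192 bytes, a 2 MiB
 * chunk would hold 2097152 / 192 = 10922 TBs, so a cMaxTbs request of 65536
 * needs (65536 + 10921) / 10922 = 7 chunks and cMaxTbs is rounded up to
 * 7 * 10922 = 76454.  The allocation bitmap then covers cMaxTbs bits rounded
 * up to whole 64-bit words, and the hash table gets twice cMaxTbs entries
 * rounded up to a power of two.
 */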
855
856
857/**
858 * Inner free worker.
859 */
860static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
861 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
862{
863 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
864 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
865 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
866 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
867
868 /*
869 * Unlink the TB from the hash table.
870 */
871 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
872
873 /*
874 * Free the TB itself.
875 */
876 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
877 {
878 case IEMTB_F_TYPE_THREADED:
879 pTbAllocator->cThreadedTbs -= 1;
880 RTMemFree(pTb->Thrd.paCalls);
881 break;
882#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
883 case IEMTB_F_TYPE_NATIVE:
884 pTbAllocator->cNativeTbs -= 1;
885 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
886 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
887 break;
888#endif
889 default:
890 AssertFailed();
891 }
892 RTMemFree(pTb->pabOpcodes);
893
894 pTb->pNext = NULL;
895 pTb->fFlags = 0;
896 pTb->GCPhysPc = UINT64_MAX;
897 pTb->Gen.uPtr = 0;
898 pTb->Gen.uData = 0;
899 pTb->cbOpcodes = 0;
900 pTb->pabOpcodes = NULL;
901
902 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
903 Assert(pTbAllocator->cInUseTbs > 0);
904
905 pTbAllocator->cInUseTbs -= 1;
906 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
907}
908
909
910/**
911 * Frees the given TB.
912 *
913 * @param pVCpu The cross context virtual CPU structure of the calling
914 * thread.
915 * @param pTb The translation block to free.
916 * @thread EMT(pVCpu)
917 */
918DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
919{
920 /*
921 * Validate state.
922 */
923 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
924 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
925 uint8_t const idxChunk = pTb->idxAllocChunk;
926 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
927 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
928 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
929
930 /*
931 * Call inner worker.
932 */
933 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
934}
935
936
937/**
938 * Schedules a TB for freeing when it's no longer being executed and/or part of
939 * the caller's call stack.
940 *
941 * The TB will be removed from the translation block cache, though, so it isn't
942 * possible to execute it again, and the IEMTB::pNext member can be used to link
943 * it together with other TBs awaiting freeing.
944 *
945 * @param pVCpu The cross context virtual CPU structure of the calling
946 * thread.
947 * @param pTb The translation block to schedule for freeing.
948 */
949static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
950{
951 /*
952 * Validate state.
953 */
954 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
955 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
956 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
957 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
958 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
959 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
960 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
961 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
962 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
963
964 /*
965 * Remove it from the cache and prepend it to the allocator's todo list.
966 */
967 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
968
969 pTb->pNext = pTbAllocator->pDelayedFreeHead;
970 pTbAllocator->pDelayedFreeHead = pTb;
971}
972
973
974/**
975 * Processes the delayed frees.
976 *
977 * This is called by the allocator function as well as the native recompile
978 * function before making any TB or executable memory allocations respectively.
979 */
980void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
981{
982 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
983 pTbAllocator->pDelayedFreeHead = NULL;
984 while (pTb)
985 {
986 PIEMTB const pTbNext = pTb->pNext;
987 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
988 iemTbAllocatorFree(pVCpu, pTb);
989 pTb = pTbNext;
990 }
991}
992
993
994/**
995 * Grow the translation block allocator with another chunk.
996 */
997static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
998{
999 /*
1000 * Validate state.
1001 */
1002 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1003 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1004 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1005 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1006 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1007
1008 /*
1009 * Allocate a new chunk and add it to the allocator.
1010 */
1011 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1012 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1013 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1014
1015 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1016 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1017 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1018 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1019 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1020 pTbAllocator->cTotalTbs += cTbsPerChunk;
1021 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1022
1023 return VINF_SUCCESS;
1024}
1025
1026
1027/**
1028 * Allocates a TB from an allocator with a free block.
1029 *
1030 * This is common code to both the fast and slow allocator code paths.
1031 */
1032DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1033{
1034 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1035
1036 int idxTb;
1037 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1038 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1039 pTbAllocator->cTotalTbs,
1040 pTbAllocator->iStartHint & ~(uint32_t)63);
1041 else
1042 idxTb = -1;
1043 if (idxTb < 0)
1044 {
1045 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1046 AssertLogRelReturn(idxTb >= 0, NULL);
1047 }
1048 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1049 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1050
1051 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1052 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1053 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1054 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1055 Assert(pTb->idxAllocChunk == idxChunk);
1056
1057 pTbAllocator->cInUseTbs += 1;
1058 if (fThreaded)
1059 pTbAllocator->cThreadedTbs += 1;
1060 else
1061 pTbAllocator->cNativeTbs += 1;
1062 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1063 return pTb;
1064}
1065
1066
1067/**
1068 * Slow path for iemTbAllocatorAlloc.
1069 */
1070static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1071{
1072 /*
1073 * With some luck we can add another chunk.
1074 */
1075 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1076 {
1077 int rc = iemTbAllocatorGrow(pVCpu);
1078 if (RT_SUCCESS(rc))
1079 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1080 }
1081
1082 /*
1083 * We have to prune stuff. Sigh.
1084 *
1085 * This requires scanning for older TBs and kicking them out. Not sure how to
1086 * best do this as we don't want to maintain any list of TBs ordered by last
1087 * usage time. But one reasonably simple approach would be that each time we
1088 * get here we continue a sequential scan of the allocation chunks,
1089 * considering just a smallish number of TBs and freeing a fixed portion of
1090 * them. Say, we consider the next 128 TBs, freeing the least recently used
1091 * out of each group of 4 TBs, resulting in 32 freed TBs.
1092 */
1093 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1094 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1095 uint32_t const cTbsToPrune = 128;
1096 uint32_t const cTbsPerGroup = 4;
1097 uint32_t cFreedTbs = 0;
1098#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1099 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1100#else
1101 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1102#endif
1103 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1104 idxTbPruneFrom = 0;
1105 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1106 {
1107 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1108 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1109 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1110 uint32_t cMsAge = msNow - pTb->msLastUsed;
1111 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1112
1113 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1114 {
1115#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1116 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1117 { /* likely */ }
1118 else
1119 {
1120 idxInChunk2 = 0;
1121 idxChunk2 += 1;
1122 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1123 idxChunk2 = 0;
1124 }
1125#endif
1126 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1127 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1128 if ( cMsAge2 > cMsAge
1129 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1130 {
1131 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1132 pTb = pTb2;
1133 idxChunk = idxChunk2;
1134 idxInChunk = idxInChunk2;
1135 cMsAge = cMsAge2;
1136 }
1137 }
1138
1139 /* Free the TB. */
1140 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1141 cFreedTbs++; /* paranoia */
1142 }
1143 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1144 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1145
1146 /*
1147 * Allocate a TB from the ones we've pruned.
1148 */
1149 if (cFreedTbs)
1150 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1151 return NULL;
1152}
1153
1154
1155/**
1156 * Allocate a translation block.
1157 *
1158 * @returns Pointer to block on success, NULL if we're out and are unable to
1159 * free up an existing one (very unlikely once implemented).
1160 * @param pVCpu The cross context virtual CPU structure of the calling
1161 * thread.
1162 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1163 * For statistics.
1164 */
1165DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1166{
1167 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1168 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1169
1170 /* Free any pending TBs before we proceed. */
1171 if (!pTbAllocator->pDelayedFreeHead)
1172 { /* probably likely */ }
1173 else
1174 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1175
1176 /* If the allocator is full, take the slow code path. */
1177 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1178 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1179 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1180}
1181
1182
1183/**
1184 * This is called when we're out of space for native TBs.
1185 *
1186 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1187 * The difference is that we only prune native TBs and will only free any if
1188 * there are at least two in a group. The conditions under which we're called are
1189 * different - there will probably be free TBs in the table when we're called.
1190 * Therefore we increase the group size and max scan length, though we'll stop
1191 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1192 * up at least 8 TBs.
1193 */
1194void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1195{
1196 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1197 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1198
1199 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1200
1201 /*
1202 * Flush the delayed free list before we start freeing TBs indiscriminately.
1203 */
1204 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1205
1206 /*
1207 * Scan and free TBs.
1208 */
1209 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1210 uint32_t const cTbsToPrune = 128 * 8;
1211 uint32_t const cTbsPerGroup = 4 * 4;
1212 uint32_t cFreedTbs = 0;
1213 uint32_t cMaxInstrs = 0;
1214 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1215 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1216 {
1217 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1218 idxTbPruneFrom = 0;
1219 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1220 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1221 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1222 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1223 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1224
1225 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1226 {
1227 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1228 { /* likely */ }
1229 else
1230 {
1231 idxInChunk2 = 0;
1232 idxChunk2 += 1;
1233 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1234 idxChunk2 = 0;
1235 }
1236 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1237 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1238 {
1239 cNativeTbs += 1;
1240 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1241 if ( cMsAge2 > cMsAge
1242 || ( cMsAge2 == cMsAge
1243 && ( pTb2->cUsed < pTb->cUsed
1244 || ( pTb2->cUsed == pTb->cUsed
1245 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1246 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1247 {
1248 pTb = pTb2;
1249 idxChunk = idxChunk2;
1250 idxInChunk = idxInChunk2;
1251 cMsAge = cMsAge2;
1252 }
1253 }
1254 }
1255
1256 /* Free the TB if we found at least two native ones in this group. */
1257 if (cNativeTbs >= 2)
1258 {
1259 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1260 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1261 cFreedTbs++;
1262 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1263 break;
1264 }
1265 }
1266 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1267
1268 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1269}
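
/*
 * Scan-size note (arithmetic only, derived from the constants above): one
 * invocation considers up to 128 * 8 = 1024 TBs in groups of 4 * 4 = 16,
 * freeing at most one native TB per group, i.e. at most 64 TBs.  It stops
 * early once at least 8 TBs have been freed and the largest freed TB had at
 * least cNeededInstrs native instructions.
 */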
1270
1271
1272/*********************************************************************************************************************************
1273* Threaded Recompiler Core *
1274*********************************************************************************************************************************/
1275/**
1276 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1277 * @returns pszBuf.
1278 * @param fFlags The flags.
1279 * @param pszBuf The output buffer.
1280 * @param cbBuf The output buffer size. At least 32 bytes.
1281 */
1282DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1283{
1284 Assert(cbBuf >= 32);
1285 static RTSTRTUPLE const s_aModes[] =
1286 {
1287 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1288 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1289 /* [02] = */ { RT_STR_TUPLE("!2!") },
1290 /* [03] = */ { RT_STR_TUPLE("!3!") },
1291 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1292 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1293 /* [06] = */ { RT_STR_TUPLE("!6!") },
1294 /* [07] = */ { RT_STR_TUPLE("!7!") },
1295 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1296 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1297 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1298 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1299 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1300 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1301 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1302 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1303 /* [10] = */ { RT_STR_TUPLE("!10!") },
1304 /* [11] = */ { RT_STR_TUPLE("!11!") },
1305 /* [12] = */ { RT_STR_TUPLE("!12!") },
1306 /* [13] = */ { RT_STR_TUPLE("!13!") },
1307 /* [14] = */ { RT_STR_TUPLE("!14!") },
1308 /* [15] = */ { RT_STR_TUPLE("!15!") },
1309 /* [16] = */ { RT_STR_TUPLE("!16!") },
1310 /* [17] = */ { RT_STR_TUPLE("!17!") },
1311 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1312 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1313 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1314 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1315 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1316 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1317 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1318 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1319 };
1320 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1321 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1322 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1323
1324 pszBuf[off++] = ' ';
1325 pszBuf[off++] = 'C';
1326 pszBuf[off++] = 'P';
1327 pszBuf[off++] = 'L';
1328 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1329 Assert(off < 32);
1330
1331 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1332
1333 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1334 {
1335 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1336 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1337 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1338 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1339 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1340 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1341 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1342 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1343 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1344 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1345 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1346 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1347 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1348 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1349 };
1350 if (fFlags)
1351 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1352 if (s_aFlags[i].fFlag & fFlags)
1353 {
1354 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1355 pszBuf[off++] = ' ';
1356 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1357 off += s_aFlags[i].cchName;
1358 fFlags &= ~s_aFlags[i].fFlag;
1359 if (!fFlags)
1360 break;
1361 }
1362 pszBuf[off] = '\0';
1363
1364 return pszBuf;
1365}
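
/*
 * Example output (illustrative): for a native TB executing 64-bit ring-0 code
 * with no other flags set, the formatter above would produce something like
 *
 *     "64BIT CPL0 TYPE_NATIVE"
 *
 * i.e. the mode name from s_aModes, a CPL digit, and one name per set flag
 * from s_aFlags.
 */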
1366
1367
1368/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1369static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1370{
1371 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1372 pDis->cbCachedInstr += cbMaxRead;
1373 RT_NOREF(cbMinRead);
1374 return VERR_NO_DATA;
1375}
1376
1377
1378DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1379{
1380 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1381
1382 char szDisBuf[512];
1383
1384 /*
1385 * Print TB info.
1386 */
1387 pHlp->pfnPrintf(pHlp,
1388 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
1389 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1390 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
1391 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1392
1393 /*
1394 * This disassembly is driven by the debug info which follows the native
1395 * code and indicates when it starts with the next guest instructions,
1396 * where labels are and such things.
1397 */
1398 DISSTATE Dis;
1399 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1400 uint32_t const cCalls = pTb->Thrd.cCalls;
1401 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1402 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1403 : DISCPUMODE_64BIT;
1404 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1405 uint8_t idxRange = UINT8_MAX;
1406 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1407 uint32_t offRange = 0;
1408 uint32_t offOpcodes = 0;
1409 uint32_t const cbOpcodes = pTb->cbOpcodes;
1410 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1411
1412 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1413 {
1414 /*
1415 * New opcode range?
1416 */
1417 if ( idxRange == UINT8_MAX
1418 || idxRange >= cRanges
1419 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1420 {
1421 idxRange += 1;
1422 if (idxRange < cRanges)
1423 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1424 else
1425 continue;
1426 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1427 + (pTb->aRanges[idxRange].idxPhysPage == 0
1428 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1429 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1430 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1431 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1432 pTb->aRanges[idxRange].idxPhysPage);
1433 GCPhysPc += offRange;
1434 }
1435
1436 /*
1437 * Disassemble another guest instruction?
1438 */
1439 if ( paCalls[iCall].offOpcode != offOpcodes
1440 && paCalls[iCall].cbOpcode > 0
1441 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1442 {
1443 offOpcodes = paCalls[iCall].offOpcode;
1444 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1445 uint32_t cbInstr = 1;
1446 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1447 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1448 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1449 if (RT_SUCCESS(rc))
1450 {
1451 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1452 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1453 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1454 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1455 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1456 }
1457 else
1458 {
1459 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1460 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1461 cbInstr = paCalls[iCall].cbOpcode;
1462 }
1463 GCPhysPc += cbInstr;
1464 offRange += cbInstr;
1465 }
1466
1467 /*
1468 * Dump call details.
1469 */
1470 pHlp->pfnPrintf(pHlp,
1471 " Call #%u to %s (%u args)\n",
1472 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1473 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1474
1475 /*
1476 * Snoop fExec.
1477 */
1478 switch (paCalls[iCall].enmFunction)
1479 {
1480 default:
1481 break;
1482 case kIemThreadedFunc_BltIn_CheckMode:
1483 fExec = paCalls[iCall].auParams[0];
1484 break;
1485 }
1486 }
1487}
1488
1489
1490
1491/**
1492 * Allocate a translation block for threaded recompilation.
1493 *
1494 * This is allocated with a maxed-out call table and storage for opcode bytes,
1495 * because it's only supposed to be called once per EMT to allocate the TB
1496 * pointed to by IEMCPU::pThrdCompileTbR3.
1497 *
1498 * @returns Pointer to the translation block on success, NULL on failure.
1499 * @param pVM The cross context virtual machine structure.
1500 * @param pVCpu The cross context virtual CPU structure of the calling
1501 * thread.
1502 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1503 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1504 */
1505static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1506{
1507 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1508 if (pTb)
1509 {
1510 unsigned const cCalls = 256;
1511 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1512 if (pTb->Thrd.paCalls)
1513 {
1514 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1515 if (pTb->pabOpcodes)
1516 {
1517 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1518 pTb->Thrd.cAllocated = cCalls;
1519 pTb->Thrd.cCalls = 0;
1520 pTb->cbOpcodes = 0;
1521 pTb->pNext = NULL;
1522 pTb->cUsed = 0;
1523 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1524 pTb->idxAllocChunk = UINT8_MAX;
1525 pTb->GCPhysPc = GCPhysPc;
1526 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1527 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1528 pTb->cInstructions = 0;
1529
1530 /* Init the first opcode range. */
1531 pTb->cRanges = 1;
1532 pTb->aRanges[0].cbOpcodes = 0;
1533 pTb->aRanges[0].offOpcodes = 0;
1534 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1535 pTb->aRanges[0].u2Unused = 0;
1536 pTb->aRanges[0].idxPhysPage = 0;
1537 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1538 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1539
1540 return pTb;
1541 }
1542 RTMemFree(pTb->Thrd.paCalls);
1543 }
1544 RTMemFree(pTb);
1545 }
1546 RT_NOREF(pVM);
1547 return NULL;
1548}
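
/* Rough sizing sketch (informational, derived from the allocation above): with
   cCalls = 256 this maxed-out TB reserves 256 call entries plus 256 * 16 = 4096
   bytes of opcode storage, i.e. room for 256 instructions of the maximum x86
   length (15 bytes, rounded up to 16). */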
1549
1550
1551/**
1552 * Called on the TB that is dedicated for recompilation before it's reused.
1553 *
1554 * @param pVCpu The cross context virtual CPU structure of the calling
1555 * thread.
1556 * @param pTb The translation block to reuse.
1557 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1558 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1559 */
1560static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1561{
1562 pTb->GCPhysPc = GCPhysPc;
1563 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1564 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1565 pTb->Thrd.cCalls = 0;
1566 pTb->cbOpcodes = 0;
1567 pTb->cInstructions = 0;
1568
1569 /* Init the first opcode range. */
1570 pTb->cRanges = 1;
1571 pTb->aRanges[0].cbOpcodes = 0;
1572 pTb->aRanges[0].offOpcodes = 0;
1573 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1574 pTb->aRanges[0].u2Unused = 0;
1575 pTb->aRanges[0].idxPhysPage = 0;
1576 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1577 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1578}
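
#if 0 /* Illustrative sketch only (not built): how a caller is expected to pick
         between iemThreadedTbAlloc and iemThreadedTbReuse. This mirrors the
         logic in iemThreadedCompile further down; the helper name is assumed
         and the members used are the ones referenced above. */
static PIEMTB iemThreadedTbGetForCompileSketch(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
{
    /* Reuse the per-EMT compile TB if it already exists, otherwise allocate it. */
    PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
    if (pTb)
        iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
    else
    {
        pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
        if (pTb)
            pVCpu->iem.s.pThrdCompileTbR3 = pTb;
    }
    return pTb; /* NULL on allocation failure. */
}
#endif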
1579
1580
1581/**
1582 * Used to duplicate a threaded translation block after recompilation is done.
1583 *
1584 * @returns Pointer to the translation block on success, NULL on failure.
1585 * @param pVM The cross context virtual machine structure.
1586 * @param pVCpu The cross context virtual CPU structure of the calling
1587 * thread.
1588 * @param pTbSrc The TB to duplicate.
1589 */
1590static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1591{
1592 /*
1593 * Just using the heap for now. Will make this more efficient and
1594 * complicated later, don't worry. :-)
1595 */
1596 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1597 if (pTb)
1598 {
1599 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1600 memcpy(pTb, pTbSrc, sizeof(*pTb));
1601 pTb->idxAllocChunk = idxAllocChunk;
1602
1603 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1604 Assert(cCalls > 0);
1605 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1606 if (pTb->Thrd.paCalls)
1607 {
1608 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1609 Assert(cbOpcodes > 0);
1610 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1611 if (pTb->pabOpcodes)
1612 {
1613 pTb->Thrd.cAllocated = cCalls;
1614 pTb->pNext = NULL;
1615 pTb->cUsed = 0;
1616 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1617 pTb->fFlags = pTbSrc->fFlags;
1618
1619 return pTb;
1620 }
1621 RTMemFree(pTb->Thrd.paCalls);
1622 }
1623 iemTbAllocatorFree(pVCpu, pTb);
1624 }
1625 RT_NOREF(pVM);
1626 return NULL;
1627
1628}
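
/* Note that the duplicate produced above is right-sized: only the Thrd.cCalls
   entries and cbOpcodes bytes actually emitted are copied, while the maxed-out
   compile-time TB is kept around for the next compilation. */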
1629
1630
1631/**
1632 * Adds the given TB to the hash table.
1633 *
1634 * @param pVCpu The cross context virtual CPU structure of the calling
1635 * thread.
1636 * @param pTbCache The cache to add it to.
1637 * @param pTb The translation block to add.
1638 */
1639static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1640{
1641 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1642
1643 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
1644 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1645 if (LogIs12Enabled())
1646 {
1647 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1648 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1649 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1650 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1651 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1652 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1653 pTb->aRanges[idxRange].idxPhysPage == 0
1654 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1655 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1656 }
1657}
1658
1659
1660/**
1661 * Called by opcode verifier functions when they detect a problem.
1662 */
1663void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1664{
1665 /* We cannot free the current TB (when fSafeToFree is false) because:
1666 - A threaded TB will have its current call entry accessed
1667 to update pVCpu->iem.s.cInstructions.
1668 - A native TB will have code left to execute. */
1669 if (fSafeToFree)
1670 iemTbAllocatorFree(pVCpu, pTb);
1671 else
1672 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1673}
1674
1675
1676/*
1677 * Real code.
1678 */
1679
1680#ifdef LOG_ENABLED
1681/**
1682 * Logs the current instruction.
1683 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1684 * @param pszFunction The IEM function doing the execution.
1685 * @param idxInstr The instruction number in the block.
1686 */
1687static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1688{
1689# ifdef IN_RING3
1690 if (LogIs2Enabled())
1691 {
1692 char szInstr[256];
1693 uint32_t cbInstr = 0;
1694 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1695 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1696 szInstr, sizeof(szInstr), &cbInstr);
1697
1698 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1699 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1700 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1701 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1702 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1703 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1704 " %s\n"
1705 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1706 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1707 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1708 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1709 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1710 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1711 szInstr));
1712
1713 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1714 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1715 }
1716 else
1717# endif
1718 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1719 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1720}
1721#endif /* LOG_ENABLED */
1722
1723
1724#if 0
1725static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1726{
1727 RT_NOREF(pVM, pVCpu);
1728 return rcStrict;
1729}
1730#endif
1731
1732
1733/**
1734 * Initializes the decoder state when compiling TBs.
1735 *
1736 * This presumes that fExec has already been initialized.
1737 *
1738 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1739 * to apply fixes to them as well.
1740 *
1741 * @param pVCpu The cross context virtual CPU structure of the calling
1742 * thread.
1743 * @param fReInit Clear for the first call for a TB, set for subsequent
1744 * calls from inside the compile loop where we can skip a
1745 * couple of things.
1746 * @param fExtraFlags The extra translation block flags when @a fReInit is
1747 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1748 * checked.
1749 */
1750DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1751{
1752 /* ASSUMES: That iemInitExec was already called and that anyone changing
1753 CPU state affecting the fExec bits since then will have updated fExec! */
1754 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1755 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1756
1757 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1758
1759 /* Decoder state: */
1760 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1761 pVCpu->iem.s.enmEffAddrMode = enmMode;
1762 if (enmMode != IEMMODE_64BIT)
1763 {
1764 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1765 pVCpu->iem.s.enmEffOpSize = enmMode;
1766 }
1767 else
1768 {
1769 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1770 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1771 }
1772 pVCpu->iem.s.fPrefixes = 0;
1773 pVCpu->iem.s.uRexReg = 0;
1774 pVCpu->iem.s.uRexB = 0;
1775 pVCpu->iem.s.uRexIndex = 0;
1776 pVCpu->iem.s.idxPrefix = 0;
1777 pVCpu->iem.s.uVex3rdReg = 0;
1778 pVCpu->iem.s.uVexLength = 0;
1779 pVCpu->iem.s.fEvexStuff = 0;
1780 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1781 pVCpu->iem.s.offModRm = 0;
1782 pVCpu->iem.s.iNextMapping = 0;
1783
1784 if (!fReInit)
1785 {
1786 pVCpu->iem.s.cActiveMappings = 0;
1787 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1788 pVCpu->iem.s.fEndTb = false;
1789 pVCpu->iem.s.fTbCheckOpcodes = false;
1790 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1791 pVCpu->iem.s.fTbCrossedPage = false;
1792 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1793 pVCpu->iem.s.fTbCurInstrIsSti = false;
1794 /* Force RF clearing and TF checking on first instruction in the block
1795 as we don't really know what came before and should assume the worst: */
1796 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1797 }
1798 else
1799 {
1800 Assert(pVCpu->iem.s.cActiveMappings == 0);
1801 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1802 Assert(pVCpu->iem.s.fEndTb == false);
1803 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1804 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1805 }
1806 pVCpu->iem.s.fTbCurInstr = 0;
1807
1808#ifdef DBGFTRACE_ENABLED
1809 switch (IEM_GET_CPU_MODE(pVCpu))
1810 {
1811 case IEMMODE_64BIT:
1812 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1813 break;
1814 case IEMMODE_32BIT:
1815 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1816 break;
1817 case IEMMODE_16BIT:
1818 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1819 break;
1820 }
1821#endif
1822}
1823
1824
1825/**
1826 * Initializes the opcode fetcher when starting the compilation.
1827 *
1828 * @param pVCpu The cross context virtual CPU structure of the calling
1829 * thread.
1830 */
1831DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1832{
1833 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1834#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1835 pVCpu->iem.s.offOpcode = 0;
1836#else
1837 RT_NOREF(pVCpu);
1838#endif
1839}
1840
1841
1842/**
1843 * Re-initializes the opcode fetcher between instructions while compiling.
1844 *
1845 * @param pVCpu The cross context virtual CPU structure of the calling
1846 * thread.
1847 */
1848DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1849{
1850 if (pVCpu->iem.s.pbInstrBuf)
1851 {
1852 uint64_t off = pVCpu->cpum.GstCtx.rip;
1853 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1854 off += pVCpu->cpum.GstCtx.cs.u64Base;
1855 off -= pVCpu->iem.s.uInstrBufPc;
1856 if (off < pVCpu->iem.s.cbInstrBufTotal)
1857 {
1858 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1859 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1860 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1861 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1862 else
1863 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1864 }
1865 else
1866 {
1867 pVCpu->iem.s.pbInstrBuf = NULL;
1868 pVCpu->iem.s.offInstrNextByte = 0;
1869 pVCpu->iem.s.offCurInstrStart = 0;
1870 pVCpu->iem.s.cbInstrBuf = 0;
1871 pVCpu->iem.s.cbInstrBufTotal = 0;
1872 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1873 }
1874 }
1875 else
1876 {
1877 pVCpu->iem.s.offInstrNextByte = 0;
1878 pVCpu->iem.s.offCurInstrStart = 0;
1879 pVCpu->iem.s.cbInstrBuf = 0;
1880 pVCpu->iem.s.cbInstrBufTotal = 0;
1881#ifdef VBOX_STRICT
1882 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1883#endif
1884 }
1885#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1886 pVCpu->iem.s.offOpcode = 0;
1887#endif
1888}
1889
1890#ifdef LOG_ENABLED
1891
1892/**
1893 * Inserts a NOP call.
1894 *
1895 * This is for debugging.
1896 *
1897 * @returns true on success, false if we're out of call entries.
1898 * @param pTb The translation block being compiled.
1899 */
1900bool iemThreadedCompileEmitNop(PIEMTB pTb)
1901{
1902 /* Emit the call. */
1903 uint32_t const idxCall = pTb->Thrd.cCalls;
1904 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1905 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1906 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1907 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
1908 pCall->idxInstr = pTb->cInstructions - 1;
1909 pCall->uUnused0 = 0;
1910 pCall->offOpcode = 0;
1911 pCall->cbOpcode = 0;
1912 pCall->idxRange = 0;
1913 pCall->auParams[0] = 0;
1914 pCall->auParams[1] = 0;
1915 pCall->auParams[2] = 0;
1916 return true;
1917}
1918
1919
1920/**
1921 * Called by iemThreadedCompile if cpu state logging is desired.
1922 *
1923 * @returns true on success, false if we're out of call entries.
1924 * @param pTb The translation block being compiled.
1925 */
1926bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
1927{
1928 /* Emit the call. */
1929 uint32_t const idxCall = pTb->Thrd.cCalls;
1930 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1931 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1932 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1933 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
1934 pCall->idxInstr = pTb->cInstructions - 1;
1935 pCall->uUnused0 = 0;
1936 pCall->offOpcode = 0;
1937 pCall->cbOpcode = 0;
1938 pCall->idxRange = 0;
1939 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
1940 pCall->auParams[1] = 0;
1941 pCall->auParams[2] = 0;
1942 return true;
1943}
1944
1945#endif /* LOG_ENABLED */
1946
1947DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1948{
1949 switch (cbInstr)
1950 {
1951 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1952 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1953 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1954 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1955 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1956 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1957 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1958 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1959 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1960 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1961 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1962 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1963 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1964 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1965 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1966 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1967 }
1968}
1969
1970
1971/**
1972 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1973 *
1974 * - CS LIM check required.
1975 * - Must recheck opcode bytes.
1976 * - Previous instruction branched.
1977 * - TLB load detected, probably due to page crossing.
1978 *
1979 * @returns true if everything went well, false if we're out of space in the TB
1980 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
1981 * @param pVCpu The cross context virtual CPU structure of the calling
1982 * thread.
1983 * @param pTb The translation block being compiled.
1984 */
1985bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1986{
1987 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1988 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1989#if 0
1990 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1991 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1992#endif
1993
1994 /*
1995 * If we're not in 64-bit mode and not already checking CS.LIM we need to
1996 * see if it's needed to start checking.
1997 */
1998 bool fConsiderCsLimChecking;
1999 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2000 if ( fMode == IEM_F_MODE_X86_64BIT
2001 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2002 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2003 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2004 fConsiderCsLimChecking = false; /* already enabled or not needed */
2005 else
2006 {
2007 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2008 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2009 fConsiderCsLimChecking = true; /* likely */
2010 else
2011 {
2012 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2013 return false;
2014 }
2015 }
2016
2017 /*
2018 * Prepare the call now, even before we know if we can accept the instruction in this TB.
2019 * This allows us to amend parameters w/o making every case suffer.
2020 */
2021 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2022 uint16_t const offOpcode = pTb->cbOpcodes;
2023 uint8_t idxRange = pTb->cRanges - 1;
2024
2025 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2026 pCall->idxInstr = pTb->cInstructions;
2027 pCall->offOpcode = offOpcode;
2028 pCall->idxRange = idxRange;
2029 pCall->cbOpcode = cbInstr;
2030 pCall->auParams[0] = (uint32_t)cbInstr
2031 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2032 /* The upper dword is sometimes used for cbStartPage. */;
2033 pCall->auParams[1] = idxRange;
2034 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2035
2036/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2037 * gotten onto. If we do, stop */
2038
2039 /*
2040 * Case 1: We've branched (RIP changed).
2041 *
2042 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2043 * Req: 1 extra range, no extra phys.
2044 *
2045 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2046 * necessary (fTbCrossedPage is true).
2047 * Req: 1 extra range, probably 1 extra phys page entry.
2048 *
2049 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2050 * but in addition we cross into the following page and require
2051 * another TLB load.
2052 * Req: 2 extra ranges, probably 2 extra phys page entries.
2053 *
2054 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2055 * the following page (thus fTbCrossedPage is true).
2056 * Req: 2 extra ranges, probably 1 extra phys page entry.
2057 *
2058 * Note! The fTbCrossedPage flag is set by iemOpcodeFetchBytesJmp, but
2059 * it may trigger "spuriously" from the CPU point of view because of
2060 * physical page changes that'll invalidate the physical TLB and trigger a
2061 * call to the function. In theory this shouldn't be a big deal, just a
2062 * bit of performance loss as we'll pick the LoadingTlb variants.
2063 *
2064 * Note! We do not currently optimize branching to the next instruction (sorry
2065 * 32-bit PIC code). We could maybe do that in the branching code that
2066 * sets (or not) fTbBranched.
2067 */
2068 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2069 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2070 * code. This'll require filtering out far jmps and calls, as they
2071 * load CS which should technically be considered indirect since the
2072 * GDT/LDT entry's base address can be modified independently from
2073 * the code. */
2074 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2075 {
2076 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2077 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2078 {
2079 /* 1a + 1b - instruction fully within the branched to page. */
2080 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2081 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2082
2083 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2084 {
2085 /* Check that we've got a free range. */
2086 idxRange += 1;
2087 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2088 { /* likely */ }
2089 else
2090 {
2091 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2092 return false;
2093 }
2094 pCall->idxRange = idxRange;
2095 pCall->auParams[1] = idxRange;
2096 pCall->auParams[2] = 0;
2097
2098 /* Check that we've got a free page slot. */
2099 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2100 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2101 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2102 pTb->aRanges[idxRange].idxPhysPage = 0;
2103 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2104 || pTb->aGCPhysPages[0] == GCPhysNew)
2105 {
2106 pTb->aGCPhysPages[0] = GCPhysNew;
2107 pTb->aRanges[idxRange].idxPhysPage = 1;
2108 }
2109 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2110 || pTb->aGCPhysPages[1] == GCPhysNew)
2111 {
2112 pTb->aGCPhysPages[1] = GCPhysNew;
2113 pTb->aRanges[idxRange].idxPhysPage = 2;
2114 }
2115 else
2116 {
2117 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2118 return false;
2119 }
2120
2121 /* Finish setting up the new range. */
2122 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2123 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2124 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2125 pTb->aRanges[idxRange].u2Unused = 0;
2126 pTb->cRanges++;
2127 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2128 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2129 pTb->aRanges[idxRange].offOpcodes));
2130 }
2131 else
2132 {
2133 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2134 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2135 }
2136
2137 /* Determine which function we need to load & check.
2138 Note! For jumps to a new page, we'll set both fTbBranched and
2139 fTbCrossedPage to avoid unnecessary TLB work for intra
2140 page branching */
2141 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2142 || pVCpu->iem.s.fTbCrossedPage)
2143 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2144 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2145 : !fConsiderCsLimChecking
2146 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2147 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2148 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2149 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2150 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2151 : !fConsiderCsLimChecking
2152 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2153 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2154 else
2155 {
2156 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2157 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2158 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2159 : !fConsiderCsLimChecking
2160 ? kIemThreadedFunc_BltIn_CheckOpcodes
2161 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2162 }
2163 }
2164 else
2165 {
2166 /* 1c + 1d - instruction crosses pages. */
2167 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2168 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2169
2170 /* Lazy bird: Check that this isn't case 1c, since we've already
2171 loaded the first physical address. End the TB and
2172 make it a case 2b instead.
2173
2174 Hmm. Too much bother to detect, so just do the same
2175 with case 1d as well. */
2176#if 0 /** @todo get back to this later when we've got the actual branch code in
2177 * place. */
2178 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2179
2180 /* Check that we've got two free ranges. */
2181 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2182 { /* likely */ }
2183 else
2184 return false;
2185 idxRange += 1;
2186 pCall->idxRange = idxRange;
2187 pCall->auParams[1] = idxRange;
2188 pCall->auParams[2] = 0;
2189
2190 /* ... */
2191
2192#else
2193 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2194 return false;
2195#endif
2196 }
2197 }
2198
2199 /*
2200 * Case 2: Page crossing.
2201 *
2202 * Sub-case 2a: The instruction starts on the first byte in the next page.
2203 *
2204 * Sub-case 2b: The instruction has opcode bytes in both the current and
2205 * following page.
2206 *
2207 * Both cases requires a new range table entry and probably a new physical
2208 * page entry. The difference is in which functions to emit and whether to
2209 * add bytes to the current range.
2210 */
2211 else if (pVCpu->iem.s.fTbCrossedPage)
2212 {
2213 /* Check that we've got a free range. */
2214 idxRange += 1;
2215 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2216 { /* likely */ }
2217 else
2218 {
2219 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2220 return false;
2221 }
2222
2223 /* Check that we've got a free page slot. */
2224 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2225 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2226 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2227 pTb->aRanges[idxRange].idxPhysPage = 0;
2228 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2229 || pTb->aGCPhysPages[0] == GCPhysNew)
2230 {
2231 pTb->aGCPhysPages[0] = GCPhysNew;
2232 pTb->aRanges[idxRange].idxPhysPage = 1;
2233 }
2234 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2235 || pTb->aGCPhysPages[1] == GCPhysNew)
2236 {
2237 pTb->aGCPhysPages[1] = GCPhysNew;
2238 pTb->aRanges[idxRange].idxPhysPage = 2;
2239 }
2240 else
2241 {
2242 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2243 return false;
2244 }
2245
2246 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2247 {
2248 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2249 pCall->idxRange = idxRange;
2250 pCall->auParams[1] = idxRange;
2251 pCall->auParams[2] = 0;
2252
2253 /* Finish setting up the new range. */
2254 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2255 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2256 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2257 pTb->aRanges[idxRange].u2Unused = 0;
2258 pTb->cRanges++;
2259 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2260 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2261 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2262
2263 /* Determine which function we need to load & check. */
2264 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2265 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2266 : !fConsiderCsLimChecking
2267 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2268 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2269 }
2270 else
2271 {
2272 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2273 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2274 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2275 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2276
2277 /* We're good. Split the instruction over the old and new range table entries. */
2278 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2279
2280 pTb->aRanges[idxRange].offPhysPage = 0;
2281 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2282 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2283 pTb->aRanges[idxRange].u2Unused = 0;
2284 pTb->cRanges++;
2285 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2286 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2287 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2288
2289 /* Determine which function we need to load & check. */
2290 if (pVCpu->iem.s.fTbCheckOpcodes)
2291 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2292 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2293 : !fConsiderCsLimChecking
2294 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2295 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2296 else
2297 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2298 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2299 : !fConsiderCsLimChecking
2300 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2301 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2302 }
2303 }
2304
2305 /*
2306 * Regular case: No new range required.
2307 */
2308 else
2309 {
2310 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2311 if (pVCpu->iem.s.fTbCheckOpcodes)
2312 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2313 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2314 : kIemThreadedFunc_BltIn_CheckOpcodes;
2315 else
2316 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2317
2318 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2319 pTb->cbOpcodes = offOpcode + cbInstr;
2320 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2321 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2322 }
2323
2324 /*
2325 * Commit the call.
2326 */
2327 pTb->Thrd.cCalls++;
2328
2329 /*
2330 * Clear state.
2331 */
2332 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2333 pVCpu->iem.s.fTbCrossedPage = false;
2334 pVCpu->iem.s.fTbCheckOpcodes = false;
2335
2336 /*
2337 * Copy opcode bytes.
2338 */
2339 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2340 pTb->cbOpcodes = offOpcode + cbInstr;
2341 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2342
2343 return true;
2344}
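
#if 0 /* Illustrative sketch only (not built): the physical page slot selection
         is duplicated above for the branch and page-crossing cases; a factored
         helper with an assumed name could look like this. */
static bool iemThreadedTbSetPhysPageSketch(PIEMTB pTb, RTGCPHYS GCPhysNew, uint8_t idxRange)
{
    if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
        pTb->aRanges[idxRange].idxPhysPage = 0; /* same physical page as the TB entry point */
    else if (   pTb->aGCPhysPages[0] == NIL_RTGCPHYS
             || pTb->aGCPhysPages[0] == GCPhysNew)
    {
        pTb->aGCPhysPages[0] = GCPhysNew;
        pTb->aRanges[idxRange].idxPhysPage = 1;
    }
    else if (   pTb->aGCPhysPages[1] == NIL_RTGCPHYS
             || pTb->aGCPhysPages[1] == GCPhysNew)
    {
        pTb->aGCPhysPages[1] = GCPhysNew;
        pTb->aRanges[idxRange].idxPhysPage = 2;
    }
    else
        return false; /* out of aGCPhysPages entries - the caller must end the TB */
    return true;
}
#endif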
2345
2346
2347/**
2348 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2349 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2350 *
2351 * @returns true if anything is pending, false if not.
2352 * @param pVCpu The cross context virtual CPU structure of the calling
2353 * thread.
2354 */
2355DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2356{
2357 uint64_t fCpu = pVCpu->fLocalForcedActions;
2358 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2359#if 1
2360 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2361 if (RT_LIKELY( !fCpu
2362 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2363 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2364 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2365 return false;
2366 return true;
2367#else
2368 return false;
2369#endif
2370
2371}
2372
2373
2374/**
2375 * Called by iemThreadedCompile when a block requires a mode check.
2376 *
2377 * @returns true if we should continue, false if we're out of call entries.
2378 * @param pVCpu The cross context virtual CPU structure of the calling
2379 * thread.
2380 * @param pTb The translation block being compiled.
2381 */
2382static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2383{
2384 /* Emit the call. */
2385 uint32_t const idxCall = pTb->Thrd.cCalls;
2386 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2387 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2388 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2389 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2390 pCall->idxInstr = pTb->cInstructions - 1;
2391 pCall->uUnused0 = 0;
2392 pCall->offOpcode = 0;
2393 pCall->cbOpcode = 0;
2394 pCall->idxRange = 0;
2395 pCall->auParams[0] = pVCpu->iem.s.fExec;
2396 pCall->auParams[1] = 0;
2397 pCall->auParams[2] = 0;
2398 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2399 return true;
2400}
2401
2402
2403/**
2404 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2405 * set.
2406 *
2407 * @returns true if we should continue, false if an IRQ is deliverable or a
2408 * relevant force flag is pending.
2409 * @param pVCpu The cross context virtual CPU structure of the calling
2410 * thread.
2411 * @param pTb The translation block being compiled.
2412 * @sa iemThreadedCompileCheckIrq
2413 */
2414bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2415{
2416 /*
2417 * Skip this if we've already emitted a call after the previous instruction
2418 * or if it's the first call, as we're always checking FFs between blocks.
2419 */
2420 uint32_t const idxCall = pTb->Thrd.cCalls;
2421 if ( idxCall > 0
2422 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2423 {
2424 /* Emit the call. */
2425 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2426 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2427 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2428 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2429 pCall->idxInstr = pTb->cInstructions;
2430 pCall->uUnused0 = 0;
2431 pCall->offOpcode = 0;
2432 pCall->cbOpcode = 0;
2433 pCall->idxRange = 0;
2434 pCall->auParams[0] = 0;
2435 pCall->auParams[1] = 0;
2436 pCall->auParams[2] = 0;
2437 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2438
2439 /* Reset the IRQ check value. */
2440 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2441
2442 /*
2443 * Check for deliverable IRQs and pending force flags.
2444 */
2445 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2446 }
2447 return true; /* continue */
2448}
2449
2450
2451/**
2452 * Emits an IRQ check call and checks for pending IRQs.
2453 *
2454 * @returns true if we should continue, false if an IRQ is deliverable or a
2455 * relevant force flag is pending.
2456 * @param pVCpu The cross context virtual CPU structure of the calling
2457 * thread.
2458 * @param pTb The translation block.
2459 * @sa iemThreadedCompileBeginEmitCallsComplications
2460 */
2461static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2462{
2463 /* Check again in a little bit, unless it is immediately following an STI
2464 in which case we *must* check immediately after the next instruction
2465 as well in case it's executed with interrupt inhibition. We could
2466 otherwise miss the interrupt window. See the irq2 wait2 variant in
2467 bs3-timers-1 which is doing sti + sti + cli. */
2468 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2469 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2470 else
2471 {
2472 pVCpu->iem.s.fTbCurInstrIsSti = false;
2473 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2474 }
2475 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2476
2477 /*
2478 * Emit the call.
2479 */
2480 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2481 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2482 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2483 pCall->idxInstr = pTb->cInstructions;
2484 pCall->uUnused0 = 0;
2485 pCall->offOpcode = 0;
2486 pCall->cbOpcode = 0;
2487 pCall->idxRange = 0;
2488 pCall->auParams[0] = 0;
2489 pCall->auParams[1] = 0;
2490 pCall->auParams[2] = 0;
2491
2492 /*
2493 * Check for deliverable IRQs and pending force flags.
2494 */
2495 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2496}
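
/* To summarize the cadence used above: a CheckIrq call is normally emitted
   every 32 instructions (cInstrTillIrqCheck), but immediately after an STI the
   counter is forced to zero so the very next instruction gets its own check
   and the one-instruction interrupt window isn't missed. */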
2497
2498
2499/**
2500 * Compiles a new TB and executes it.
2501 *
2502 * We combine compilation and execution here as it makes for simpler code flow
2503 * in the main loop and it allows interpreting while compiling if we want to
2504 * explore that option.
2505 *
2506 * @returns Strict VBox status code.
2507 * @param pVM The cross context virtual machine structure.
2508 * @param pVCpu The cross context virtual CPU structure of the calling
2509 * thread.
2510 * @param GCPhysPc The physical address corresponding to the current
2511 * RIP+CS.BASE.
2512 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2513 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2514 */
2515static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2516{
2517 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2518 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2519
2520 /*
2521 * Get the TB we use for recompiling. This is a maxed-out TB that
2522 * we'll make a more efficient copy of when we're done compiling.
2523 */
2524 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2525 if (pTb)
2526 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2527 else
2528 {
2529 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2530 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2531 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2532 }
2533
2534 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2535 functions may get at it. */
2536 pVCpu->iem.s.pCurTbR3 = pTb;
2537
2538#if 0
2539 /* Make sure the CheckIrq condition matches the one in EM. */
2540 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2541 const uint32_t cZeroCalls = 1;
2542#else
2543 const uint32_t cZeroCalls = 0;
2544#endif
2545
2546 /*
2547 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2548 */
2549 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2550 iemThreadedCompileInitOpcodeFetching(pVCpu);
2551 VBOXSTRICTRC rcStrict;
2552 for (;;)
2553 {
2554 /* Process the next instruction. */
2555#ifdef LOG_ENABLED
2556 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2557 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2558 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2559 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2560#endif
2561 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2562 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2563
2564 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2565#if 0
2566 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2567 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2568 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2569#endif
2570 if ( rcStrict == VINF_SUCCESS
2571 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2572 && !pVCpu->iem.s.fEndTb)
2573 {
2574 Assert(pTb->Thrd.cCalls > cCallsPrev);
2575 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2576
2577 pVCpu->iem.s.cInstructions++;
2578
2579 /* Check for mode change _after_ certain CIMPL calls, so check that
2580 we continue executing with the same mode value. */
2581 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2582 { /* probable */ }
2583 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2584 { /* extremely likely */ }
2585 else
2586 break;
2587
2588#if defined(LOG_ENABLED) && 0 /* for debugging */
2589 //iemThreadedCompileEmitNop(pTb);
2590 iemThreadedCompileEmitLogCpuState(pTb);
2591#endif
2592 }
2593 else
2594 {
2595 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2596 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2597 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2598 rcStrict = VINF_SUCCESS;
2599
2600 if (pTb->Thrd.cCalls > cZeroCalls)
2601 {
2602 if (cCallsPrev != pTb->Thrd.cCalls)
2603 pVCpu->iem.s.cInstructions++;
2604 break;
2605 }
2606
2607 pVCpu->iem.s.pCurTbR3 = NULL;
2608 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2609 }
2610
2611 /* Check for IRQs? */
2612 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2613 pVCpu->iem.s.cInstrTillIrqCheck--;
2614 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2615 break;
2616
2617 /* Still space in the TB? */
2618 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2619 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated)
2620 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2621 else
2622 {
2623 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
2624 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
2625 break;
2626 }
2627 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2628 }
2629
2630 /*
2631 * Duplicate the TB into a completed one and link it.
2632 */
2633 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2634 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2635
2636 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2637
2638#ifdef IEM_COMPILE_ONLY_MODE
2639 /*
2640 * Execute the translation block.
2641 */
2642#endif
2643
2644 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2645}
2646
2647
2648
2649/*********************************************************************************************************************************
2650* Recompiled Execution Core *
2651*********************************************************************************************************************************/
2652
2653
2654/**
2655 * Executes a translation block.
2656 *
2657 * @returns Strict VBox status code.
2658 * @param pVCpu The cross context virtual CPU structure of the calling
2659 * thread.
2660 * @param pTb The translation block to execute.
2661 */
2662static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2663{
2664 /*
2665 * Check the opcodes in the first page before starting execution.
2666 */
2667/** @todo this test should take IEMTB_F_CS_LIM_CHECKS into account or something.
2668 * The 'near jmp+call' test in bs3-cpu-basic-2 triggers the 2nd assertion here by
2669 * altering the CS limit such that only one of the two instruction bytes is valid.
2670 * Since it's a CS_LIM problem, the pbInstrBuf is good for the full length, and
2671 * the test succeeds if skipped, but we assert in debug builds. */
2672 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2673 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
2674 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
2675 { /* likely */ }
2676 else
2677 {
2678 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
2679 iemThreadedTbObsolete(pVCpu, pTb, true /*fSafeToFree*/);
2680 return VINF_SUCCESS;
2681 }
2682
2683 /*
2684 * Set the current TB so CIMPL functions may get at it.
2685 */
2686 pVCpu->iem.s.pCurTbR3 = pTb;
2687
2688 /*
2689 * Execute the block.
2690 */
2691#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2692 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2693 {
2694 pVCpu->iem.s.cTbExecNative++;
2695# ifdef LOG_ENABLED
2696 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2697# endif
2698# ifdef RT_ARCH_AMD64
2699 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2700# else
2701 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2702# endif
2703# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2704 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2705# endif
2706# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
2707 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
2708# endif
2709 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2710 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2711 { /* likely */ }
2712 else
2713 {
2714 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2715 pVCpu->iem.s.pCurTbR3 = NULL;
2716 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2717
2718 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2719 only to break out of TB execution early. */
2720 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2721 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2722
2723 /* VINF_IEM_REEXEC_WITH_FLAGS needs to receive special treatment
2724 and converted to VINF_SUCCESS or whatever is appropriate. */
2725 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2726 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
2727
2728 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2729 }
2730 }
2731 else
2732#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2733 {
2734 /*
2735 * The threaded execution loop.
2736 */
2737 pVCpu->iem.s.cTbExecThreaded++;
2738#ifdef LOG_ENABLED
2739 uint64_t uRipPrev = UINT64_MAX;
2740#endif
2741 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2742 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2743 while (cCallsLeft-- > 0)
2744 {
2745#ifdef LOG_ENABLED
2746 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2747 {
2748 uRipPrev = pVCpu->cpum.GstCtx.rip;
2749 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2750 }
2751 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2752 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2753 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2754#endif
2755#ifdef VBOX_WITH_STATISTICS
2756 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
2757 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
2758#endif
2759 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2760 pCallEntry->auParams[0],
2761 pCallEntry->auParams[1],
2762 pCallEntry->auParams[2]);
2763 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2764 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2765 pCallEntry++;
2766 else
2767 {
2768 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2769 pVCpu->iem.s.pCurTbR3 = NULL;
2770 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2771
2772 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2773 only to break out of TB execution early. */
2774 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2775 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2776 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2777 }
2778 }
2779 }
2780
2781 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2782 pVCpu->iem.s.pCurTbR3 = NULL;
2783 return VINF_SUCCESS;
2784}
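
/* The memcmp above only revalidates the opcode bytes of the first range (the
   page pbInstrBuf currently maps); ranges on other physical pages are expected
   to be covered by the BltIn_CheckOpcodes* calls recorded in the TB itself. */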
2785
2786
2787/**
2788 * This is called when the PC doesn't match the current pbInstrBuf.
2789 *
2790 * Upon return, we're ready for opcode fetching. But please note that
2791 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2792 * MMIO or unassigned).
2793 */
2794static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2795{
2796 pVCpu->iem.s.pbInstrBuf = NULL;
2797 pVCpu->iem.s.offCurInstrStart = 0;
2798 pVCpu->iem.s.offInstrNextByte = 0;
2799 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2800 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2801}
2802
2803
2804/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
2805DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2806{
2807 /*
2808 * Set uCurTbStartPc to RIP and calc the effective PC.
2809 */
2810 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2811 pVCpu->iem.s.uCurTbStartPc = uPc;
2812 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2813 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2814
2815 /*
2816 * Advance within the current buffer (PAGE) when possible.
2817 */
2818 if (pVCpu->iem.s.pbInstrBuf)
2819 {
2820 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2821 if (off < pVCpu->iem.s.cbInstrBufTotal)
2822 {
2823 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2824 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2825 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2826 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2827 else
2828 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2829
2830 return pVCpu->iem.s.GCPhysInstrBuf + off;
2831 }
2832 }
2833 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2834}
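
/* A compact way to read the fast path above: the flat PC is RIP + CS.BASE, and
   as long as (flat PC - uInstrBufPc) still lands inside the currently mapped
   buffer, the physical address is simply
       GCPhysPc = GCPhysInstrBuf + (flat PC - uInstrBufPc)
   without touching the TLB again; otherwise the slow path reloads the buffer. */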
2835
2836
2837/**
2838 * Determines the extra IEMTB_F_XXX flags.
2839 *
2840 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2841 * IEMTB_F_CS_LIM_CHECKS (or zero).
2842 * @param pVCpu The cross context virtual CPU structure of the calling
2843 * thread.
2844 */
2845DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2846{
2847 uint32_t fRet = 0;
2848
2849 /*
2850 * Determine the inhibit bits.
2851 */
2852 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2853 { /* typical */ }
2854 else
2855 {
2856 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2857 fRet |= IEMTB_F_INHIBIT_SHADOW;
2858 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2859 fRet |= IEMTB_F_INHIBIT_NMI;
2860 }
2861
2862 /*
2863 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2864 * likely to go invalid before the end of the translation block.
2865 */
2866 if (IEM_IS_64BIT_CODE(pVCpu))
2867 return fRet;
2868
2869 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2870 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2871 return fRet;
2872 return fRet | IEMTB_F_CS_LIM_CHECKS;
2873}
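
/* Rule of thumb implemented above: 64-bit code never needs CS.LIM checks, and
   16/32-bit code only needs them when the limit leaves less than a guest page
   plus a maximum-length instruction (X86_PAGE_SIZE + 16 bytes, adjusted for
   the page offset of CS.BASE) of headroom beyond the current EIP. */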
2874
2875
2876VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
2877{
2878 /*
2879 * See if there is an interrupt pending in TRPM, inject it if we can.
2880 */
2881 if (!TRPMHasTrap(pVCpu))
2882 { /* likely */ }
2883 else
2884 {
2885 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
2886 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
2887 { /*likely */ }
2888 else
2889 return rcStrict;
2890 }
2891
2892 /*
2893 * Init the execution environment.
2894 */
2895#if 1 /** @todo this seems like a good idea, however if we ever share memory
2896 * directly with other threads on the host, it isn't necessarily... */
2897 if (pVM->cCpus == 1)
2898 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
2899 else
2900#endif
2901 iemInitExec(pVCpu, 0 /*fExecOpts*/);
2902 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
2903 { }
2904 else
2905 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
2906
2907 /*
2908 * Run-loop.
2909 *
2910 * If we're using setjmp/longjmp we combine all the catching here to avoid
2911 * having to call setjmp for each block we're executing.
2912 */
2913 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
2914 for (;;)
2915 {
2916 PIEMTB pTb = NULL;
2917 VBOXSTRICTRC rcStrict;
2918 IEM_TRY_SETJMP(pVCpu, rcStrict)
2919 {
2920 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
2921 for (uint32_t iIterations = 0; ; iIterations++)
2922 {
2923 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
2924 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
2925 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
2926 {
2927 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
2928 pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
2929 if (pTb)
2930 rcStrict = iemTbExec(pVCpu, pTb);
2931 else
2932 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
2933 }
2934 else
2935 {
2936 /* This can only happen if the current PC cannot be translated into a
2937 host pointer, which means we're in MMIO or unmapped memory... */
2938#if defined(VBOX_STRICT) && defined(IN_RING3)
2939 rcStrict = DBGFSTOP(pVM);
2940 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
2941 return rcStrict;
2942#endif
2943 rcStrict = IEMExecLots(pVCpu, 2048, cPollRate, NULL);
2944 }
2945 if (rcStrict == VINF_SUCCESS)
2946 {
2947 Assert(pVCpu->iem.s.cActiveMappings == 0);
2948
2949 uint64_t fCpu = pVCpu->fLocalForcedActions;
2950 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
2951 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2952 | VMCPU_FF_TLB_FLUSH
2953 | VMCPU_FF_UNHALT );
2954 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2955 if (RT_LIKELY( ( !fCpu
2956 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2957 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2958 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
2959 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
2960 {
2961 if (RT_LIKELY( (iIterations & cPollRate) != 0
2962 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
2963 pTb = NULL; /* Clear it before looping so iemTbCacheLookup can safely do native recompilation. */
2964 else
2965 return VINF_SUCCESS;
2966 }
2967 else
2968 return VINF_SUCCESS;
2969 }
2970 else
2971 return rcStrict;
2972 }
2973 }
2974 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
2975 {
2976 pVCpu->iem.s.cLongJumps++;
2977#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2978 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2979#endif
2980 if (pVCpu->iem.s.cActiveMappings > 0)
2981 iemMemRollback(pVCpu);
2982
2983#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2984 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
2985 {
2986 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
2987 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
2988 }
2989#endif
2990
2991#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
2992 /* If pTb isn't NULL we're in iemTbExec. */
2993 if (!pTb)
2994 {
2995 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
2996 pTb = pVCpu->iem.s.pCurTbR3;
2997 if (pTb)
2998 {
2999 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3000 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3001 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3002 }
3003 }
3004#endif
3005 return rcStrict;
3006 }
3007 IEM_CATCH_LONGJMP_END(pVCpu);
3008 }
3009}
3010