VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@104329

Last change on this file since 104329 was 104322, checked in by vboxsync, 10 months ago

VMM/IEM: Have a single TB prologue which serves as an entry point to the TB, bugref:10653

1/* $Id: IEMAllThrdRecompiler.cpp 104322 2024-04-12 15:18:43Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116
117
118/**
119 * Calculates the effective address of a ModR/M memory operand, extended version
120 * for use in the recompilers.
121 *
122 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
123 *
124 * May longjmp on internal error.
125 *
126 * @return The effective address.
127 * @param pVCpu The cross context virtual CPU structure of the calling thread.
128 * @param bRm The ModRM byte.
129 * @param cbImmAndRspOffset - First byte: The size of any immediate
130 * following the effective address opcode bytes
131 * (only for RIP relative addressing).
132 * - Second byte: RSP displacement (for POP [ESP]).
133 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
134 * SIB byte (bits 39:32).
135 *
136 * @note This must be defined in a source file with matching
137 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
138 * or implemented differently...
139 */
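/*
 * A minimal usage sketch of the packing described above (hypothetical numbers,
 * and only valid in decoder context where the SIB byte and displacement are
 * fetched from the opcode stream): for a 32-bit "mov eax, [ebx+esi*4+12345678h]"
 * (bRm=84h, SIB=B3h) the call
 *      uint64_t uInfo    = 0;
 *      RTGCPTR  GCPtrEff = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, 0x84, 0, &uInfo);
 * leaves the SIB byte in bits 39:32 of uInfo (B3h) and the displacement in
 * bits 31:0 (12345678h), while cbImmAndRspOffset stays zero because no
 * immediate follows and no RSP adjustment is needed.
 */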
140RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
141{
142 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
143# define SET_SS_DEF() \
144 do \
145 { \
146 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
147 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
148 } while (0)
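    /* I.e. SET_SS_DEF() switches the effective segment to SS for the xBP/xSP
       based forms below unless a segment override prefix is already active. */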
149
150 if (!IEM_IS_64BIT_CODE(pVCpu))
151 {
152/** @todo Check the effective address size crap! */
153 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
154 {
155 uint16_t u16EffAddr;
156
157 /* Handle the disp16 form with no registers first. */
158 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
159 {
160 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
161 *puInfo = u16EffAddr;
162 }
163 else
164 {
165 /* Get the displacement. */
166 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
167 {
168 case 0: u16EffAddr = 0; break;
169 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
170 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
171 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
172 }
173 *puInfo = u16EffAddr;
174
175 /* Add the base and index registers to the disp. */
176 switch (bRm & X86_MODRM_RM_MASK)
177 {
178 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
179 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
180 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
181 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
182 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
183 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
184 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
185 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
186 }
187 }
188
189 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
190 return u16EffAddr;
191 }
192
193 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
194 uint32_t u32EffAddr;
195 uint64_t uInfo;
196
197 /* Handle the disp32 form with no registers first. */
198 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
199 {
200 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
201 uInfo = u32EffAddr;
202 }
203 else
204 {
205 /* Get the register (or SIB) value. */
206 uInfo = 0;
207 switch ((bRm & X86_MODRM_RM_MASK))
208 {
209 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
210 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
211 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
212 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
213 case 4: /* SIB */
214 {
215 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
216 uInfo = (uint64_t)bSib << 32;
217
218 /* Get the index and scale it. */
219 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
220 {
221 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
222 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
223 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
224 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
225 case 4: u32EffAddr = 0; /*none */ break;
226 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
227 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
228 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
229 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
230 }
231 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
232
233 /* add base */
234 switch (bSib & X86_SIB_BASE_MASK)
235 {
236 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
237 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
238 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
239 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
240 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
241 case 5:
242 if ((bRm & X86_MODRM_MOD_MASK) != 0)
243 {
244 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
245 SET_SS_DEF();
246 }
247 else
248 {
249 uint32_t u32Disp;
250 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
251 u32EffAddr += u32Disp;
252 uInfo |= u32Disp;
253 }
254 break;
255 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
256 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
257 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
258 }
259 break;
260 }
261 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
262 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
263 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
264 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
265 }
266
267 /* Get and add the displacement. */
268 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
269 {
270 case 0:
271 break;
272 case 1:
273 {
274 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
275 u32EffAddr += i8Disp;
276 uInfo |= (uint32_t)(int32_t)i8Disp;
277 break;
278 }
279 case 2:
280 {
281 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
282 u32EffAddr += u32Disp;
283 uInfo |= u32Disp;
284 break;
285 }
286 default:
287 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
288 }
289 }
290
291 *puInfo = uInfo;
292 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
293 return u32EffAddr;
294 }
295
296 uint64_t u64EffAddr;
297 uint64_t uInfo;
298
299 /* Handle the rip+disp32 form with no registers first. */
300 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
301 {
302 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
303 uInfo = (uint32_t)u64EffAddr;
304 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
305 }
306 else
307 {
308 /* Get the register (or SIB) value. */
309 uInfo = 0;
310 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
311 {
312 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
313 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
314 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
315 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
316 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
317 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
318 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
319 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
320 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
321 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
322 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
323 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
324 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
325 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
326 /* SIB */
327 case 4:
328 case 12:
329 {
330 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
331 uInfo = (uint64_t)bSib << 32;
332
333 /* Get the index and scale it. */
334 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
335 {
336 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
337 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
338 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
339 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
340 case 4: u64EffAddr = 0; /*none */ break;
341 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
342 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
343 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
344 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
345 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
346 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
347 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
348 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
349 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
350 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
351 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
352 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
353 }
354 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
355
356 /* add base */
357 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
358 {
359 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
360 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
361 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
362 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
363 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
364 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
365 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
366 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
367 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
368 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
369 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
370 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
371 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
372 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
373 /* complicated encodings */
374 case 5:
375 case 13:
376 if ((bRm & X86_MODRM_MOD_MASK) != 0)
377 {
378 if (!pVCpu->iem.s.uRexB)
379 {
380 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
381 SET_SS_DEF();
382 }
383 else
384 u64EffAddr += pVCpu->cpum.GstCtx.r13;
385 }
386 else
387 {
388 uint32_t u32Disp;
389 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
390 u64EffAddr += (int32_t)u32Disp;
391 uInfo |= u32Disp;
392 }
393 break;
394 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
395 }
396 break;
397 }
398 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
399 }
400
401 /* Get and add the displacement. */
402 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
403 {
404 case 0:
405 break;
406 case 1:
407 {
408 int8_t i8Disp;
409 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
410 u64EffAddr += i8Disp;
411 uInfo |= (uint32_t)(int32_t)i8Disp;
412 break;
413 }
414 case 2:
415 {
416 uint32_t u32Disp;
417 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
418 u64EffAddr += (int32_t)u32Disp;
419 uInfo |= u32Disp;
420 break;
421 }
422 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
423 }
424
425 }
426
427 *puInfo = uInfo;
428 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
429 {
430 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
431 return u64EffAddr;
432 }
433 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
434 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
435 return u64EffAddr & UINT32_MAX;
436}
437
438
439/*********************************************************************************************************************************
440* Translation Block Cache. *
441*********************************************************************************************************************************/
442
443/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
444static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
445{
446 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
447 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
448 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
449 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
450 if (cMsSinceUse1 != cMsSinceUse2)
451 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
452 if (pTb1->cUsed != pTb2->cUsed)
453 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
454 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
455 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
456 return 0;
457}
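/* The resulting order: most recently used first, then higher use count, then
   native before threaded as the final tie breaker - so the tail of the sorted
   array holds the best pruning candidates. */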
458
459#ifdef VBOX_STRICT
460/**
461 * Assertion helper that checks a collision list count.
462 */
463static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
464{
465 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
466 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
467 while (pTb)
468 {
469 pTb = pTb->pNext;
470 cLeft--;
471 }
472 AssertMsg(cLeft == 0,
473 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
474 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
475}
476#endif
477
478
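/**
 * Out of line iemTbCacheAdd worker: the collision chain at @a idxHash is full,
 * so sort it (see iemTbCachePruneCmpTb), free the less interesting half and
 * insert @a pTb at the head of what remains.
 */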
479DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
480{
481 STAM_PROFILE_START(&pTbCache->StatPrune, a);
482
483 /*
484 * First convert the collision list to an array.
485 */
486 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
487 uintptr_t cInserted = 0;
488 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
489
490 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
491
492 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
493 {
494 apSortedTbs[cInserted++] = pTbCollision;
495 pTbCollision = pTbCollision->pNext;
496 }
497
498 /* Free any excess (impossible). */
499 if (RT_LIKELY(!pTbCollision))
500 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
501 else
502 do
503 {
504 PIEMTB pTbToFree = pTbCollision;
505 pTbCollision = pTbToFree->pNext;
506 iemTbAllocatorFree(pVCpu, pTbToFree);
507 } while (pTbCollision);
508
509 /*
510 * Sort it by most recently used and usage count.
511 */
512 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
513
514 /* We keep half the list for now. Perhaps a bit aggressive... */
515 uintptr_t const cKeep = cInserted / 2;
516
517 /* First free up the TBs we don't wish to keep (before creating the new
518 list because otherwise the free code will scan the list for each one
519 without ever finding it). */
520 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
521 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
522
523 /* Then chain the new TB together with the existing ones we want to keep and
524 insert this list into the hash table. */
525 pTbCollision = pTb;
526 for (uintptr_t idx = 0; idx < cKeep; idx++)
527 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
528 pTbCollision->pNext = NULL;
529
530 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
531#ifdef VBOX_STRICT
532 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
533#endif
534
535 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
536}
537
538
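/**
 * Adds @a pTb to @a pTbCache, pruning the hash slot's collision chain first if
 * its encoded entry count has already reached IEMTBCACHE_PTR_MAX_COUNT.
 */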
539static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
540{
541 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
542 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
543 if (!pTbOldHead)
544 {
545 pTb->pNext = NULL;
546 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
547 }
548 else
549 {
550 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
551 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
552 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
553 {
554 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
555 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
556#ifdef VBOX_STRICT
557 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
558#endif
559 }
560 else
561 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
562 }
563}
564
565
566/**
567 * Unlinks @a pTb from the hash table if found in it.
568 *
569 * @returns true if unlinked, false if not present.
570 * @param pTbCache The hash table.
571 * @param pTb The TB to remove.
572 */
573static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
574{
575 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
576 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
577 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
578
579 /*
580 * At the head of the collision list?
581 */
582 if (pTbHash == pTb)
583 {
584 if (!pTb->pNext)
585 pTbCache->apHash[idxHash] = NULL;
586 else
587 {
588 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
589 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
590#ifdef VBOX_STRICT
591 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
592#endif
593 }
594 return true;
595 }
596
597 /*
598 * Search the collision list.
599 */
600 PIEMTB const pTbHead = pTbHash;
601 while (pTbHash)
602 {
603 PIEMTB const pNextTb = pTbHash->pNext;
604 if (pNextTb == pTb)
605 {
606 pTbHash->pNext = pTb->pNext;
607 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
608#ifdef VBOX_STRICT
609 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
610#endif
611 return true;
612 }
613 pTbHash = pNextTb;
614 }
615 return false;
616}
617
618
619/**
620 * Looks up a TB for the given PC and flags in the cache.
621 *
622 * @returns Pointer to TB on success, NULL if not found.
623 * @param pVCpu The cross context virtual CPU structure of the
624 * calling thread.
625 * @param pTbCache The translation block cache.
626 * @param GCPhysPc The PC to look up a TB for.
627 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
628 * the lookup.
629 * @thread EMT(pVCpu)
630 */
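/* Sketch of the lookup key used below: the live IEMCPU::fExec bits reduced to
   the TB key mask together with the caller's extra flags, i.e.
        uint32_t const fKey = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
   A cached TB only matches if its GCPhysPc, its masked fFlags and its cached
   CS attributes (x86.fAttr) all agree with the current state. */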
631static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
632 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
633{
634 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
635 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
636 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
637#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
638 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
639#endif
640 while (pTb)
641 {
642 if (pTb->GCPhysPc == GCPhysPc)
643 {
644 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
645 {
646 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
647 {
648 STAM_COUNTER_INC(&pTbCache->cLookupHits);
649 AssertMsg(cLeft > 0, ("%d\n", cLeft));
650
651 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
652 pTb->cUsed++;
653#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
654 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
655 {
656 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
657 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
658 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
659 return pTb;
660 }
661 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
662 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
663 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
664 return iemNativeRecompile(pVCpu, pTb);
665#else
666 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
667 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
668 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
669 return pTb;
670#endif
671 }
672 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
673 }
674 else
675 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
676 }
677 else
678 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
679
680 pTb = pTb->pNext;
681#ifdef VBOX_STRICT
682 cLeft--;
683#endif
684 }
685 AssertMsg(cLeft == 0, ("%d\n", cLeft));
686 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
687 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
688 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
689 return pTb;
690}
691
692
693/*********************************************************************************************************************************
694* Translation Block Allocator. *
695*********************************************************************************************************************************/
696/*
697 * Translation block allocation management.
698 */
699
700#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
701# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
702 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
703# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
704 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
705# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
706 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
707#else
708# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
709 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
710# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
711 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
712# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
713 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
714#endif
715/** Makes a TB index from a chunk index and TB index within that chunk. */
716#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
717 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
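/* Worked example (hypothetical cTbsPerChunk of 8192): flat TB index 20000 maps
   to chunk 20000 / 8192 = 2 and in-chunk index 20000 - 2 * 8192 = 3616, and
   IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 3616) yields 20000 again.  With the
   power-of-two layout the same mapping is done with shifts and masks. */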
718
719
720/**
721 * Initializes the TB allocator and cache for an EMT.
722 *
723 * @returns VBox status code.
724 * @param pVM The VM handle.
725 * @param cInitialTbs The initial number of translation blocks to
726 * preallocate.
727 * @param cMaxTbs The max number of translation blocks allowed.
728 * @param cbInitialExec The initial size of the executable memory allocator.
729 * @param cbMaxExec The max size of the executable memory allocator.
730 * @param cbChunkExec The chunk size for executable memory allocator. Zero
731 * or UINT32_MAX for automatically determining this.
732 * @thread EMT
733 */
734DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
735 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
736{
737 PVMCPUCC pVCpu = VMMGetCpu(pVM);
738 Assert(!pVCpu->iem.s.pTbCacheR3);
739 Assert(!pVCpu->iem.s.pTbAllocatorR3);
740
741 /*
742 * Calculate the chunk size of the TB allocator.
743 * The minimum chunk size is 2MiB.
744 */
745 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
746 uint32_t cbPerChunk = _2M;
747 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
748#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
749 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
750 uint8_t cChunkShift = 21 - cTbShift;
751 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
752#endif
753 for (;;)
754 {
755 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
756 break;
757 cbPerChunk *= 2;
758 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
759#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
760 cChunkShift += 1;
761#endif
762 }
763
764 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
765 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
766 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
767
768 cMaxTbs = cMaxChunks * cTbsPerChunk;
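    /* Sketch of the sizing above with hypothetical numbers: for a sizeof(IEMTB)
       of 256 bytes a 2 MiB chunk holds 8192 TBs; a request for cMaxTbs = 60000
       then needs cMaxChunks = 8 (assuming aChunks has at least 8 entries, so the
       chunk size is not doubled) and cMaxTbs is rounded up to 8 * 8192 = 65536. */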
769
770 /*
771 * Allocate and initialize it.
772 */
773 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
774 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
775 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
776 if (!pTbAllocator)
777 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
778 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
779 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
780 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
781 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
782 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
783 pTbAllocator->cbPerChunk = cbPerChunk;
784 pTbAllocator->cMaxTbs = cMaxTbs;
785#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
786 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
787 pTbAllocator->cChunkShift = cChunkShift;
788 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
789#endif
790
791 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
792 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
793
794 /*
795 * Allocate the initial chunks.
796 */
797 for (uint32_t idxChunk = 0; ; idxChunk++)
798 {
799 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
800 if (!paTbs)
801 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
802 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
803 cbPerChunk, idxChunk, pVCpu->idCpu);
804
805 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
806 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
807 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
808 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
809 pTbAllocator->cTotalTbs += cTbsPerChunk;
810
811 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
812 break;
813 }
814
815 /*
816 * Calculate the size of the hash table. We double the max TB count and
817 * round it up to the nearest power of two.
818 */
819 uint32_t cCacheEntries = cMaxTbs * 2;
820 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
821 {
822 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
823 cCacheEntries = RT_BIT_32(iBitTop);
824 Assert(cCacheEntries >= cMaxTbs * 2);
825 }
826
827 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
828 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
829 if (!pTbCache)
830 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
831 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
832 cbTbCache, cCacheEntries, pVCpu->idCpu);
833
834 /*
835 * Initialize it (assumes zeroed by the allocator).
836 */
837 pTbCache->uMagic = IEMTBCACHE_MAGIC;
838 pTbCache->cHash = cCacheEntries;
839 pTbCache->uHashMask = cCacheEntries - 1;
840 Assert(pTbCache->cHash > pTbCache->uHashMask);
841 pVCpu->iem.s.pTbCacheR3 = pTbCache;
842
843 /*
844 * Initialize the native executable memory allocator.
845 */
846#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
847 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
848 AssertLogRelRCReturn(rc, rc);
849#else
850 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
851#endif
852
853 return VINF_SUCCESS;
854}
855
856
857/**
858 * Inner free worker.
859 */
860static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
861 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
862{
863 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
864 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
865 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
866 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
867
868 /*
869 * Unlink the TB from the hash table.
870 */
871 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
872
873 /*
874 * Free the TB itself.
875 */
876 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
877 {
878 case IEMTB_F_TYPE_THREADED:
879 pTbAllocator->cThreadedTbs -= 1;
880 RTMemFree(pTb->Thrd.paCalls);
881 break;
882#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
883 case IEMTB_F_TYPE_NATIVE:
884 pTbAllocator->cNativeTbs -= 1;
885 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
886 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
887 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
888 break;
889#endif
890 default:
891 AssertFailed();
892 }
893 RTMemFree(pTb->pabOpcodes);
894
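    /* Scrub the core TB fields before clearing the slot in the allocation bitmap. */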
895 pTb->pNext = NULL;
896 pTb->fFlags = 0;
897 pTb->GCPhysPc = UINT64_MAX;
898 pTb->Gen.uPtr = 0;
899 pTb->Gen.uData = 0;
900 pTb->cbOpcodes = 0;
901 pTb->pabOpcodes = NULL;
902
903 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
904 Assert(pTbAllocator->cInUseTbs > 0);
905
906 pTbAllocator->cInUseTbs -= 1;
907 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
908}
909
910
911/**
912 * Frees the given TB.
913 *
914 * @param pVCpu The cross context virtual CPU structure of the calling
915 * thread.
916 * @param pTb The translation block to free.
917 * @thread EMT(pVCpu)
918 */
919DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
920{
921 /*
922 * Validate state.
923 */
924 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
925 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
926 uint8_t const idxChunk = pTb->idxAllocChunk;
927 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
928 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
929 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
930
931 /*
932 * Call inner worker.
933 */
934 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
935}
936
937
938/**
939 * Schedules a TB for freeing when it's no longer being executed and/or part of
940 * the caller's call stack.
941 *
942 * The TB will be removed from the translation block cache, though, so it isn't
943 * possible to execute it again and the IEMTB::pNext member can be used to link
944 * it together with other TBs awaiting freeing.
945 *
946 * @param pVCpu The cross context virtual CPU structure of the calling
947 * thread.
948 * @param pTb The translation block to schedule for freeing.
949 */
950static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
951{
952 /*
953 * Validate state.
954 */
955 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
956 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
957 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
958 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
959 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
960 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
961 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
962 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
963 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
964
965 /*
966 * Remove it from the cache and prepend it to the allocator's todo list.
967 */
968 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
969
970 pTb->pNext = pTbAllocator->pDelayedFreeHead;
971 pTbAllocator->pDelayedFreeHead = pTb;
972}
973
974
975/**
976 * Processes the delayed frees.
977 *
978 * This is called by the allocator function as well as the native recompile
979 * function before making any TB or executable memory allocations respectively.
980 */
981void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
982{
983 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
984 pTbAllocator->pDelayedFreeHead = NULL;
985 while (pTb)
986 {
987 PIEMTB const pTbNext = pTb->pNext;
988 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
989 iemTbAllocatorFree(pVCpu, pTb);
990 pTb = pTbNext;
991 }
992}
993
994
995/**
996 * Grow the translation block allocator with another chunk.
997 */
998static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
999{
1000 /*
1001 * Validate state.
1002 */
1003 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1004 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1005 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1006 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1007 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1008
1009 /*
1010 * Allocate a new chunk and add it to the allocator.
1011 */
1012 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1013 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1014 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1015
1016 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1017 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1018 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1019 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1020 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1021 pTbAllocator->cTotalTbs += cTbsPerChunk;
1022 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1023
1024 return VINF_SUCCESS;
1025}
1026
1027
1028/**
1029 * Allocates a TB from an allocator that has at least one free block.
1030 *
1031 * This is common code to both the fast and slow allocator code paths.
1032 */
1033DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1034{
1035 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1036
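    /* Scan the allocation bitmap starting at the 64-bit aligned allocation hint,
       falling back to a scan from the beginning if nothing is free from there on. */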
1037 int idxTb;
1038 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1039 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1040 pTbAllocator->cTotalTbs,
1041 pTbAllocator->iStartHint & ~(uint32_t)63);
1042 else
1043 idxTb = -1;
1044 if (idxTb < 0)
1045 {
1046 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1047 AssertLogRelReturn(idxTb >= 0, NULL);
1048 }
1049 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1050 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1051
1052 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1053 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1054 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1055 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1056 Assert(pTb->idxAllocChunk == idxChunk);
1057
1058 pTbAllocator->cInUseTbs += 1;
1059 if (fThreaded)
1060 pTbAllocator->cThreadedTbs += 1;
1061 else
1062 pTbAllocator->cNativeTbs += 1;
1063 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1064 return pTb;
1065}
1066
1067
1068/**
1069 * Slow path for iemTbAllocatorAlloc.
1070 */
1071static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1072{
1073 /*
1074 * With some luck we can add another chunk.
1075 */
1076 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1077 {
1078 int rc = iemTbAllocatorGrow(pVCpu);
1079 if (RT_SUCCESS(rc))
1080 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1081 }
1082
1083 /*
1084 * We have to prune stuff. Sigh.
1085 *
1086 * This requires scanning for older TBs and kicking them out. Not sure how to
1087 * best do this as we don't want to maintain any list of TBs ordered by last
1088 * usage time. But one reasonably simple approach would be that each time we
1089 * get here we continue a sequential scan of the allocation chunks,
1090 * considering just a smallish number of TBs and freeing a fixed portion of
1091 * them. Say, we consider the next 128 TBs, freeing the least recently used
1092 * out of each group of 4 TBs, resulting in 32 free TBs.
1093 */
1094 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1095 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1096 uint32_t const cTbsToPrune = 128;
1097 uint32_t const cTbsPerGroup = 4;
1098 uint32_t cFreedTbs = 0;
1099#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1100 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1101#else
1102 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1103#endif
1104 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1105 idxTbPruneFrom = 0;
1106 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1107 {
1108 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1109 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1110 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1111 uint32_t cMsAge = msNow - pTb->msLastUsed;
1112 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1113
1114 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1115 {
1116#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1117 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1118 { /* likely */ }
1119 else
1120 {
1121 idxInChunk2 = 0;
1122 idxChunk2 += 1;
1123 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1124 idxChunk2 = 0;
1125 }
1126#endif
1127 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1128 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1129 if ( cMsAge2 > cMsAge
1130 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1131 {
1132 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1133 pTb = pTb2;
1134 idxChunk = idxChunk2;
1135 idxInChunk = idxInChunk2;
1136 cMsAge = cMsAge2;
1137 }
1138 }
1139
1140 /* Free the TB. */
1141 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1142 cFreedTbs++; /* paranoia */
1143 }
1144 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1145 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1146
1147 /*
1148 * Allocate a TB from the ones we've pruned.
1149 */
1150 if (cFreedTbs)
1151 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1152 return NULL;
1153}
1154
1155
1156/**
1157 * Allocate a translation block.
1158 *
1159 * @returns Pointer to block on success, NULL if we're out and unable to
1160 * free up an existing one (very unlikely once implemented).
1161 * @param pVCpu The cross context virtual CPU structure of the calling
1162 * thread.
1163 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1164 * For statistics.
1165 */
1166DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1167{
1168 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1169 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1170
1171 /* Free any pending TBs before we proceed. */
1172 if (!pTbAllocator->pDelayedFreeHead)
1173 { /* probably likely */ }
1174 else
1175 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1176
1177 /* If the allocator is full, take the slow code path. */
1178 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1179 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1180 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1181}
1182
1183
1184/**
1185 * This is called when we're out of space for native TBs.
1186 *
1187 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1188 * The difference is that we only prune native TBs and will only free any if
1189 * there are at least two in a group. The conditions under which we're called are
1190 * different - there will probably be free TBs in the table when we're called.
1191 * Therefore we increase the group size and max scan length, though we'll stop
1192 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1193 * up at least 8 TBs.
1194 */
1195void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1196{
1197 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1198 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1199
1200 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1201
1202 /*
1203 * Flush the delayed free list before we start freeing TBs indiscriminately.
1204 */
1205 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1206
1207 /*
1208 * Scan and free TBs.
1209 */
1210 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1211 uint32_t const cTbsToPrune = 128 * 8;
1212 uint32_t const cTbsPerGroup = 4 * 4;
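    /* I.e. a scan window of 1024 TBs in groups of 16 - 8 resp. 4 times the sizes
       used by iemTbAllocatorAllocSlow, as described above. */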
1213 uint32_t cFreedTbs = 0;
1214 uint32_t cMaxInstrs = 0;
1215 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1216 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1217 {
1218 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1219 idxTbPruneFrom = 0;
1220 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1221 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1222 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1223 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1224 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1225
1226 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1227 {
1228 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1229 { /* likely */ }
1230 else
1231 {
1232 idxInChunk2 = 0;
1233 idxChunk2 += 1;
1234 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1235 idxChunk2 = 0;
1236 }
1237 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1238 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1239 {
1240 cNativeTbs += 1;
1241 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1242 if ( cMsAge2 > cMsAge
1243 || ( cMsAge2 == cMsAge
1244 && ( pTb2->cUsed < pTb->cUsed
1245 || ( pTb2->cUsed == pTb->cUsed
1246 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1247 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1248 {
1249 pTb = pTb2;
1250 idxChunk = idxChunk2;
1251 idxInChunk = idxInChunk2;
1252 cMsAge = cMsAge2;
1253 }
1254 }
1255 }
1256
1257 /* Free the TB if we found at least two native ones in this group. */
1258 if (cNativeTbs >= 2)
1259 {
1260 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1261 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1262 cFreedTbs++;
1263 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1264 break;
1265 }
1266 }
1267 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1268
1269 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1270}
1271
1272
1273/*********************************************************************************************************************************
1274* Threaded Recompiler Core *
1275*********************************************************************************************************************************/
1276/**
1277 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1278 * @returns pszBuf.
1279 * @param fFlags The flags.
1280 * @param pszBuf The output buffer.
1281 * @param cbBuf The output buffer size. At least 32 bytes.
1282 */
1283DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1284{
1285 Assert(cbBuf >= 32);
1286 static RTSTRTUPLE const s_aModes[] =
1287 {
1288 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1289 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1290 /* [02] = */ { RT_STR_TUPLE("!2!") },
1291 /* [03] = */ { RT_STR_TUPLE("!3!") },
1292 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1293 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1294 /* [06] = */ { RT_STR_TUPLE("!6!") },
1295 /* [07] = */ { RT_STR_TUPLE("!7!") },
1296 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1297 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1298 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1299 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1300 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1301 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1302 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1303 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1304 /* [10] = */ { RT_STR_TUPLE("!10!") },
1305 /* [11] = */ { RT_STR_TUPLE("!11!") },
1306 /* [12] = */ { RT_STR_TUPLE("!12!") },
1307 /* [13] = */ { RT_STR_TUPLE("!13!") },
1308 /* [14] = */ { RT_STR_TUPLE("!14!") },
1309 /* [15] = */ { RT_STR_TUPLE("!15!") },
1310 /* [16] = */ { RT_STR_TUPLE("!16!") },
1311 /* [17] = */ { RT_STR_TUPLE("!17!") },
1312 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1313 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1314 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1315 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1316 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1317 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1318 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1319 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1320 };
1321 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1322 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1323 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1324
1325 pszBuf[off++] = ' ';
1326 pszBuf[off++] = 'C';
1327 pszBuf[off++] = 'P';
1328 pszBuf[off++] = 'L';
1329 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1330 Assert(off < 32);
1331
1332 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1333
1334 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1335 {
1336 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1337 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1338 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1339 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1340 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1341 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1342 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1343 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1344 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1345 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1346 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1347 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1348 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1349 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1350 };
1351 if (fFlags)
1352 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1353 if (s_aFlags[i].fFlag & fFlags)
1354 {
1355 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1356 pszBuf[off++] = ' ';
1357 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1358 off += s_aFlags[i].cchName;
1359 fFlags &= ~s_aFlags[i].fFlag;
1360 if (!fFlags)
1361 break;
1362 }
1363 pszBuf[off] = '\0';
1364
1365 return pszBuf;
1366}
1367
1368
1369/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1370static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1371{
1372 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1373 pDis->cbCachedInstr += cbMaxRead;
1374 RT_NOREF(cbMinRead);
1375 return VERR_NO_DATA;
1376}
1377
1378
1379DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1380{
1381 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1382
1383 char szDisBuf[512];
1384
1385 /*
1386 * Print TB info.
1387 */
1388 pHlp->pfnPrintf(pHlp,
1389 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
1390 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1391 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
1392 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1393
1394 /*
1395 * The disassembly here is driven by the threaded call entries: they tell us
1396 * where the next guest instruction starts in the opcode buffer and which
1397 * threaded function (with which arguments) was emitted for it.
1398 */
1399 DISSTATE Dis;
1400 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1401 uint32_t const cCalls = pTb->Thrd.cCalls;
1402 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1403 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1404 : DISCPUMODE_64BIT;
1405 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1406 uint8_t idxRange = UINT8_MAX;
1407 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1408 uint32_t offRange = 0;
1409 uint32_t offOpcodes = 0;
1410 uint32_t const cbOpcodes = pTb->cbOpcodes;
1411 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1412
1413 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1414 {
1415 /*
1416 * New opcode range?
1417 */
1418 if ( idxRange == UINT8_MAX
1419 || idxRange >= cRanges
1420 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1421 {
1422 idxRange += 1;
1423 if (idxRange < cRanges)
1424 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1425 else
1426 continue;
1427 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1428 + (pTb->aRanges[idxRange].idxPhysPage == 0
1429 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1430 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1431 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1432 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1433 pTb->aRanges[idxRange].idxPhysPage);
1434 GCPhysPc += offRange;
1435 }
1436
1437 /*
1438 * Disassemble another guest instruction?
1439 */
1440 if ( paCalls[iCall].offOpcode != offOpcodes
1441 && paCalls[iCall].cbOpcode > 0
1442 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1443 {
1444 offOpcodes = paCalls[iCall].offOpcode;
1445 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1446 uint32_t cbInstr = 1;
1447 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1448 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1449 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1450 if (RT_SUCCESS(rc))
1451 {
1452 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1453 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1454 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1455 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1456 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1457 }
1458 else
1459 {
1460 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1461 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1462 cbInstr = paCalls[iCall].cbOpcode;
1463 }
1464 GCPhysPc += cbInstr;
1465 offRange += cbInstr;
1466 }
1467
1468 /*
1469 * Dump call details.
1470 */
1471 pHlp->pfnPrintf(pHlp,
1472 " Call #%u to %s (%u args)\n",
1473 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1474 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1475
1476 /*
1477 * Snoop fExec.
1478 */
1479 switch (paCalls[iCall].enmFunction)
1480 {
1481 default:
1482 break;
1483 case kIemThreadedFunc_BltIn_CheckMode:
1484 fExec = paCalls[iCall].auParams[0];
1485 break;
1486 }
1487 }
1488}
1489
1490
1491
1492/**
1493 * Allocate a translation block for threaded recompilation.
1494 *
1495 * This is allocated with a maxed-out call table and storage for opcode bytes,
1496 * because it's only supposed to be called once per EMT to allocate the TB
1497 * pointed to by IEMCPU::pThrdCompileTbR3.
1498 *
1499 * @returns Pointer to the translation block on success, NULL on failure.
1500 * @param pVM The cross context virtual machine structure.
1501 * @param pVCpu The cross context virtual CPU structure of the calling
1502 * thread.
1503 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1504 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1505 */
1506static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1507{
1508 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1509 if (pTb)
1510 {
1511 unsigned const cCalls = 256;
1512 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1513 if (pTb->Thrd.paCalls)
1514 {
1515 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1516 if (pTb->pabOpcodes)
1517 {
1518 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1519 pTb->Thrd.cAllocated = cCalls;
1520 pTb->Thrd.cCalls = 0;
1521 pTb->cbOpcodes = 0;
1522 pTb->pNext = NULL;
1523 pTb->cUsed = 0;
1524 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1525 pTb->idxAllocChunk = UINT8_MAX;
1526 pTb->GCPhysPc = GCPhysPc;
1527 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1528 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1529 pTb->cInstructions = 0;
1530
1531 /* Init the first opcode range. */
1532 pTb->cRanges = 1;
1533 pTb->aRanges[0].cbOpcodes = 0;
1534 pTb->aRanges[0].offOpcodes = 0;
1535 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1536 pTb->aRanges[0].u2Unused = 0;
1537 pTb->aRanges[0].idxPhysPage = 0;
1538 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1539 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1540
1541 return pTb;
1542 }
1543 RTMemFree(pTb->Thrd.paCalls);
1544 }
1545 RTMemFree(pTb);
1546 }
1547 RT_NOREF(pVM);
1548 return NULL;
1549}
1550
1551
1552/**
1553 * Called on the TB that is dedicated for recompilation, before it's reused.
1554 *
1555 * @param pVCpu The cross context virtual CPU structure of the calling
1556 * thread.
1557 * @param pTb The translation block to reuse.
1558 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1559 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1560 */
1561static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1562{
1563 pTb->GCPhysPc = GCPhysPc;
1564 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1565 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1566 pTb->Thrd.cCalls = 0;
1567 pTb->cbOpcodes = 0;
1568 pTb->cInstructions = 0;
1569
1570 /* Init the first opcode range. */
1571 pTb->cRanges = 1;
1572 pTb->aRanges[0].cbOpcodes = 0;
1573 pTb->aRanges[0].offOpcodes = 0;
1574 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1575 pTb->aRanges[0].u2Unused = 0;
1576 pTb->aRanges[0].idxPhysPage = 0;
1577 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1578 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1579}
1580
1581
1582/**
1583 * Used to duplicate a threaded translation block after recompilation is done.
1584 *
1585 * @returns Pointer to the translation block on success, NULL on failure.
1586 * @param pVM The cross context virtual machine structure.
1587 * @param pVCpu The cross context virtual CPU structure of the calling
1588 * thread.
1589 * @param pTbSrc The TB to duplicate.
1590 */
1591static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1592{
1593 /*
1594 * Just using the heap for now. Will make this more efficient and
1595 * complicated later, don't worry. :-)
1596 */
1597 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1598 if (pTb)
1599 {
1600 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1601 memcpy(pTb, pTbSrc, sizeof(*pTb));
1602 pTb->idxAllocChunk = idxAllocChunk;
1603
1604 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1605 Assert(cCalls > 0);
1606 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1607 if (pTb->Thrd.paCalls)
1608 {
1609 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1610 Assert(cbOpcodes > 0);
1611 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1612 if (pTb->pabOpcodes)
1613 {
1614 pTb->Thrd.cAllocated = cCalls;
1615 pTb->pNext = NULL;
1616 pTb->cUsed = 0;
1617 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1618 pTb->fFlags = pTbSrc->fFlags;
1619
1620 return pTb;
1621 }
1622 RTMemFree(pTb->Thrd.paCalls);
1623 }
1624 iemTbAllocatorFree(pVCpu, pTb);
1625 }
1626 RT_NOREF(pVM);
1627 return NULL;
1628
1629}
1630
1631
1632/**
1633 * Adds the given TB to the hash table.
1634 *
1635 * @param pVCpu The cross context virtual CPU structure of the calling
1636 * thread.
1637 * @param pTbCache The cache to add it to.
1638 * @param pTb The translation block to add.
1639 */
1640static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1641{
1642 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1643
1644 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
1645 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1646 if (LogIs12Enabled())
1647 {
1648 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1649 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1650 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1651 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1652 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1653 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1654 pTb->aRanges[idxRange].idxPhysPage == 0
1655 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1656 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1657 }
1658}
1659
1660
1661/**
1662 * Called by opcode verifier functions when they detect a problem.
1663 */
1664void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1665{
1666    /* We cannot always free the current TB (fSafeToFree indicates when we can) because:
1667 - A threaded TB will have its current call entry accessed
1668 to update pVCpu->iem.s.cInstructions.
1669 - A native TB will have code left to execute. */
1670 if (fSafeToFree)
1671 iemTbAllocatorFree(pVCpu, pTb);
1672 else
1673 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1674}
1675
1676
1677/*
1678 * Real code.
1679 */
1680
1681#ifdef LOG_ENABLED
1682/**
1683 * Logs the current instruction.
1684 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1685 * @param pszFunction The IEM function doing the execution.
1686 * @param idxInstr The instruction number in the block.
1687 */
1688static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1689{
1690# ifdef IN_RING3
1691 if (LogIs2Enabled())
1692 {
1693 char szInstr[256];
1694 uint32_t cbInstr = 0;
1695 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1696 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1697 szInstr, sizeof(szInstr), &cbInstr);
1698
1699 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1700 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1701 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1702 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1703 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1704 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1705 " %s\n"
1706 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1707 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1708 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1709 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1710 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1711 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1712 szInstr));
1713
1714 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1715 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1716 }
1717 else
1718# endif
1719 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1720 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1721}
1722#endif /* LOG_ENABLED */
1723
1724
1725#if 0
1726static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1727{
1728 RT_NOREF(pVM, pVCpu);
1729 return rcStrict;
1730}
1731#endif
1732
1733
1734/**
1735 * Initializes the decoder state when compiling TBs.
1736 *
1737 * This presumes that fExec has already been initialized.
1738 *
1739 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1740 * to apply fixes to them as well.
1741 *
1742 * @param pVCpu The cross context virtual CPU structure of the calling
1743 * thread.
1744 * @param fReInit Clear for the first call for a TB, set for subsequent
1745 * calls from inside the compile loop where we can skip a
1746 * couple of things.
1747 * @param fExtraFlags The extra translation block flags when @a fReInit is
1748 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1749 * checked.
1750 */
1751DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1752{
1753 /* ASSUMES: That iemInitExec was already called and that anyone changing
1754 CPU state affecting the fExec bits since then will have updated fExec! */
1755 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1756 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1757
1758 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1759
1760 /* Decoder state: */
1761 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1762 pVCpu->iem.s.enmEffAddrMode = enmMode;
1763 if (enmMode != IEMMODE_64BIT)
1764 {
1765 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1766 pVCpu->iem.s.enmEffOpSize = enmMode;
1767 }
1768 else
1769 {
1770 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1771 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1772 }
1773 pVCpu->iem.s.fPrefixes = 0;
1774 pVCpu->iem.s.uRexReg = 0;
1775 pVCpu->iem.s.uRexB = 0;
1776 pVCpu->iem.s.uRexIndex = 0;
1777 pVCpu->iem.s.idxPrefix = 0;
1778 pVCpu->iem.s.uVex3rdReg = 0;
1779 pVCpu->iem.s.uVexLength = 0;
1780 pVCpu->iem.s.fEvexStuff = 0;
1781 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1782 pVCpu->iem.s.offModRm = 0;
1783 pVCpu->iem.s.iNextMapping = 0;
1784
1785 if (!fReInit)
1786 {
1787 pVCpu->iem.s.cActiveMappings = 0;
1788 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1789 pVCpu->iem.s.fEndTb = false;
1790 pVCpu->iem.s.fTbCheckOpcodes = false;
1791 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1792 pVCpu->iem.s.fTbCrossedPage = false;
1793 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1794 pVCpu->iem.s.fTbCurInstrIsSti = false;
1795 /* Force RF clearing and TF checking on first instruction in the block
1796 as we don't really know what came before and should assume the worst: */
1797 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1798 }
1799 else
1800 {
1801 Assert(pVCpu->iem.s.cActiveMappings == 0);
1802 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1803 Assert(pVCpu->iem.s.fEndTb == false);
1804 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1805 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1806 }
1807 pVCpu->iem.s.fTbCurInstr = 0;
1808
1809#ifdef DBGFTRACE_ENABLED
1810 switch (IEM_GET_CPU_MODE(pVCpu))
1811 {
1812 case IEMMODE_64BIT:
1813 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1814 break;
1815 case IEMMODE_32BIT:
1816 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1817 break;
1818 case IEMMODE_16BIT:
1819 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1820 break;
1821 }
1822#endif
1823}
1824
1825
1826/**
1827 * Initializes the opcode fetcher when starting the compilation.
1828 *
1829 * @param pVCpu The cross context virtual CPU structure of the calling
1830 * thread.
1831 */
1832DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1833{
1834 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1835#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1836 pVCpu->iem.s.offOpcode = 0;
1837#else
1838 RT_NOREF(pVCpu);
1839#endif
1840}
1841
1842
1843/**
1844 * Re-initializes the opcode fetcher between instructions while compiling.
1845 *
1846 * @param pVCpu The cross context virtual CPU structure of the calling
1847 * thread.
1848 */
1849DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1850{
1851 if (pVCpu->iem.s.pbInstrBuf)
1852 {
1853 uint64_t off = pVCpu->cpum.GstCtx.rip;
1854 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1855 off += pVCpu->cpum.GstCtx.cs.u64Base;
1856 off -= pVCpu->iem.s.uInstrBufPc;
1857 if (off < pVCpu->iem.s.cbInstrBufTotal)
1858 {
1859 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1860 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1861 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1862 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1863 else
1864 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1865 }
1866 else
1867 {
1868 pVCpu->iem.s.pbInstrBuf = NULL;
1869 pVCpu->iem.s.offInstrNextByte = 0;
1870 pVCpu->iem.s.offCurInstrStart = 0;
1871 pVCpu->iem.s.cbInstrBuf = 0;
1872 pVCpu->iem.s.cbInstrBufTotal = 0;
1873 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1874 }
1875 }
1876 else
1877 {
1878 pVCpu->iem.s.offInstrNextByte = 0;
1879 pVCpu->iem.s.offCurInstrStart = 0;
1880 pVCpu->iem.s.cbInstrBuf = 0;
1881 pVCpu->iem.s.cbInstrBufTotal = 0;
1882#ifdef VBOX_STRICT
1883 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1884#endif
1885 }
1886#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1887 pVCpu->iem.s.offOpcode = 0;
1888#endif
1889}
1890
1891#ifdef LOG_ENABLED
1892
1893/**
1894 * Inserts a NOP call.
1895 *
1896 * This is for debugging.
1897 *
1898 * @returns true on success, false if we're out of call entries.
1899 * @param pTb The translation block being compiled.
1900 */
1901bool iemThreadedCompileEmitNop(PIEMTB pTb)
1902{
1903 /* Emit the call. */
1904 uint32_t const idxCall = pTb->Thrd.cCalls;
1905 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1906 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1907 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1908 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
1909 pCall->idxInstr = pTb->cInstructions - 1;
1910 pCall->cbOpcode = 0;
1911 pCall->offOpcode = 0;
1912 pCall->uUnused0 = 0;
1913 pCall->auParams[0] = 0;
1914 pCall->auParams[1] = 0;
1915 pCall->auParams[2] = 0;
1916 return true;
1917}
1918
1919
1920/**
1921 * Called by iemThreadedCompile if cpu state logging is desired.
1922 *
1923 * @returns true on success, false if we're out of call entries.
1924 * @param pTb The translation block being compiled.
1925 */
1926bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
1927{
1928 /* Emit the call. */
1929 uint32_t const idxCall = pTb->Thrd.cCalls;
1930 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1931 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1932 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1933 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
1934 pCall->idxInstr = pTb->cInstructions - 1;
1935 pCall->cbOpcode = 0;
1936 pCall->offOpcode = 0;
1937 pCall->uUnused0 = 0;
1938 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
1939 pCall->auParams[1] = 0;
1940 pCall->auParams[2] = 0;
1941 return true;
1942}
1943
1944#endif /* LOG_ENABLED */
1945
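/**
 * Copies the first @a cbInstr opcode bytes of the current instruction from
 * IEMCPU::abOpcode to @a pbDst.
 *
 * The switch is unrolled and falls through so that exactly @a cbInstr bytes
 * are copied without resorting to a memcpy call.
 *
 * @param   pVCpu   The cross context virtual CPU structure of the calling
 *                  thread.
 * @param   pbDst   Where to copy the opcode bytes to.
 * @param   cbInstr The instruction length, 1 to 15 bytes.
 */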
1946DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1947{
1948 switch (cbInstr)
1949 {
1950 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1951 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1952 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1953 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1954 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1955 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1956 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1957 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1958 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1959 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1960 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1961 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1962 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1963 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1964 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1965 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1966 }
1967}
1968
1969
1970/**
1971 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1972 *
1973 * - CS LIM check required.
1974 * - Must recheck opcode bytes.
1975 * - Previous instruction branched.
1976 * - TLB load detected, probably due to page crossing.
1977 *
1978 * @returns true if everything went well, false if we're out of space in the TB
1979 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
1980 * @param pVCpu The cross context virtual CPU structure of the calling
1981 * thread.
1982 * @param pTb The translation block being compiled.
1983 */
1984bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1985{
1986 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1987 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1988#if 0
1989 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1990 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1991#endif
1992
1993 /*
1994     * If we're not in 64-bit mode and not already checking CS.LIM, we need to
1995     * see whether we should start checking.
1996 */
1997 bool fConsiderCsLimChecking;
1998 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
1999 if ( fMode == IEM_F_MODE_X86_64BIT
2000 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2001 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2002 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2003 fConsiderCsLimChecking = false; /* already enabled or not needed */
2004 else
2005 {
2006 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2007 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2008 fConsiderCsLimChecking = true; /* likely */
2009 else
2010 {
2011 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2012 return false;
2013 }
2014 }
2015
2016 /*
2017     * Prepare the call now, even before we know if we can accept the instruction in this TB.
2018     * This allows us to amend parameters w/o making every case suffer.
2019 */
2020 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2021 uint16_t const offOpcode = pTb->cbOpcodes;
2022 uint8_t idxRange = pTb->cRanges - 1;
2023
2024 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2025 pCall->idxInstr = pTb->cInstructions;
2026 pCall->cbOpcode = cbInstr;
2027 pCall->offOpcode = offOpcode;
2028 pCall->uUnused0 = 0;
2029 pCall->auParams[0] = (uint32_t)cbInstr
2030 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2031 /* The upper dword is sometimes used for cbStartPage. */;
2032 pCall->auParams[1] = idxRange;
2033 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2034
2035/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2036 * gotten onto. If we do, stop */
2037
2038 /*
2039 * Case 1: We've branched (RIP changed).
2040 *
2041 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2042 * Req: 1 extra range, no extra phys.
2043 *
2044     *    Sub-case 1b: Different page but no page boundary crossing, so TLB load
2045 * necessary (fTbCrossedPage is true).
2046 * Req: 1 extra range, probably 1 extra phys page entry.
2047 *
2048 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2049 * but in addition we cross into the following page and require
2050 * another TLB load.
2051 * Req: 2 extra ranges, probably 2 extra phys page entries.
2052 *
2053 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2054 * the following page (thus fTbCrossedPage is true).
2055 * Req: 2 extra ranges, probably 1 extra phys page entry.
2056 *
2057 * Note! The setting fTbCrossedPage is done by the iemOpcodeFetchBytesJmp, but
2058 * it may trigger "spuriously" from the CPU point of view because of
2059     *       physical page changes that'll invalidate the physical TLB and trigger a
2060     *       call to the function.  In theory this shouldn't be a big deal, just a bit
2061     *       of performance loss as we'll pick the LoadingTlb variants.
2062 *
2063 * Note! We do not currently optimize branching to the next instruction (sorry
2064 * 32-bit PIC code). We could maybe do that in the branching code that
2065 * sets (or not) fTbBranched.
2066 */
2067 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2068 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2069 * code. This'll require filtering out far jmps and calls, as they
2070 * load CS which should technically be considered indirect since the
2071 * GDT/LDT entry's base address can be modified independently from
2072 * the code. */
2073 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2074 {
2075 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2076 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2077 {
2078 /* 1a + 1b - instruction fully within the branched to page. */
2079 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2080 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2081
2082 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2083 {
2084 /* Check that we've got a free range. */
2085 idxRange += 1;
2086 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2087 { /* likely */ }
2088 else
2089 {
2090 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2091 return false;
2092 }
2093 pCall->auParams[1] = idxRange;
2094 pCall->auParams[2] = 0;
2095
2096 /* Check that we've got a free page slot. */
2097 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2098 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2099 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2100 pTb->aRanges[idxRange].idxPhysPage = 0;
2101 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2102 || pTb->aGCPhysPages[0] == GCPhysNew)
2103 {
2104 pTb->aGCPhysPages[0] = GCPhysNew;
2105 pTb->aRanges[idxRange].idxPhysPage = 1;
2106 }
2107 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2108 || pTb->aGCPhysPages[1] == GCPhysNew)
2109 {
2110 pTb->aGCPhysPages[1] = GCPhysNew;
2111 pTb->aRanges[idxRange].idxPhysPage = 2;
2112 }
2113 else
2114 {
2115                    Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2116 return false;
2117 }
2118
2119 /* Finish setting up the new range. */
2120 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2121 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2122 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2123 pTb->aRanges[idxRange].u2Unused = 0;
2124 pTb->cRanges++;
2125 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2126 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2127 pTb->aRanges[idxRange].offOpcodes));
2128 }
2129 else
2130 {
2131 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2132 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2133 }
2134
2135            /* Determine which function we need to load & check.
2136 Note! For jumps to a new page, we'll set both fTbBranched and
2137 fTbCrossedPage to avoid unnecessary TLB work for intra
2138 page branching */
2139 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2140 || pVCpu->iem.s.fTbCrossedPage)
2141 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2142 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2143 : !fConsiderCsLimChecking
2144 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2145 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2146 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2147 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2148 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2149 : !fConsiderCsLimChecking
2150 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2151 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2152 else
2153 {
2154 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2155 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2156 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2157 : !fConsiderCsLimChecking
2158 ? kIemThreadedFunc_BltIn_CheckOpcodes
2159 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2160 }
2161 }
2162 else
2163 {
2164 /* 1c + 1d - instruction crosses pages. */
2165 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2166 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2167
2168 /* Lazy bird: Check that this isn't case 1c, since we've already
2169                        loaded the first physical address.  End the TB and
2170 make it a case 2b instead.
2171
2172 Hmm. Too much bother to detect, so just do the same
2173 with case 1d as well. */
2174#if 0 /** @todo get back to this later when we've got the actual branch code in
2175 * place. */
2176 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2177
2178 /* Check that we've got two free ranges. */
2179 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2180 { /* likely */ }
2181 else
2182 return false;
2183 idxRange += 1;
2184 pCall->auParams[1] = idxRange;
2185 pCall->auParams[2] = 0;
2186
2187 /* ... */
2188
2189#else
2190 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2191 return false;
2192#endif
2193 }
2194 }
2195
2196 /*
2197 * Case 2: Page crossing.
2198 *
2199 * Sub-case 2a: The instruction starts on the first byte in the next page.
2200 *
2201 * Sub-case 2b: The instruction has opcode bytes in both the current and
2202 * following page.
2203 *
2204 * Both cases requires a new range table entry and probably a new physical
2205 * page entry. The difference is in which functions to emit and whether to
2206 * add bytes to the current range.
2207 */
2208 else if (pVCpu->iem.s.fTbCrossedPage)
2209 {
2210 /* Check that we've got a free range. */
2211 idxRange += 1;
2212 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2213 { /* likely */ }
2214 else
2215 {
2216 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2217 return false;
2218 }
2219
2220 /* Check that we've got a free page slot. */
2221 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2222 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2223 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2224 pTb->aRanges[idxRange].idxPhysPage = 0;
2225 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2226 || pTb->aGCPhysPages[0] == GCPhysNew)
2227 {
2228 pTb->aGCPhysPages[0] = GCPhysNew;
2229 pTb->aRanges[idxRange].idxPhysPage = 1;
2230 }
2231 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2232 || pTb->aGCPhysPages[1] == GCPhysNew)
2233 {
2234 pTb->aGCPhysPages[1] = GCPhysNew;
2235 pTb->aRanges[idxRange].idxPhysPage = 2;
2236 }
2237 else
2238 {
2239            Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2240 return false;
2241 }
2242
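        /* Sub-case 2a: the previous range ends exactly on the page boundary, so
           the instruction starts on the first byte of the new page. */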
2243 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2244 {
2245 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2246 pCall->auParams[1] = idxRange;
2247 pCall->auParams[2] = 0;
2248
2249 /* Finish setting up the new range. */
2250 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2251 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2252 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2253 pTb->aRanges[idxRange].u2Unused = 0;
2254 pTb->cRanges++;
2255 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2256 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2257 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2258
2259            /* Determine which function we need to load & check. */
2260 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2261 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2262 : !fConsiderCsLimChecking
2263 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2264 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2265 }
2266 else
2267 {
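            /* Sub-case 2b: the instruction straddles the page boundary, so the
               first cbStartPage bytes still belong to the previous page/range. */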
2268 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2269 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2270 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2271 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2272
2273            /* We're good. Split the instruction over the old and new range table entries. */
2274 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2275
2276 pTb->aRanges[idxRange].offPhysPage = 0;
2277 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2278 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2279 pTb->aRanges[idxRange].u2Unused = 0;
2280 pTb->cRanges++;
2281 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2282 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2283 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2284
2285            /* Determine which function we need to load & check. */
2286 if (pVCpu->iem.s.fTbCheckOpcodes)
2287 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2288 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2289 : !fConsiderCsLimChecking
2290 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2291 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2292 else
2293 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2294 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2295 : !fConsiderCsLimChecking
2296 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2297 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2298 }
2299 }
2300
2301 /*
2302 * Regular case: No new range required.
2303 */
2304 else
2305 {
2306 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2307 if (pVCpu->iem.s.fTbCheckOpcodes)
2308 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2309 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2310 : kIemThreadedFunc_BltIn_CheckOpcodes;
2311 else
2312 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2313
2314 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2315 pTb->cbOpcodes = offOpcode + cbInstr;
2316 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2317 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2318 }
2319
2320 /*
2321 * Commit the call.
2322 */
2323 pTb->Thrd.cCalls++;
2324
2325 /*
2326 * Clear state.
2327 */
2328 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2329 pVCpu->iem.s.fTbCrossedPage = false;
2330 pVCpu->iem.s.fTbCheckOpcodes = false;
2331
2332 /*
2333 * Copy opcode bytes.
2334 */
2335 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2336 pTb->cbOpcodes = offOpcode + cbInstr;
2337 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2338
2339 return true;
2340}
2341
2342
2343/**
2344 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2345 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2346 *
2347 * @returns true if anything is pending, false if not.
2348 * @param pVCpu The cross context virtual CPU structure of the calling
2349 * thread.
2350 */
2351DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2352{
2353 uint64_t fCpu = pVCpu->fLocalForcedActions;
2354 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2355#if 1
2356 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2357 if (RT_LIKELY( !fCpu
2358 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2359 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2360 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2361 return false;
2362 return true;
2363#else
2364 return false;
2365#endif
2366
2367}
2368
2369
2370/**
2371 * Called by iemThreadedCompile when a block requires a mode check.
2372 *
2373 * @returns true if we should continue, false if we're out of call entries.
2374 * @param pVCpu The cross context virtual CPU structure of the calling
2375 * thread.
2376 * @param pTb The translation block being compiled.
2377 */
2378static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2379{
2380 /* Emit the call. */
2381 uint32_t const idxCall = pTb->Thrd.cCalls;
2382 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2383 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2384 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2385 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2386 pCall->idxInstr = pTb->cInstructions - 1;
2387 pCall->cbOpcode = 0;
2388 pCall->offOpcode = 0;
2389 pCall->uUnused0 = 0;
2390 pCall->auParams[0] = pVCpu->iem.s.fExec;
2391 pCall->auParams[1] = 0;
2392 pCall->auParams[2] = 0;
2393 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2394 return true;
2395}
2396
2397
2398/**
2399 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2400 * set.
2401 *
2402 * @returns true if we should continue, false if an IRQ is deliverable or a
2403 * relevant force flag is pending.
2404 * @param pVCpu The cross context virtual CPU structure of the calling
2405 * thread.
2406 * @param pTb The translation block being compiled.
2407 * @sa iemThreadedCompileCheckIrq
2408 */
2409bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2410{
2411 /*
2412     * Skip this if we've already emitted a call after the previous instruction
2413 * or if it's the first call, as we're always checking FFs between blocks.
2414 */
2415 uint32_t const idxCall = pTb->Thrd.cCalls;
2416 if ( idxCall > 0
2417 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2418 {
2419 /* Emit the call. */
2420 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2421 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2422 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2423 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2424 pCall->idxInstr = pTb->cInstructions;
2425 pCall->offOpcode = 0;
2426 pCall->cbOpcode = 0;
2427 pCall->uUnused0 = 0;
2428 pCall->auParams[0] = 0;
2429 pCall->auParams[1] = 0;
2430 pCall->auParams[2] = 0;
2431 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2432
2433 /* Reset the IRQ check value. */
2434 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2435
2436 /*
2437 * Check for deliverable IRQs and pending force flags.
2438 */
2439 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2440 }
2441 return true; /* continue */
2442}
2443
2444
2445/**
2446 * Emits an IRQ check call and checks for pending IRQs.
2447 *
2448 * @returns true if we should continue, false if an IRQ is deliverable or a
2449 * relevant force flag is pending.
2450 * @param pVCpu The cross context virtual CPU structure of the calling
2451 * thread.
2452 * @param   pTb         The translation block.
2453 * @sa iemThreadedCompileBeginEmitCallsComplications
2454 */
2455static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2456{
2457 /* Check again in a little bit, unless it is immediately following an STI
2458 in which case we *must* check immediately after the next instruction
2459 as well in case it's executed with interrupt inhibition. We could
2460       otherwise miss the interrupt window. See the irq2 wait2 variant in
2461 bs3-timers-1 which is doing sti + sti + cli. */
2462 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2463 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2464 else
2465 {
2466 pVCpu->iem.s.fTbCurInstrIsSti = false;
2467 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2468 }
2469 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2470
2471 /*
2472 * Emit the call.
2473 */
2474 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2475 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2476 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2477 pCall->idxInstr = pTb->cInstructions;
2478 pCall->offOpcode = 0;
2479 pCall->cbOpcode = 0;
2480 pCall->uUnused0 = 0;
2481 pCall->auParams[0] = 0;
2482 pCall->auParams[1] = 0;
2483 pCall->auParams[2] = 0;
2484
2485 /*
2486 * Check for deliverable IRQs and pending force flags.
2487 */
2488 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2489}
2490
2491
2492/**
2493 * Compiles a new TB and executes it.
2494 *
2495 * We combine compilation and execution here as it makes it simpler code flow
2496 * in the main loop and it allows interpreting while compiling if we want to
2497 * explore that option.
2498 *
2499 * @returns Strict VBox status code.
2500 * @param pVM The cross context virtual machine structure.
2501 * @param pVCpu The cross context virtual CPU structure of the calling
2502 * thread.
2503 * @param GCPhysPc The physical address corresponding to the current
2504 * RIP+CS.BASE.
2505 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2506 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2507 */
2508static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2509{
2510 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2511 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2512
2513 /*
2514     * Get the TB we use for the recompiling. This is a maxed-out TB, of
2515     * which we'll make a more efficient copy when we're done compiling.
2516 */
2517 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2518 if (pTb)
2519 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2520 else
2521 {
2522 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2523 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2524 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2525 }
2526
2527 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2528 functions may get at it. */
2529 pVCpu->iem.s.pCurTbR3 = pTb;
2530
2531#if 0
2532 /* Make sure the CheckIrq condition matches the one in EM. */
2533 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2534 const uint32_t cZeroCalls = 1;
2535#else
2536 const uint32_t cZeroCalls = 0;
2537#endif
2538
2539 /*
2540     * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2541 */
2542 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2543 iemThreadedCompileInitOpcodeFetching(pVCpu);
2544 VBOXSTRICTRC rcStrict;
2545 for (;;)
2546 {
2547 /* Process the next instruction. */
2548#ifdef LOG_ENABLED
2549 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2550 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2551 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2552 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2553#endif
2554 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2555 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2556
2557 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2558#if 0
2559 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2560 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2561 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2562#endif
2563 if ( rcStrict == VINF_SUCCESS
2564 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2565 && !pVCpu->iem.s.fEndTb)
2566 {
2567 Assert(pTb->Thrd.cCalls > cCallsPrev);
2568 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
2569
2570 pVCpu->iem.s.cInstructions++;
2571
2572 /* Check for mode change _after_ certain CIMPL calls, so check that
2573 we continue executing with the same mode value. */
2574 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2575 { /* probable */ }
2576 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2577 { /* extremely likely */ }
2578 else
2579 break;
2580
2581#if defined(LOG_ENABLED) && 0 /* for debugging */
2582 //iemThreadedCompileEmitNop(pTb);
2583 iemThreadedCompileEmitLogCpuState(pTb);
2584#endif
2585 }
2586 else
2587 {
2588 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2589 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2590 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2591 rcStrict = VINF_SUCCESS;
2592
2593 if (pTb->Thrd.cCalls > cZeroCalls)
2594 {
2595 if (cCallsPrev != pTb->Thrd.cCalls)
2596 pVCpu->iem.s.cInstructions++;
2597 break;
2598 }
2599
2600 pVCpu->iem.s.pCurTbR3 = NULL;
2601 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2602 }
2603
2604 /* Check for IRQs? */
2605 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2606 pVCpu->iem.s.cInstrTillIrqCheck--;
2607 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2608 break;
2609
2610 /* Still space in the TB? */
2611 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2612 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated)
2613 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2614 else
2615 {
2616 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
2617 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
2618 break;
2619 }
2620 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2621 }
2622
2623 /*
2624 * Duplicate the TB into a completed one and link it.
2625 */
2626 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2627 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2628
2629 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2630
2631#ifdef IEM_COMPILE_ONLY_MODE
2632 /*
2633 * Execute the translation block.
2634 */
2635#endif
2636
2637 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2638}
2639
2640
2641
2642/*********************************************************************************************************************************
2643* Recompiled Execution Core *
2644*********************************************************************************************************************************/
2645
2646
2647/**
2648 * Executes a translation block.
2649 *
2650 * @returns Strict VBox status code.
2651 * @param pVCpu The cross context virtual CPU structure of the calling
2652 * thread.
2653 * @param pTb The translation block to execute.
2654 */
2655static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2656{
2657 /*
2658 * Check the opcodes in the first page before starting execution.
2659 */
2660/** @todo this test should take IEMTB_F_CS_LIM_CHECKS into account or something.
2661 * The 'near jmp+call' test in bs3-cpu-basic-2 triggers the 2nd assertion here by
2662 * altering the CS limit such that only one of the two instruction bytes is valid.
2663 * Since it's a CS.LIM problem, the pbInstrBuf is good for the full length, and
2664 * the test succeeds if skipped, but we assert in debug builds. */
2665 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2666 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
2667 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
2668 { /* likely */ }
2669 else
2670 {
2671 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
2672 iemThreadedTbObsolete(pVCpu, pTb, true /*fSafeToFree*/);
2673 return VINF_SUCCESS;
2674 }
2675
2676 /*
2677 * Set the current TB so CIMPL functions may get at it.
2678 */
2679 pVCpu->iem.s.pCurTbR3 = pTb;
2680
2681 /*
2682 * Execute the block.
2683 */
2684#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2685 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2686 {
2687 pVCpu->iem.s.cTbExecNative++;
2688# ifdef LOG_ENABLED
2689 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2690# endif
2691
2692# ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
2693# ifdef RT_ARCH_AMD64
2694 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2695# else
2696 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2697# endif
2698# else
2699# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2700 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
2701# endif
2702# ifdef RT_ARCH_AMD64
2703 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
2704# else
2705 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
2706# endif
2707# endif
2708
2709# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2710 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2711# endif
2712# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
2713 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
2714# endif
2715 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2716 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2717 { /* likely */ }
2718 else
2719 {
2720 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2721 pVCpu->iem.s.pCurTbR3 = NULL;
2722 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2723
2724 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2725 only to break out of TB execution early. */
2726 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2727 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2728
2729            /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
2730               and be converted to VINF_SUCCESS or whatever is appropriate. */
2731 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2732 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
2733
2734 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2735 }
2736 }
2737 else
2738#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2739 {
2740 /*
2741 * The threaded execution loop.
2742 */
2743 pVCpu->iem.s.cTbExecThreaded++;
2744#ifdef LOG_ENABLED
2745 uint64_t uRipPrev = UINT64_MAX;
2746#endif
2747 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2748 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2749 while (cCallsLeft-- > 0)
2750 {
2751#ifdef LOG_ENABLED
2752 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2753 {
2754 uRipPrev = pVCpu->cpum.GstCtx.rip;
2755 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2756 }
2757 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2758 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2759 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2760#endif
2761#ifdef VBOX_WITH_STATISTICS
2762 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
2763 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
2764#endif
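            /* Dispatch to the threaded function via the global function table,
               passing the three parameters packed in at compile time. */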
2765 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2766 pCallEntry->auParams[0],
2767 pCallEntry->auParams[1],
2768 pCallEntry->auParams[2]);
2769 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2770 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2771 pCallEntry++;
2772 else
2773 {
2774 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2775 pVCpu->iem.s.pCurTbR3 = NULL;
2776 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2777
2778 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2779 only to break out of TB execution early. */
2780 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2781 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2782 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2783 }
2784 }
2785 }
2786
2787 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2788 pVCpu->iem.s.pCurTbR3 = NULL;
2789 return VINF_SUCCESS;
2790}
2791
2792
2793/**
2794 * This is called when the PC doesn't match the current pbInstrBuf.
2795 *
2796 * Upon return, we're ready for opcode fetching. But please note that
2797 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2798 * MMIO or unassigned).
2799 */
2800static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2801{
2802 pVCpu->iem.s.pbInstrBuf = NULL;
2803 pVCpu->iem.s.offCurInstrStart = 0;
2804 pVCpu->iem.s.offInstrNextByte = 0;
2805 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2806 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2807}
2808
2809
2810/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
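/**
 * Calculates the physical address of the current instruction (RIP + CS.BASE)
 * and prepares opcode fetching, reusing the current instruction buffer when
 * the PC still falls within it and deferring to
 * iemGetPcWithPhysAndCodeMissed() otherwise.
 *
 * @returns The guest physical address of the current instruction.
 * @param   pVCpu   The cross context virtual CPU structure of the calling
 *                  thread.
 */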
2811DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2812{
2813 /*
2814 * Set uCurTbStartPc to RIP and calc the effective PC.
2815 */
2816 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2817 pVCpu->iem.s.uCurTbStartPc = uPc;
2818 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2819 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2820
2821 /*
2822 * Advance within the current buffer (PAGE) when possible.
2823 */
2824 if (pVCpu->iem.s.pbInstrBuf)
2825 {
2826 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2827 if (off < pVCpu->iem.s.cbInstrBufTotal)
2828 {
2829 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2830 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2831 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2832 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2833 else
2834 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2835
2836 return pVCpu->iem.s.GCPhysInstrBuf + off;
2837 }
2838 }
2839 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2840}
2841
2842
2843/**
2844 * Determines the extra IEMTB_F_XXX flags.
2845 *
2846 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2847 * IEMTB_F_CS_LIM_CHECKS (or zero).
2848 * @param pVCpu The cross context virtual CPU structure of the calling
2849 * thread.
2850 */
2851DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2852{
2853 uint32_t fRet = 0;
2854
2855 /*
2856 * Determine the inhibit bits.
2857 */
2858 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2859 { /* typical */ }
2860 else
2861 {
2862 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2863 fRet |= IEMTB_F_INHIBIT_SHADOW;
2864 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2865 fRet |= IEMTB_F_INHIBIT_NMI;
2866 }
2867
2868 /*
2869 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2870 * likely to go invalid before the end of the translation block.
2871 */
2872 if (IEM_IS_64BIT_CODE(pVCpu))
2873 return fRet;
2874
2875 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2876 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2877 return fRet;
2878 return fRet | IEMTB_F_CS_LIM_CHECKS;
2879}
2880
2881
2882VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
2883{
2884 /*
2885 * See if there is an interrupt pending in TRPM, inject it if we can.
2886 */
2887 if (!TRPMHasTrap(pVCpu))
2888 { /* likely */ }
2889 else
2890 {
2891 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
2892 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
2893 { /*likely */ }
2894 else
2895 return rcStrict;
2896 }
2897
2898 /*
2899 * Init the execution environment.
2900 */
2901#if 1 /** @todo this seems like a good idea, however if we ever share memory
2902 * directly with other threads on the host, it isn't necessarily... */
2903 if (pVM->cCpus == 1)
2904 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
2905 else
2906#endif
2907 iemInitExec(pVCpu, 0 /*fExecOpts*/);
2908 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
2909 { }
2910 else
2911 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
2912
2913 /*
2914 * Run-loop.
2915 *
2916 * If we're using setjmp/longjmp we combine all the catching here to avoid
2917 * having to call setjmp for each block we're executing.
2918 */
2919 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
2920 for (;;)
2921 {
2922 PIEMTB pTb = NULL;
2923 VBOXSTRICTRC rcStrict;
2924 IEM_TRY_SETJMP(pVCpu, rcStrict)
2925 {
2926            uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
2927 for (uint32_t iIterations = 0; ; iIterations++)
2928 {
2929 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
2930 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
2931 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
2932 {
2933 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
2934 pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
2935 if (pTb)
2936 rcStrict = iemTbExec(pVCpu, pTb);
2937 else
2938 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
2939 }
2940 else
2941 {
2942 /* This can only happen if the current PC cannot be translated into a
2943 host pointer, which means we're in MMIO or unmapped memory... */
2944#if defined(VBOX_STRICT) && defined(IN_RING3)
2945 rcStrict = DBGFSTOP(pVM);
2946 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
2947 return rcStrict;
2948#endif
2949 rcStrict = IEMExecLots(pVCpu, 2048, cPollRate, NULL);
2950 }
2951 if (rcStrict == VINF_SUCCESS)
2952 {
2953 Assert(pVCpu->iem.s.cActiveMappings == 0);
2954
2955 uint64_t fCpu = pVCpu->fLocalForcedActions;
2956 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
2957 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2958 | VMCPU_FF_TLB_FLUSH
2959 | VMCPU_FF_UNHALT );
2960 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2961 if (RT_LIKELY( ( !fCpu
2962 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2963 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2964 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
2965 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
2966 {
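                        /* Only poll the timers every cPollRate + 1 iterations;
                           return to the caller when one has expired. */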
2967 if (RT_LIKELY( (iIterations & cPollRate) != 0
2968 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
2969 pTb = NULL; /* Clear it before looping so iemTbCacheLookup can safely do native recompilation. */
2970 else
2971 return VINF_SUCCESS;
2972 }
2973 else
2974 return VINF_SUCCESS;
2975 }
2976 else
2977 return rcStrict;
2978 }
2979 }
2980 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
2981 {
2982 pVCpu->iem.s.cLongJumps++;
2983#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2984 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2985#endif
2986 if (pVCpu->iem.s.cActiveMappings > 0)
2987 iemMemRollback(pVCpu);
2988
2989#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2990 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
2991 {
2992 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
2993 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
2994 }
2995#endif
2996
2997#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
2998 /* If pTb isn't NULL we're in iemTbExec. */
2999 if (!pTb)
3000 {
3001 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3002 pTb = pVCpu->iem.s.pCurTbR3;
3003 if (pTb)
3004 {
3005 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3006 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3007 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3008 }
3009 }
3010#endif
3011 pVCpu->iem.s.pCurTbR3 = NULL;
3012 return rcStrict;
3013 }
3014 IEM_CATCH_LONGJMP_END(pVCpu);
3015 }
3016}
3017