VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@104030

Last change on this file since 104030 was 103852, checked in by vboxsync, 11 months ago

Move iemTbFlagsToString() to be accessible to both callers [symbol visibility fix], bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 121.5 KB
1/* $Id: IEMAllThrdRecompiler.cpp 103852 2024-03-14 13:06:27Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down the configs here to avoid wasting time on unused ones.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116
117/*********************************************************************************************************************************
118* Internal Functions *
119*********************************************************************************************************************************/
120static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb);
121
122
123/**
124 * Calculates the effective address of a ModR/M memory operand, extended version
125 * for use in the recompilers.
126 *
127 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
128 *
129 * May longjmp on internal error.
130 *
131 * @return The effective address.
132 * @param pVCpu The cross context virtual CPU structure of the calling thread.
133 * @param bRm The ModRM byte.
134 * @param cbImmAndRspOffset - First byte: The size of any immediate
135 * following the effective address opcode bytes
136 * (only for RIP relative addressing).
137 * - Second byte: RSP displacement (for POP [ESP]).
138 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
139 * SIB byte (bits 39:32).
140 *
141 * @note This must be defined in a source file with matching
142 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
143 * or implemented differently...
144 */
145RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
146{
147 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
148# define SET_SS_DEF() \
149 do \
150 { \
151 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
152 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
153 } while (0)
154
155 if (!IEM_IS_64BIT_CODE(pVCpu))
156 {
157/** @todo Check the effective address size crap! */
158 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
159 {
160 uint16_t u16EffAddr;
161
162 /* Handle the disp16 form with no registers first. */
163 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
164 {
165 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
166 *puInfo = u16EffAddr;
167 }
168 else
169 {
170 /* Get the displacement. */
171 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
172 {
173 case 0: u16EffAddr = 0; break;
174 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
175 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
176 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
177 }
178 *puInfo = u16EffAddr;
179
180 /* Add the base and index registers to the disp. */
181 switch (bRm & X86_MODRM_RM_MASK)
182 {
183 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
184 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
185 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
186 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
187 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
188 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
189 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
190 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
191 }
192 }
193
194 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
195 return u16EffAddr;
196 }
197
198 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
199 uint32_t u32EffAddr;
200 uint64_t uInfo;
201
202 /* Handle the disp32 form with no registers first. */
203 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
204 {
205 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
206 uInfo = u32EffAddr;
207 }
208 else
209 {
210 /* Get the register (or SIB) value. */
211 uInfo = 0;
212 switch ((bRm & X86_MODRM_RM_MASK))
213 {
214 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
215 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
216 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
217 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
218 case 4: /* SIB */
219 {
220 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
221 uInfo = (uint64_t)bSib << 32;
222
223 /* Get the index and scale it. */
224 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
225 {
226 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
227 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
228 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
229 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
230 case 4: u32EffAddr = 0; /*none */ break;
231 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
232 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
233 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
234 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
235 }
236 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
237
238 /* add base */
239 switch (bSib & X86_SIB_BASE_MASK)
240 {
241 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
242 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
243 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
244 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
245 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
246 case 5:
247 if ((bRm & X86_MODRM_MOD_MASK) != 0)
248 {
249 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
250 SET_SS_DEF();
251 }
252 else
253 {
254 uint32_t u32Disp;
255 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
256 u32EffAddr += u32Disp;
257 uInfo |= u32Disp;
258 }
259 break;
260 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
261 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
262 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
263 }
264 break;
265 }
266 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
267 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
268 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
269 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
270 }
271
272 /* Get and add the displacement. */
273 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
274 {
275 case 0:
276 break;
277 case 1:
278 {
279 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
280 u32EffAddr += i8Disp;
281 uInfo |= (uint32_t)(int32_t)i8Disp;
282 break;
283 }
284 case 2:
285 {
286 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
287 u32EffAddr += u32Disp;
288 uInfo |= u32Disp;
289 break;
290 }
291 default:
292 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
293 }
294 }
295
296 *puInfo = uInfo;
297 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
298 return u32EffAddr;
299 }
300
301 uint64_t u64EffAddr;
302 uint64_t uInfo;
303
304 /* Handle the rip+disp32 form with no registers first. */
305 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
306 {
307 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
308 uInfo = (uint32_t)u64EffAddr;
309 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
310 }
311 else
312 {
313 /* Get the register (or SIB) value. */
314 uInfo = 0;
315 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
316 {
317 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
318 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
319 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
320 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
321 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
322 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
323 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
324 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
325 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
326 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
327 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
328 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
329 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
330 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
331 /* SIB */
332 case 4:
333 case 12:
334 {
335 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
336 uInfo = (uint64_t)bSib << 32;
337
338 /* Get the index and scale it. */
339 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
340 {
341 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
342 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
343 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
344 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
345 case 4: u64EffAddr = 0; /*none */ break;
346 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
347 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
348 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
349 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
350 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
351 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
352 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
353 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
354 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
355 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
356 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
357 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
358 }
359 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
360
361 /* add base */
362 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
363 {
364 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
365 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
366 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
367 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
368 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
369 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
370 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
371 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
372 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
373 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
374 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
375 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
376 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
377 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
378 /* complicated encodings */
379 case 5:
380 case 13:
381 if ((bRm & X86_MODRM_MOD_MASK) != 0)
382 {
383 if (!pVCpu->iem.s.uRexB)
384 {
385 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
386 SET_SS_DEF();
387 }
388 else
389 u64EffAddr += pVCpu->cpum.GstCtx.r13;
390 }
391 else
392 {
393 uint32_t u32Disp;
394 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
395 u64EffAddr += (int32_t)u32Disp;
396 uInfo |= u32Disp;
397 }
398 break;
399 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
400 }
401 break;
402 }
403 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
404 }
405
406 /* Get and add the displacement. */
407 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
408 {
409 case 0:
410 break;
411 case 1:
412 {
413 int8_t i8Disp;
414 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
415 u64EffAddr += i8Disp;
416 uInfo |= (uint32_t)(int32_t)i8Disp;
417 break;
418 }
419 case 2:
420 {
421 uint32_t u32Disp;
422 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
423 u64EffAddr += (int32_t)u32Disp;
424 uInfo |= u32Disp;
425 break;
426 }
427 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
428 }
429
430 }
431
432 *puInfo = uInfo;
433 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
434 {
435 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
436 return u64EffAddr;
437 }
438 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
439 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
440 return u64EffAddr & UINT32_MAX;
441}
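/* Illustrative usage sketch (hypothetical caller): decoding a ModR/M memory operand that is
 * followed by a 4 byte immediate and needs an 8 byte RSP bias for a POP-style access:
 *
 *     uint64_t uInfo = 0;
 *     RTGCPTR const GCPtrEff = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, 4 | (8 << 8), &uInfo);
 *     uint32_t const u32Disp = (uint32_t)uInfo;         // bits 31:0  - displacement, if any
 *     uint8_t  const bSib    = (uint8_t)(uInfo >> 32);  // bits 39:32 - SIB byte, if any
 */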
442
443
444/*********************************************************************************************************************************
445* Translation Block Cache. *
446*********************************************************************************************************************************/
447
448/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
449static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
450{
451 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
452 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
453 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
454 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
455 if (cMsSinceUse1 != cMsSinceUse2)
456 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
457 if (pTb1->cUsed != pTb2->cUsed)
458 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
459 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
460 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
461 return 0;
462}
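/* Resulting sort order (most attractive to keep first): most recently used, then most
 * frequently used, then native TBs ahead of threaded ones. */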
463
464#ifdef VBOX_STRICT
465/**
466 * Assertion helper that checks a collision list count.
467 */
468static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
469{
470 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
471 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
472 while (pTb)
473 {
474 pTb = pTb->pNext;
475 cLeft--;
476 }
477 AssertMsg(cLeft == 0,
478 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
479 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
480}
481#endif
482
483
484DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
485{
486 STAM_PROFILE_START(&pTbCache->StatPrune, a);
487
488 /*
489 * First convert the collision list to an array.
490 */
491 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
492 uintptr_t cInserted = 0;
493 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
494
495 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
496
497 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
498 {
499 apSortedTbs[cInserted++] = pTbCollision;
500 pTbCollision = pTbCollision->pNext;
501 }
502
503 /* Free any excess (impossible). */
504 if (RT_LIKELY(!pTbCollision))
505 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
506 else
507 do
508 {
509 PIEMTB pTbToFree = pTbCollision;
510 pTbCollision = pTbToFree->pNext;
511 iemTbAllocatorFree(pVCpu, pTbToFree);
512 } while (pTbCollision);
513
514 /*
515 * Sort it by most recently used and usage count.
516 */
517 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
518
519 /* We keep half the list for now. Perhaps a bit aggressive... */
520 uintptr_t const cKeep = cInserted / 2;
521
522 /* First free up the TBs we don't wish to keep (before creating the new
523 list because otherwise the free code will scan the list for each one
524 without ever finding it). */
525 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
526 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
527
528 /* Then chain the new TB together with the existing ones we'd like to keep
529 and insert this list into the hash table. */
530 pTbCollision = pTb;
531 for (uintptr_t idx = 0; idx < cKeep; idx++)
532 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
533 pTbCollision->pNext = NULL;
534
535 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
536#ifdef VBOX_STRICT
537 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
538#endif
539
540 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
541}
542
543
544static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
545{
546 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
547 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
548 if (!pTbOldHead)
549 {
550 pTb->pNext = NULL;
551 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
552 }
553 else
554 {
555 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
556 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
557 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
558 {
559 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
560 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
561#ifdef VBOX_STRICT
562 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
563#endif
564 }
565 else
566 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
567 }
568}
569
570
571/**
572 * Unlinks @a pTb from the hash table if found in it.
573 *
574 * @returns true if unlinked, false if not present.
575 * @param pTbCache The hash table.
576 * @param pTb The TB to remove.
577 */
578static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
579{
580 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
581 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
582 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
583
584 /*
585 * At the head of the collision list?
586 */
587 if (pTbHash == pTb)
588 {
589 if (!pTb->pNext)
590 pTbCache->apHash[idxHash] = NULL;
591 else
592 {
593 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
594 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
595#ifdef VBOX_STRICT
596 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
597#endif
598 }
599 return true;
600 }
601
602 /*
603 * Search the collision list.
604 */
605 PIEMTB const pTbHead = pTbHash;
606 while (pTbHash)
607 {
608 PIEMTB const pNextTb = pTbHash->pNext;
609 if (pNextTb == pTb)
610 {
611 pTbHash->pNext = pTb->pNext;
612 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
613#ifdef VBOX_STRICT
614 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
615#endif
616 return true;
617 }
618 pTbHash = pNextTb;
619 }
620 return false;
621}
622
623
624/**
625 * Looks up a TB for the given PC and flags in the cache.
626 *
627 * @returns Pointer to TB on success, NULL if not found.
628 * @param pVCpu The cross context virtual CPU structure of the
629 * calling thread.
630 * @param pTbCache The translation block cache.
631 * @param GCPhysPc The PC to look up a TB for.
632 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
633 * the lookup.
634 * @thread EMT(pVCpu)
635 */
636static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
637 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
638{
639 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
640 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
641 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
642#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
643 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
644#endif
645 while (pTb)
646 {
647 if (pTb->GCPhysPc == GCPhysPc)
648 {
649 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
650 {
651 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
652 {
653 STAM_COUNTER_INC(&pTbCache->cLookupHits);
654 AssertMsg(cLeft > 0, ("%d\n", cLeft));
655
656 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
657 pTb->cUsed++;
658#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
659 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != 16)
660 {
661 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
662 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
663 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
664 return pTb;
665 }
666 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
667 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
668 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
669 return iemNativeRecompile(pVCpu, pTb);
670#else
671 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
672 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
673 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
674 return pTb;
675#endif
676 }
677 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
678 }
679 else
680 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
681 }
682 else
683 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
684
685 pTb = pTb->pNext;
686#ifdef VBOX_STRICT
687 cLeft--;
688#endif
689 }
690 AssertMsg(cLeft == 0, ("%d\n", cLeft));
691 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
692 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
693 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
694 return pTb;
695}
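/* Match criteria sketch (restating the checks above): a cached TB is only returned when the
 * physical PC, the key flags and the CS attributes all agree, so a TB is never returned for a
 * mismatched execution mode, flag state or CS attribute set:
 *
 *     fMatch = pTb->GCPhysPc == GCPhysPc
 *           && (pTb->fFlags & IEMTB_F_KEY_MASK) == (((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK)
 *           && pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
 */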
696
697
698/*********************************************************************************************************************************
699* Translation Block Allocator.
700*********************************************************************************************************************************/
701/*
702 * Translation block allocation management.
703 */
704
705#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
706# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
707 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
708# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
709 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
710# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
711 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
712#else
713# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
714 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
715# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
716 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
717# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
718 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
719#endif
720/** Makes a TB index from a chunk index and TB index within that chunk. */
721#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
722 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
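/* Worked example (illustrative; assumes the non-power-of-two variants above and a hypothetical
 * cTbsPerChunk of 2048): flat TB index 5000 maps to chunk 5000 / 2048 = 2 and in-chunk index
 * 5000 - 2 * 2048 = 904, and IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 904) reconstructs the flat
 * index 2 * 2048 + 904 = 5000. */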
723
724
725/**
726 * Initializes the TB allocator and cache for an EMT.
727 *
728 * @returns VBox status code.
729 * @param pVM The VM handle.
730 * @param cInitialTbs The initial number of translation blocks to
731 * preallocate.
732 * @param cMaxTbs The max number of translation blocks allowed.
733 * @param cbInitialExec The initial size of the executable memory allocator.
734 * @param cbMaxExec The max size of the executable memory allocator.
735 * @param cbChunkExec The chunk size for executable memory allocator. Zero
736 * or UINT32_MAX for automatically determining this.
737 * @thread EMT
738 */
739DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
740 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
741{
742 PVMCPUCC pVCpu = VMMGetCpu(pVM);
743 Assert(!pVCpu->iem.s.pTbCacheR3);
744 Assert(!pVCpu->iem.s.pTbAllocatorR3);
745
746 /*
747 * Calculate the chunk size of the TB allocator.
748 * The minimum chunk size is 2MiB.
749 */
750 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
751 uint32_t cbPerChunk = _2M;
752 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
753#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
754 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
755 uint8_t cChunkShift = 21 - cTbShift;
756 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
757#endif
758 for (;;)
759 {
760 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
761 break;
762 cbPerChunk *= 2;
763 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
764#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
765 cChunkShift += 1;
766#endif
767 }
768
769 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
770 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
771 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
772
773 cMaxTbs = cMaxChunks * cTbsPerChunk;
774
775 /*
776 * Allocate and initialize it.
777 */
778 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
779 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
780 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
781 if (!pTbAllocator)
782 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
783 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
784 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
785 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
786 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
787 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
788 pTbAllocator->cbPerChunk = cbPerChunk;
789 pTbAllocator->cMaxTbs = cMaxTbs;
790#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
791 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
792 pTbAllocator->cChunkShift = cChunkShift;
793 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
794#endif
795
796 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
797 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
798
799 /*
800 * Allocate the initial chunks.
801 */
802 for (uint32_t idxChunk = 0; ; idxChunk++)
803 {
804 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
805 if (!paTbs)
806 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
807 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
808 cbPerChunk, idxChunk, pVCpu->idCpu);
809
810 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
811 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
812 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
813 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
814 pTbAllocator->cTotalTbs += cTbsPerChunk;
815
816 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
817 break;
818 }
819
820 /*
821 * Calculate the size of the hash table. We double the max TB count and
822 * round it up to the nearest power of two.
823 */
824 uint32_t cCacheEntries = cMaxTbs * 2;
825 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
826 {
827 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
828 cCacheEntries = RT_BIT_32(iBitTop);
829 Assert(cCacheEntries >= cMaxTbs * 2);
830 }
831
832 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
833 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
834 if (!pTbCache)
835 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
836 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
837 cbTbCache, cCacheEntries, pVCpu->idCpu);
838
839 /*
840 * Initialize it (assumes zeroed by the allocator).
841 */
842 pTbCache->uMagic = IEMTBCACHE_MAGIC;
843 pTbCache->cHash = cCacheEntries;
844 pTbCache->uHashMask = cCacheEntries - 1;
845 Assert(pTbCache->cHash > pTbCache->uHashMask);
846 pVCpu->iem.s.pTbCacheR3 = pTbCache;
847
848 /*
849 * Initialize the native executable memory allocator.
850 */
851#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
852 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
853 AssertLogRelRCReturn(rc, rc);
854#else
855 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
856#endif
857
858 return VINF_SUCCESS;
859}
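/* Invocation sketch (hypothetical sizes; the real caller derives them from the VM config):
 *
 *     int rc = iemTbInit(pVM, 32768, 262144,   // cInitialTbs, cMaxTbs
 *                        _64M, _512M, 0);      // cbInitialExec, cbMaxExec, cbChunkExec=auto
 *     AssertLogRelRCReturn(rc, rc);
 */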
860
861
862/**
863 * Inner free worker.
864 */
865static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
866 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
867{
868 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
869 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
870 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
871 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
872
873 /*
874 * Unlink the TB from the hash table.
875 */
876 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
877
878 /*
879 * Free the TB itself.
880 */
881 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
882 {
883 case IEMTB_F_TYPE_THREADED:
884 pTbAllocator->cThreadedTbs -= 1;
885 RTMemFree(pTb->Thrd.paCalls);
886 break;
887#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
888 case IEMTB_F_TYPE_NATIVE:
889 pTbAllocator->cNativeTbs -= 1;
890 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
891 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
892 break;
893#endif
894 default:
895 AssertFailed();
896 }
897 RTMemFree(pTb->pabOpcodes);
898
899 pTb->pNext = NULL;
900 pTb->fFlags = 0;
901 pTb->GCPhysPc = UINT64_MAX;
902 pTb->Gen.uPtr = 0;
903 pTb->Gen.uData = 0;
904 pTb->cbOpcodes = 0;
905 pTb->pabOpcodes = NULL;
906
907 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
908 Assert(pTbAllocator->cInUseTbs > 0);
909
910 pTbAllocator->cInUseTbs -= 1;
911 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
912}
913
914
915/**
916 * Frees the given TB.
917 *
918 * @param pVCpu The cross context virtual CPU structure of the calling
919 * thread.
920 * @param pTb The translation block to free.
921 * @thread EMT(pVCpu)
922 */
923static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
924{
925 /*
926 * Validate state.
927 */
928 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
929 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
930 uint8_t const idxChunk = pTb->idxAllocChunk;
931 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
932 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
933 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
934
935 /*
936 * Call inner worker.
937 */
938 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
939}
940
941
942/**
943 * Schedules a native TB for freeing when it's no longer being executed and no
944 * longer part of the caller's call stack.
945 *
946 * The TB will be removed from the translation block cache, though, so it isn't
947 * possible to execute it again and the IEMTB::pNext member can be used to link
948 * it together with other TBs awaiting freeing.
949 *
950 * @param pVCpu The cross context virtual CPU structure of the calling
951 * thread.
952 * @param pTb The translation block to schedule for freeing.
953 */
954static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
955{
956 /*
957 * Validate state.
958 */
959 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
960 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
961 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
962 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
963 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
964 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
965 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
966 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
967
968 /*
969 * Remove it from the cache and prepend it to the allocator's todo list.
970 */
971 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
972
973 pTb->pNext = pTbAllocator->pDelayedFreeHead;
974 pTbAllocator->pDelayedFreeHead = pTb;
975}
976
977
978/**
979 * Processes the delayed frees.
980 *
981 * This is called by the allocator function as well as the native recompile
982 * function before making any TB or executable memory allocations respectively.
983 */
984void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
985{
986 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
987 pTbAllocator->pDelayedFreeHead = NULL;
988 while (pTb)
989 {
990 PIEMTB const pTbNext = pTb->pNext;
991 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
992 iemTbAllocatorFree(pVCpu, pTb);
993 pTb = pTbNext;
994 }
995}
996
997
998/**
999 * Grow the translation block allocator with another chunk.
1000 */
1001static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1002{
1003 /*
1004 * Validate state.
1005 */
1006 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1007 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1008 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1009 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1010 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1011
1012 /*
1013 * Allocate a new chunk and add it to the allocator.
1014 */
1015 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1016 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1017 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1018
1019 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1020 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1021 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1022 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1023 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1024 pTbAllocator->cTotalTbs += cTbsPerChunk;
1025 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1026
1027 return VINF_SUCCESS;
1028}
1029
1030
1031/**
1032 * Allocates a TB from an allocator that has a free block.
1033 *
1034 * This is common code to both the fast and slow allocator code paths.
1035 */
1036DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1037{
1038 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1039
1040 int idxTb;
1041 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1042 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1043 pTbAllocator->cTotalTbs,
1044 pTbAllocator->iStartHint & ~(uint32_t)63);
1045 else
1046 idxTb = -1;
1047 if (idxTb < 0)
1048 {
1049 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1050 AssertLogRelReturn(idxTb >= 0, NULL);
1051 }
1052 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1053 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1054
1055 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1056 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1057 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1058 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1059 Assert(pTb->idxAllocChunk == idxChunk);
1060
1061 pTbAllocator->cInUseTbs += 1;
1062 if (fThreaded)
1063 pTbAllocator->cThreadedTbs += 1;
1064 else
1065 pTbAllocator->cNativeTbs += 1;
1066 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1067 return pTb;
1068}
1069
1070
1071/**
1072 * Slow path for iemTbAllocatorAlloc.
1073 */
1074static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1075{
1076 /*
1077 * With some luck we can add another chunk.
1078 */
1079 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1080 {
1081 int rc = iemTbAllocatorGrow(pVCpu);
1082 if (RT_SUCCESS(rc))
1083 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1084 }
1085
1086 /*
1087 * We have to prune stuff. Sigh.
1088 *
1089 * This requires scanning for older TBs and kicking them out. Not sure how to
1090 * best do this as we don't want to maintain any list of TBs ordered by last
1091 * usage time. But one reasonably simple approach would be that each time we
1092 * get here we continue a sequential scan of the allocation chunks,
1093 * considering just a smallish number of TBs and freeing a fixed portion of
1094 * them. Say, we consider the next 128 TBs, freeing the least recently used
1095 * out of each group of 4 TBs, resulting in 32 free TBs.
1096 */
1097 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1098 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1099 uint32_t const cTbsToPrune = 128;
1100 uint32_t const cTbsPerGroup = 4;
1101 uint32_t cFreedTbs = 0;
1102#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1103 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1104#else
1105 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1106#endif
1107 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1108 idxTbPruneFrom = 0;
1109 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1110 {
1111 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1112 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1113 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1114 uint32_t cMsAge = msNow - pTb->msLastUsed;
1115 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1116
1117 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1118 {
1119#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1120 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1121 { /* likely */ }
1122 else
1123 {
1124 idxInChunk2 = 0;
1125 idxChunk2 += 1;
1126 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1127 idxChunk2 = 0;
1128 }
1129#endif
1130 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1131 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1132 if ( cMsAge2 > cMsAge
1133 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1134 {
1135 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1136 pTb = pTb2;
1137 idxChunk = idxChunk2;
1138 idxInChunk = idxInChunk2;
1139 cMsAge = cMsAge2;
1140 }
1141 }
1142
1143 /* Free the TB. */
1144 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1145 cFreedTbs++; /* paranoia */
1146 }
1147 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1148 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1149
1150 /*
1151 * Allocate a TB from the ones we've pruned.
1152 */
1153 if (cFreedTbs)
1154 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1155 return NULL;
1156}
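/* Pruning rate note: with cTbsToPrune = 128 and cTbsPerGroup = 4 the loop above frees exactly
 * one TB per group, i.e. 32 TBs per slow-path invocation, matching the estimate in the comment. */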
1157
1158
1159/**
1160 * Allocate a translation block.
1161 *
1162 * @returns Pointer to block on success, NULL if we're out and unable to
1163 * free up an existing one (very unlikely once implemented).
1164 * @param pVCpu The cross context virtual CPU structure of the calling
1165 * thread.
1166 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1167 * For statistics.
1168 */
1169DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1170{
1171 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1172 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1173
1174 /* Free any pending TBs before we proceed. */
1175 if (!pTbAllocator->pDelayedFreeHead)
1176 { /* probably likely */ }
1177 else
1178 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1179
1180 /* If the allocator is full, take the slow code path. */
1181 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1182 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1183 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1184}
1185
1186
1187/**
1188 * This is called when we're out of space for native TBs.
1189 *
1190 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1191 * The difference is that we only prune native TBs and will only free any if
1192 * there are at least two in a group. The conditions under which we're called are
1193 * different - there will probably be free TBs in the table when we're called.
1194 * Therefore we increase the group size and max scan length, though we'll stop
1195 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1196 * up at least 8 TBs.
1197 */
1198void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1199{
1200 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1201 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1202
1203 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1204
1205 /*
1206 * Flush the delayed free list before we start freeing TBs indiscriminately.
1207 */
1208 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1209
1210 /*
1211 * Scan and free TBs.
1212 */
1213 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1214 uint32_t const cTbsToPrune = 128 * 8;
1215 uint32_t const cTbsPerGroup = 4 * 4;
1216 uint32_t cFreedTbs = 0;
1217 uint32_t cMaxInstrs = 0;
1218 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1219 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1220 {
1221 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1222 idxTbPruneFrom = 0;
1223 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1224 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1225 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1226 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1227 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1228
1229 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1230 {
1231 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1232 { /* likely */ }
1233 else
1234 {
1235 idxInChunk2 = 0;
1236 idxChunk2 += 1;
1237 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1238 idxChunk2 = 0;
1239 }
1240 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1241 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1242 {
1243 cNativeTbs += 1;
1244 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1245 if ( cMsAge2 > cMsAge
1246 || ( cMsAge2 == cMsAge
1247 && ( pTb2->cUsed < pTb->cUsed
1248 || ( pTb2->cUsed == pTb->cUsed
1249 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1250 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1251 {
1252 pTb = pTb2;
1253 idxChunk = idxChunk2;
1254 idxInChunk = idxInChunk2;
1255 cMsAge = cMsAge2;
1256 }
1257 }
1258 }
1259
1260 /* Free the TB if we found at least two native ones in this group. */
1261 if (cNativeTbs >= 2)
1262 {
1263 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1264 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1265 cFreedTbs++;
1266 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1267 break;
1268 }
1269 }
1270 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1271
1272 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1273}
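/* Scan budget note: the native variant considers up to 128 * 8 = 1024 TBs in groups of
 * 4 * 4 = 16, freeing at most one TB per group and only when the group holds at least two
 * native TBs; it stops early once at least 8 TBs have been freed and one of them was at
 * least cNeededInstrs instructions long. */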
1274
1275
1276/*********************************************************************************************************************************
1277* Threaded Recompiler Core *
1278*********************************************************************************************************************************/
1279/**
1280 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1281 * @returns pszBuf.
1282 * @param fFlags The flags.
1283 * @param pszBuf The output buffer.
1284 * @param cbBuf The output buffer size. At least 32 bytes.
1285 */
1286DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1287{
1288 Assert(cbBuf >= 32);
1289 static RTSTRTUPLE const s_aModes[] =
1290 {
1291 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1292 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1293 /* [02] = */ { RT_STR_TUPLE("!2!") },
1294 /* [03] = */ { RT_STR_TUPLE("!3!") },
1295 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1296 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1297 /* [06] = */ { RT_STR_TUPLE("!6!") },
1298 /* [07] = */ { RT_STR_TUPLE("!7!") },
1299 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1300 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1301 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1302 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1303 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1304 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1305 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1306 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1307 /* [10] = */ { RT_STR_TUPLE("!10!") },
1308 /* [11] = */ { RT_STR_TUPLE("!11!") },
1309 /* [12] = */ { RT_STR_TUPLE("!12!") },
1310 /* [13] = */ { RT_STR_TUPLE("!13!") },
1311 /* [14] = */ { RT_STR_TUPLE("!14!") },
1312 /* [15] = */ { RT_STR_TUPLE("!15!") },
1313 /* [16] = */ { RT_STR_TUPLE("!16!") },
1314 /* [17] = */ { RT_STR_TUPLE("!17!") },
1315 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1316 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1317 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1318 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1319 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1320 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1321 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1322 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1323 };
1324 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1325 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1326 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1327
1328 pszBuf[off++] = ' ';
1329 pszBuf[off++] = 'C';
1330 pszBuf[off++] = 'P';
1331 pszBuf[off++] = 'L';
1332 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1333 Assert(off < 32);
1334
1335 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1336
1337 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1338 {
1339 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1340 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1341 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1342 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1343 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1344 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1345 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1346 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1347 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1348 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1349 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1350 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1351 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1352 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1353 };
1354 if (fFlags)
1355 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1356 if (s_aFlags[i].fFlag & fFlags)
1357 {
1358 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1359 pszBuf[off++] = ' ';
1360 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1361 off += s_aFlags[i].cchName;
1362 fFlags &= ~s_aFlags[i].fFlag;
1363 if (!fFlags)
1364 break;
1365 }
1366 pszBuf[off] = '\0';
1367
1368 return pszBuf;
1369}
1370
1371
1372/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1373static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1374{
1375 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1376 pDis->cbCachedInstr += cbMaxRead;
1377 RT_NOREF(cbMinRead);
1378 return VERR_NO_DATA;
1379}
1380
1381
1382DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1383{
1384 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1385
1386 char szDisBuf[512];
1387
1388 /*
1389 * Print TB info.
1390 */
1391 pHlp->pfnPrintf(pHlp,
1392 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
1393 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1394 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
1395 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1396
1397 /*
1398 * This disassembly is driven by the debug info which follows the native
1399 * code and indicates when it starts with the next guest instructions,
1400 * where labels are and such things.
1401 */
1402 DISSTATE Dis;
1403 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1404 uint32_t const cCalls = pTb->Thrd.cCalls;
1405 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1406 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1407 : DISCPUMODE_64BIT;
1408 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1409 uint8_t idxRange = UINT8_MAX;
1410 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1411 uint32_t offRange = 0;
1412 uint32_t offOpcodes = 0;
1413 uint32_t const cbOpcodes = pTb->cbOpcodes;
1414 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1415
1416 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1417 {
1418 /*
1419 * New opcode range?
1420 */
1421 if ( idxRange == UINT8_MAX
1422 || idxRange >= cRanges
1423 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1424 {
1425 idxRange += 1;
1426 if (idxRange < cRanges)
1427 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1428 else
1429 continue;
1430 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1431 + (pTb->aRanges[idxRange].idxPhysPage == 0
1432 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1433 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1434 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1435 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1436 pTb->aRanges[idxRange].idxPhysPage);
1437 GCPhysPc += offRange;
1438 }
1439
1440 /*
1441 * Disassemble another guest instruction?
1442 */
1443 if ( paCalls[iCall].offOpcode != offOpcodes
1444 && paCalls[iCall].cbOpcode > 0
1445 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1446 {
1447 offOpcodes = paCalls[iCall].offOpcode;
1448 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1449 uint32_t cbInstr = 1;
1450 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1451 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1452 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1453 if (RT_SUCCESS(rc))
1454 {
1455 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1456 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1457 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1458 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1459 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1460 }
1461 else
1462 {
1463 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1464 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1465 cbInstr = paCalls[iCall].cbOpcode;
1466 }
1467 GCPhysPc += cbInstr;
1468 offRange += cbInstr;
1469 }
1470
1471 /*
1472 * Dump call details.
1473 */
1474 pHlp->pfnPrintf(pHlp,
1475 " Call #%u to %s (%u args)\n",
1476 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1477 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1478
1479 /*
1480 * Snoop fExec.
1481 */
1482 switch (paCalls[iCall].enmFunction)
1483 {
1484 default:
1485 break;
1486 case kIemThreadedFunc_BltIn_CheckMode:
1487 fExec = paCalls[iCall].auParams[0];
1488 break;
1489 }
1490 }
1491}
1492
1493
1494
1495/**
1496 * Allocate a translation block for threaded recompilation.
1497 *
1498 * This is allocated with maxed out call table and storage for opcode bytes,
1499 * because it's only supposed to be called once per EMT to allocate the TB
1500 * pointed to by IEMCPU::pThrdCompileTbR3.
1501 *
1502 * @returns Pointer to the translation block on success, NULL on failure.
1503 * @param pVM The cross context virtual machine structure.
1504 * @param pVCpu The cross context virtual CPU structure of the calling
1505 * thread.
1506 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1507 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1508 */
1509static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1510{
1511 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1512 if (pTb)
1513 {
1514 unsigned const cCalls = 256;
1515 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1516 if (pTb->Thrd.paCalls)
1517 {
1518 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1519 if (pTb->pabOpcodes)
1520 {
1521 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1522 pTb->Thrd.cAllocated = cCalls;
1523 pTb->Thrd.cCalls = 0;
1524 pTb->cbOpcodes = 0;
1525 pTb->pNext = NULL;
1526 pTb->cUsed = 0;
1527 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1528 pTb->idxAllocChunk = UINT8_MAX;
1529 pTb->GCPhysPc = GCPhysPc;
1530 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1531 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1532 pTb->cInstructions = 0;
1533
1534 /* Init the first opcode range. */
1535 pTb->cRanges = 1;
1536 pTb->aRanges[0].cbOpcodes = 0;
1537 pTb->aRanges[0].offOpcodes = 0;
1538 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1539 pTb->aRanges[0].u2Unused = 0;
1540 pTb->aRanges[0].idxPhysPage = 0;
1541 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1542 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1543
1544 return pTb;
1545 }
1546 RTMemFree(pTb->Thrd.paCalls);
1547 }
1548 RTMemFree(pTb);
1549 }
1550 RT_NOREF(pVM);
1551 return NULL;
1552}
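
#if 0 /* Illustrative sketch, not compiled: rough memory footprint of the per-EMT compile TB
         allocated above, assuming the 256-entry call table and 16 opcode bytes per call entry
         used by iemThreadedTbAlloc(). The helper name is hypothetical. */
static size_t iemThreadedTbAllocFootprintSketch(void)
{
    unsigned const cCalls = 256;                        /* matches iemThreadedTbAlloc */
    return sizeof(IEMTB)                                /* the TB structure itself */
         + sizeof(IEMTHRDEDCALLENTRY) * cCalls          /* maxed-out call table */
         + cCalls * 16;                                 /* worst case opcode bytes: 16 per call entry */
}
#endif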
1553
1554
1555/**
1556 * Called on the TB that is dedicated for recompilation before it's reused.
1557 *
1558 * @param pVCpu The cross context virtual CPU structure of the calling
1559 * thread.
1560 * @param pTb The translation block to reuse.
1561 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1562 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1563 */
1564static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1565{
1566 pTb->GCPhysPc = GCPhysPc;
1567 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1568 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1569 pTb->Thrd.cCalls = 0;
1570 pTb->cbOpcodes = 0;
1571 pTb->cInstructions = 0;
1572
1573 /* Init the first opcode range. */
1574 pTb->cRanges = 1;
1575 pTb->aRanges[0].cbOpcodes = 0;
1576 pTb->aRanges[0].offOpcodes = 0;
1577 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1578 pTb->aRanges[0].u2Unused = 0;
1579 pTb->aRanges[0].idxPhysPage = 0;
1580 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1581 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1582}
1583
1584
1585/**
1586 * Used to duplicate a threaded translation block after recompilation is done.
1587 *
1588 * @returns Pointer to the translation block on success, NULL on failure.
1589 * @param pVM The cross context virtual machine structure.
1590 * @param pVCpu The cross context virtual CPU structure of the calling
1591 * thread.
1592 * @param pTbSrc The TB to duplicate.
1593 */
1594static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1595{
1596 /*
1597 * Just using the heap for now. Will make this more efficient and
1598 * complicated later, don't worry. :-)
1599 */
1600 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1601 if (pTb)
1602 {
1603 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1604 memcpy(pTb, pTbSrc, sizeof(*pTb));
1605 pTb->idxAllocChunk = idxAllocChunk;
1606
1607 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1608 Assert(cCalls > 0);
1609 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1610 if (pTb->Thrd.paCalls)
1611 {
1612 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1613 Assert(cbOpcodes > 0);
1614 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1615 if (pTb->pabOpcodes)
1616 {
1617 pTb->Thrd.cAllocated = cCalls;
1618 pTb->pNext = NULL;
1619 pTb->cUsed = 0;
1620 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1621 pTb->fFlags = pTbSrc->fFlags;
1622
1623 return pTb;
1624 }
1625 RTMemFree(pTb->Thrd.paCalls);
1626 }
1627 iemTbAllocatorFree(pVCpu, pTb);
1628 }
1629 RT_NOREF(pVM);
1630 return NULL;
1631
1632}
1633
1634
1635/**
1636 * Adds the given TB to the hash table.
1637 *
1638 * @param pVCpu The cross context virtual CPU structure of the calling
1639 * thread.
1640 * @param pTbCache The cache to add it to.
1641 * @param pTb The translation block to add.
1642 */
1643static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1644{
1645 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1646
1647 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
1648 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1649 if (LogIs12Enabled())
1650 {
1651 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1652 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1653 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1654 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1655 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1656 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1657 pTb->aRanges[idxRange].idxPhysPage == 0
1658 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1659 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1660 }
1661}
1662
1663
1664/**
1665 * Called by opcode verifier functions when they detect a problem.
1666 */
1667void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1668{
1669 /* Unless it's safe, we can only immediately free a threaded TB, as we will
1670 have more code left to execute in native TBs when fSafeToFree == false. */
1671 if (fSafeToFree || (pTb->fFlags & IEMTB_F_TYPE_THREADED))
1672 iemTbAllocatorFree(pVCpu, pTb);
1673 else
1674 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1675}
1676
1677
1678/*
1679 * Real code.
1680 */
1681
1682#ifdef LOG_ENABLED
1683/**
1684 * Logs the current instruction.
1685 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1686 * @param pszFunction The IEM function doing the execution.
1687 * @param idxInstr The instruction number in the block.
1688 */
1689static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1690{
1691# ifdef IN_RING3
1692 if (LogIs2Enabled())
1693 {
1694 char szInstr[256];
1695 uint32_t cbInstr = 0;
1696 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1697 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1698 szInstr, sizeof(szInstr), &cbInstr);
1699
1700 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1701 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1702 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1703 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1704 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1705 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1706 " %s\n"
1707 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1708 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1709 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1710 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1711 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1712 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1713 szInstr));
1714
1715 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1716 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1717 }
1718 else
1719# endif
1720 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1721 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1722}
1723#endif /* LOG_ENABLED */
1724
1725
1726#if 0
1727static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1728{
1729 RT_NOREF(pVM, pVCpu);
1730 return rcStrict;
1731}
1732#endif
1733
1734
1735/**
1736 * Initializes the decoder state when compiling TBs.
1737 *
1738 * This presumes that fExec has already been initialized.
1739 *
1740 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so fixes may
1741 * need to be applied to them as well.
1742 *
1743 * @param pVCpu The cross context virtual CPU structure of the calling
1744 * thread.
1745 * @param fReInit Clear for the first call for a TB, set for subsequent
1746 * calls from inside the compile loop where we can skip a
1747 * couple of things.
1748 * @param fExtraFlags The extra translation block flags when @a fReInit is
1749 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1750 * checked.
1751 */
1752DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1753{
1754 /* ASSUMES: That iemInitExec was already called and that anyone changing
1755 CPU state affecting the fExec bits since then will have updated fExec! */
1756 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1757 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1758
1759 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1760
1761 /* Decoder state: */
1762 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1763 pVCpu->iem.s.enmEffAddrMode = enmMode;
1764 if (enmMode != IEMMODE_64BIT)
1765 {
1766 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1767 pVCpu->iem.s.enmEffOpSize = enmMode;
1768 }
1769 else
1770 {
1771 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1772 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1773 }
1774 pVCpu->iem.s.fPrefixes = 0;
1775 pVCpu->iem.s.uRexReg = 0;
1776 pVCpu->iem.s.uRexB = 0;
1777 pVCpu->iem.s.uRexIndex = 0;
1778 pVCpu->iem.s.idxPrefix = 0;
1779 pVCpu->iem.s.uVex3rdReg = 0;
1780 pVCpu->iem.s.uVexLength = 0;
1781 pVCpu->iem.s.fEvexStuff = 0;
1782 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1783 pVCpu->iem.s.offModRm = 0;
1784 pVCpu->iem.s.iNextMapping = 0;
1785
1786 if (!fReInit)
1787 {
1788 pVCpu->iem.s.cActiveMappings = 0;
1789 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1790 pVCpu->iem.s.fEndTb = false;
1791 pVCpu->iem.s.fTbCheckOpcodes = false;
1792 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1793 pVCpu->iem.s.fTbCrossedPage = false;
1794 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1795 pVCpu->iem.s.fTbCurInstrIsSti = false;
1796 /* Force RF clearing and TF checking on first instruction in the block
1797 as we don't really know what came before and should assume the worst: */
1798 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1799 }
1800 else
1801 {
1802 Assert(pVCpu->iem.s.cActiveMappings == 0);
1803 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1804 Assert(pVCpu->iem.s.fEndTb == false);
1805 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1806 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1807 }
1808 pVCpu->iem.s.fTbCurInstr = 0;
1809
1810#ifdef DBGFTRACE_ENABLED
1811 switch (IEM_GET_CPU_MODE(pVCpu))
1812 {
1813 case IEMMODE_64BIT:
1814 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1815 break;
1816 case IEMMODE_32BIT:
1817 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1818 break;
1819 case IEMMODE_16BIT:
1820 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1821 break;
1822 }
1823#endif
1824}
1825
1826
1827/**
1828 * Initializes the opcode fetcher when starting the compilation.
1829 *
1830 * @param pVCpu The cross context virtual CPU structure of the calling
1831 * thread.
1832 */
1833DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1834{
1835 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1836#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1837 pVCpu->iem.s.offOpcode = 0;
1838#else
1839 RT_NOREF(pVCpu);
1840#endif
1841}
1842
1843
1844/**
1845 * Re-initializes the opcode fetcher between instructions while compiling.
1846 *
1847 * @param pVCpu The cross context virtual CPU structure of the calling
1848 * thread.
1849 */
1850DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1851{
1852 if (pVCpu->iem.s.pbInstrBuf)
1853 {
1854 uint64_t off = pVCpu->cpum.GstCtx.rip;
1855 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1856 off += pVCpu->cpum.GstCtx.cs.u64Base;
1857 off -= pVCpu->iem.s.uInstrBufPc;
1858 if (off < pVCpu->iem.s.cbInstrBufTotal)
1859 {
1860 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1861 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1862 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1863 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1864 else
1865 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1866 }
1867 else
1868 {
1869 pVCpu->iem.s.pbInstrBuf = NULL;
1870 pVCpu->iem.s.offInstrNextByte = 0;
1871 pVCpu->iem.s.offCurInstrStart = 0;
1872 pVCpu->iem.s.cbInstrBuf = 0;
1873 pVCpu->iem.s.cbInstrBufTotal = 0;
1874 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1875 }
1876 }
1877 else
1878 {
1879 pVCpu->iem.s.offInstrNextByte = 0;
1880 pVCpu->iem.s.offCurInstrStart = 0;
1881 pVCpu->iem.s.cbInstrBuf = 0;
1882 pVCpu->iem.s.cbInstrBufTotal = 0;
1883#ifdef VBOX_STRICT
1884 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1885#endif
1886 }
1887#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1888 pVCpu->iem.s.offOpcode = 0;
1889#endif
1890}
1891
1892#ifdef LOG_ENABLED
1893
1894/**
1895 * Inserts a NOP call.
1896 *
1897 * This is for debugging.
1898 *
1899 * @returns true on success, false if we're out of call entries.
1900 * @param pTb The translation block being compiled.
1901 */
1902bool iemThreadedCompileEmitNop(PIEMTB pTb)
1903{
1904 /* Emit the call. */
1905 uint32_t const idxCall = pTb->Thrd.cCalls;
1906 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1907 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1908 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1909 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
1910 pCall->idxInstr = pTb->cInstructions - 1;
1911 pCall->uUnused0 = 0;
1912 pCall->offOpcode = 0;
1913 pCall->cbOpcode = 0;
1914 pCall->idxRange = 0;
1915 pCall->auParams[0] = 0;
1916 pCall->auParams[1] = 0;
1917 pCall->auParams[2] = 0;
1918 return true;
1919}
1920
1921
1922/**
1923 * Called by iemThreadedCompile if cpu state logging is desired.
1924 *
1925 * @returns true on success, false if we're out of call entries.
1926 * @param pTb The translation block being compiled.
1927 */
1928bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
1929{
1930 /* Emit the call. */
1931 uint32_t const idxCall = pTb->Thrd.cCalls;
1932 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1933 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1934 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1935 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
1936 pCall->idxInstr = pTb->cInstructions - 1;
1937 pCall->uUnused0 = 0;
1938 pCall->offOpcode = 0;
1939 pCall->cbOpcode = 0;
1940 pCall->idxRange = 0;
1941 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
1942 pCall->auParams[1] = 0;
1943 pCall->auParams[2] = 0;
1944 return true;
1945}
1946
1947#endif /* LOG_ENABLED */
1948
1949DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1950{
1951 switch (cbInstr)
1952 {
1953 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1954 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1955 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1956 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1957 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1958 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1959 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1960 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1961 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1962 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1963 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1964 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1965 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1966 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1967 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1968 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1969 }
1970}
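
#if 0 /* Illustrative sketch, not compiled: the unrolled switch above behaves like a bounded
         memcpy of the current instruction's opcode bytes out of the decoder buffer; the
         function name below is hypothetical. */
DECLINLINE(void) iemThreadedCopyOpcodeBytesSketch(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
{
    Assert(cbInstr >= 1 && cbInstr <= 15);          /* x86 instructions are 1 to 15 bytes long */
    memcpy(pbDst, pVCpu->iem.s.abOpcode, cbInstr);  /* the switch unrolls this copy for the compiler */
}
#endif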
1971
1972
1973/**
1974 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1975 *
1976 * - CS LIM check required.
1977 * - Must recheck opcode bytes.
1978 * - Previous instruction branched.
1979 * - TLB load detected, probably due to page crossing.
1980 *
1981 * @returns true if everything went well, false if we're out of space in the TB
1982 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
1983 * @param pVCpu The cross context virtual CPU structure of the calling
1984 * thread.
1985 * @param pTb The translation block being compiled.
1986 */
1987bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1988{
1989 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1990 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1991#if 0
1992 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1993 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1994#endif
1995
1996 /*
1997 * If we're not in 64-bit mode and not already checking CS.LIM we need to
1998 * see whether we need to start checking.
1999 */
2000 bool fConsiderCsLimChecking;
2001 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2002 if ( fMode == IEM_F_MODE_X86_64BIT
2003 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2004 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2005 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2006 fConsiderCsLimChecking = false; /* already enabled or not needed */
2007 else
2008 {
2009 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2010 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2011 fConsiderCsLimChecking = true; /* likely */
2012 else
2013 {
2014 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2015 return false;
2016 }
2017 }
2018
2019 /*
2020 * Prepare the call now, even before we know if we can accept the instruction in this TB.
2021 * This allows us to amend parameters w/o making every case suffer.
2022 */
2023 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2024 uint16_t const offOpcode = pTb->cbOpcodes;
2025 uint8_t idxRange = pTb->cRanges - 1;
2026
2027 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2028 pCall->idxInstr = pTb->cInstructions;
2029 pCall->offOpcode = offOpcode;
2030 pCall->idxRange = idxRange;
2031 pCall->cbOpcode = cbInstr;
2032 pCall->auParams[0] = (uint32_t)cbInstr
2033 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2034 /* The upper dword is sometimes used for cbStartPage. */;
2035 pCall->auParams[1] = idxRange;
2036 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2037
2038/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2039 * gotten onto. If we do, stop */
2040
2041 /*
2042 * Case 1: We've branched (RIP changed).
2043 *
2044 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2045 * Req: 1 extra range, no extra phys.
2046 *
2047 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2048 * necessary (fTbCrossedPage is true).
2049 * Req: 1 extra range, probably 1 extra phys page entry.
2050 *
2051 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2052 * but in addition we cross into the following page and require
2053 * another TLB load.
2054 * Req: 2 extra ranges, probably 2 extra phys page entries.
2055 *
2056 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2057 * the following page (thus fTbCrossedPage is true).
2058 * Req: 2 extra ranges, probably 1 extra phys page entry.
2059 *
2060 * Note! The setting fTbCrossedPage is done by the iemOpcodeFetchBytesJmp, but
2061 * it may trigger "spuriously" from the CPU point of view because of
2062 * physical page changes that'll invalidate the physical TLB and trigger a
2063 * call to the function. In theory this shouldn't be a big deal, just a bit of
2064 * performance loss as we'll pick the LoadingTlb variants.
2065 *
2066 * Note! We do not currently optimize branching to the next instruction (sorry
2067 * 32-bit PIC code). We could maybe do that in the branching code that
2068 * sets (or not) fTbBranched.
2069 */
2070 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2071 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2072 * code. This'll require filtering out far jmps and calls, as they
2073 * load CS which should technically be considered indirect since the
2074 * GDT/LDT entry's base address can be modified independently from
2075 * the code. */
2076 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2077 {
2078 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2079 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2080 {
2081 /* 1a + 1b - instruction fully within the branched to page. */
2082 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2083 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2084
2085 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2086 {
2087 /* Check that we've got a free range. */
2088 idxRange += 1;
2089 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2090 { /* likely */ }
2091 else
2092 {
2093 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2094 return false;
2095 }
2096 pCall->idxRange = idxRange;
2097 pCall->auParams[1] = idxRange;
2098 pCall->auParams[2] = 0;
2099
2100 /* Check that we've got a free page slot. */
2101 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2102 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2103 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2104 pTb->aRanges[idxRange].idxPhysPage = 0;
2105 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2106 || pTb->aGCPhysPages[0] == GCPhysNew)
2107 {
2108 pTb->aGCPhysPages[0] = GCPhysNew;
2109 pTb->aRanges[idxRange].idxPhysPage = 1;
2110 }
2111 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2112 || pTb->aGCPhysPages[1] == GCPhysNew)
2113 {
2114 pTb->aGCPhysPages[1] = GCPhysNew;
2115 pTb->aRanges[idxRange].idxPhysPage = 2;
2116 }
2117 else
2118 {
2119 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2120 return false;
2121 }
2122
2123 /* Finish setting up the new range. */
2124 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2125 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2126 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2127 pTb->aRanges[idxRange].u2Unused = 0;
2128 pTb->cRanges++;
2129 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2130 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2131 pTb->aRanges[idxRange].offOpcodes));
2132 }
2133 else
2134 {
2135 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2136 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2137 }
2138
2139 /* Determine which function we need to load & check.
2140 Note! For jumps to a new page, we'll set both fTbBranched and
2141 fTbCrossedPage to avoid unnecessary TLB work for intra-page
2142 branching */
2143 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2144 || pVCpu->iem.s.fTbCrossedPage)
2145 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2146 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2147 : !fConsiderCsLimChecking
2148 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2149 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2150 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2151 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2152 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2153 : !fConsiderCsLimChecking
2154 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2155 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2156 else
2157 {
2158 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2159 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2160 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2161 : !fConsiderCsLimChecking
2162 ? kIemThreadedFunc_BltIn_CheckOpcodes
2163 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2164 }
2165 }
2166 else
2167 {
2168 /* 1c + 1d - instruction crosses pages. */
2169 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2170 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2171
2172 /* Lazy bird: Check that this isn't case 1c, since we've already
2173 loaded the first physical address. End the TB and
2174 make it a case 2b instead.
2175
2176 Hmm. Too much bother to detect, so just do the same
2177 with case 1d as well. */
2178#if 0 /** @todo get back to this later when we've got the actual branch code in
2179 * place. */
2180 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2181
2182 /* Check that we've got two free ranges. */
2183 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2184 { /* likely */ }
2185 else
2186 return false;
2187 idxRange += 1;
2188 pCall->idxRange = idxRange;
2189 pCall->auParams[1] = idxRange;
2190 pCall->auParams[2] = 0;
2191
2192 /* ... */
2193
2194#else
2195 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2196 return false;
2197#endif
2198 }
2199 }
2200
2201 /*
2202 * Case 2: Page crossing.
2203 *
2204 * Sub-case 2a: The instruction starts on the first byte in the next page.
2205 *
2206 * Sub-case 2b: The instruction has opcode bytes in both the current and
2207 * following page.
2208 *
2209 * Both cases requires a new range table entry and probably a new physical
2210 * page entry. The difference is in which functions to emit and whether to
2211 * add bytes to the current range.
2212 */
2213 else if (pVCpu->iem.s.fTbCrossedPage)
2214 {
2215 /* Check that we've got a free range. */
2216 idxRange += 1;
2217 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2218 { /* likely */ }
2219 else
2220 {
2221 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2222 return false;
2223 }
2224
2225 /* Check that we've got a free page slot. */
2226 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2227 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2228 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2229 pTb->aRanges[idxRange].idxPhysPage = 0;
2230 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2231 || pTb->aGCPhysPages[0] == GCPhysNew)
2232 {
2233 pTb->aGCPhysPages[0] = GCPhysNew;
2234 pTb->aRanges[idxRange].idxPhysPage = 1;
2235 }
2236 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2237 || pTb->aGCPhysPages[1] == GCPhysNew)
2238 {
2239 pTb->aGCPhysPages[1] = GCPhysNew;
2240 pTb->aRanges[idxRange].idxPhysPage = 2;
2241 }
2242 else
2243 {
2244 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2245 return false;
2246 }
2247
2248 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2249 {
2250 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2251 pCall->idxRange = idxRange;
2252 pCall->auParams[1] = idxRange;
2253 pCall->auParams[2] = 0;
2254
2255 /* Finish setting up the new range. */
2256 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2257 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2258 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2259 pTb->aRanges[idxRange].u2Unused = 0;
2260 pTb->cRanges++;
2261 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2262 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2263 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2264
2265 /* Determine which function we need to load & check. */
2266 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2267 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2268 : !fConsiderCsLimChecking
2269 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2270 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2271 }
2272 else
2273 {
2274 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2275 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2276 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2277 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2278
2279 /* We're good. Split the instruction over the old and new range table entries. */
2280 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2281
2282 pTb->aRanges[idxRange].offPhysPage = 0;
2283 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2284 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2285 pTb->aRanges[idxRange].u2Unused = 0;
2286 pTb->cRanges++;
2287 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2288 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2289 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2290
2291 /* Determine which function we need to load & check. */
2292 if (pVCpu->iem.s.fTbCheckOpcodes)
2293 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2294 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2295 : !fConsiderCsLimChecking
2296 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2297 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2298 else
2299 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2300 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2301 : !fConsiderCsLimChecking
2302 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2303 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2304 }
2305 }
2306
2307 /*
2308 * Regular case: No new range required.
2309 */
2310 else
2311 {
2312 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2313 if (pVCpu->iem.s.fTbCheckOpcodes)
2314 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2315 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2316 : kIemThreadedFunc_BltIn_CheckOpcodes;
2317 else
2318 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2319
2320 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2321 pTb->cbOpcodes = offOpcode + cbInstr;
2322 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2323 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2324 }
2325
2326 /*
2327 * Commit the call.
2328 */
2329 pTb->Thrd.cCalls++;
2330
2331 /*
2332 * Clear state.
2333 */
2334 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2335 pVCpu->iem.s.fTbCrossedPage = false;
2336 pVCpu->iem.s.fTbCheckOpcodes = false;
2337
2338 /*
2339 * Copy opcode bytes.
2340 */
2341 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2342 pTb->cbOpcodes = offOpcode + cbInstr;
2343 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2344
2345 return true;
2346}
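
#if 0 /* Illustrative sketch, not compiled: the physical page slot selection that appears twice
         above (after a branch and when crossing a page), factored out for readability. A TB can
         cover at most three physical pages: the one it starts on (idxPhysPage 0) plus the two
         aGCPhysPages entries. The helper name is hypothetical. */
static bool iemThreadedTbClaimPhysPageSlotSketch(PIEMTB pTb, uint8_t idxRange, RTGCPHYS GCPhysNew)
{
    if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
        pTb->aRanges[idxRange].idxPhysPage = 0;     /* same physical page the TB starts on */
    else if (   pTb->aGCPhysPages[0] == NIL_RTGCPHYS
             || pTb->aGCPhysPages[0] == GCPhysNew)
    {
        pTb->aGCPhysPages[0] = GCPhysNew;
        pTb->aRanges[idxRange].idxPhysPage = 1;
    }
    else if (   pTb->aGCPhysPages[1] == NIL_RTGCPHYS
             || pTb->aGCPhysPages[1] == GCPhysNew)
    {
        pTb->aGCPhysPages[1] = GCPhysNew;
        pTb->aRanges[idxRange].idxPhysPage = 2;
    }
    else
        return false;                               /* out of page slots - the caller must end the TB */
    return true;
}
#endif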
2347
2348
2349/**
2350 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2351 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2352 *
2353 * @returns true if anything is pending, false if not.
2354 * @param pVCpu The cross context virtual CPU structure of the calling
2355 * thread.
2356 */
2357DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2358{
2359 uint64_t fCpu = pVCpu->fLocalForcedActions;
2360 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2361#if 1
2362 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2363 if (RT_LIKELY( !fCpu
2364 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2365 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2366 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2367 return false;
2368 return true;
2369#else
2370 return false;
2371#endif
2372
2373}
2374
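#if 0 /* Illustrative sketch, not compiled: the condition above restated. Nothing relevant is
         pending when no interrupt force flag is set, or when only maskable (APIC/PIC) interrupts
         are flagged while the guest cannot take them (IF clear or in an interrupt shadow). The
         function name below is hypothetical. */
DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPendingSketch(PVMCPUCC pVCpu)
{
    uint64_t const fCpu = pVCpu->fLocalForcedActions
                        & (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI);
    if (!fCpu)
        return false;                                               /* nothing flagged at all */
    bool const fOnlyMaskable  = !(fCpu & (VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI));
    bool const fCannotTakeIrq = !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
                             || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx);
    return !(fOnlyMaskable && fCannotTakeIrq);                      /* pending unless only masked-off maskable IRQs */
}
#endif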
2375
2376/**
2377 * Called by iemThreadedCompile when a block requires a mode check.
2378 *
2379 * @returns true if we should continue, false if we're out of call entries.
2380 * @param pVCpu The cross context virtual CPU structure of the calling
2381 * thread.
2382 * @param pTb The translation block being compiled.
2383 */
2384static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2385{
2386 /* Emit the call. */
2387 uint32_t const idxCall = pTb->Thrd.cCalls;
2388 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2389 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2390 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2391 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2392 pCall->idxInstr = pTb->cInstructions - 1;
2393 pCall->uUnused0 = 0;
2394 pCall->offOpcode = 0;
2395 pCall->cbOpcode = 0;
2396 pCall->idxRange = 0;
2397 pCall->auParams[0] = pVCpu->iem.s.fExec;
2398 pCall->auParams[1] = 0;
2399 pCall->auParams[2] = 0;
2400 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2401 return true;
2402}
2403
2404
2405/**
2406 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2407 * set.
2408 *
2409 * @returns true if we should continue, false if an IRQ is deliverable or a
2410 * relevant force flag is pending.
2411 * @param pVCpu The cross context virtual CPU structure of the calling
2412 * thread.
2413 * @param pTb The translation block being compiled.
2414 * @sa iemThreadedCompileCheckIrq
2415 */
2416bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2417{
2418 /*
2419 * Skip this if we've already emitted a call after the previous instruction
2420 * or if it's the first call, as we're always checking FFs between blocks.
2421 */
2422 uint32_t const idxCall = pTb->Thrd.cCalls;
2423 if ( idxCall > 0
2424 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2425 {
2426 /* Emit the call. */
2427 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2428 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2429 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2430 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2431 pCall->idxInstr = pTb->cInstructions;
2432 pCall->uUnused0 = 0;
2433 pCall->offOpcode = 0;
2434 pCall->cbOpcode = 0;
2435 pCall->idxRange = 0;
2436 pCall->auParams[0] = 0;
2437 pCall->auParams[1] = 0;
2438 pCall->auParams[2] = 0;
2439 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2440
2441 /* Reset the IRQ check value. */
2442 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2443
2444 /*
2445 * Check for deliverable IRQs and pending force flags.
2446 */
2447 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2448 }
2449 return true; /* continue */
2450}
2451
2452
2453/**
2454 * Emits an IRQ check call and checks for pending IRQs.
2455 *
2456 * @returns true if we should continue, false if an IRQ is deliverable or a
2457 * relevant force flag is pending.
2458 * @param pVCpu The cross context virtual CPU structure of the calling
2459 * thread.
2460 * @param pTb The translation block.
2461 * @sa iemThreadedCompileBeginEmitCallsComplications
2462 */
2463static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2464{
2465 /* Check again in a little bit, unless it is immediately following an STI
2466 in which case we *must* check immediately after the next instruction
2467 as well in case it's executed with interrupt inhibition. We could
2468 otherwise miss the interrupt window. See the irq2 wait2 variant in
2469 bs3-timers-1 which is doing sti + sti + cli. */
2470 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2471 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2472 else
2473 {
2474 pVCpu->iem.s.fTbCurInstrIsSti = false;
2475 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2476 }
2477 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2478
2479 /*
2480 * Emit the call.
2481 */
2482 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2483 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2484 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2485 pCall->idxInstr = pTb->cInstructions;
2486 pCall->uUnused0 = 0;
2487 pCall->offOpcode = 0;
2488 pCall->cbOpcode = 0;
2489 pCall->idxRange = 0;
2490 pCall->auParams[0] = 0;
2491 pCall->auParams[1] = 0;
2492 pCall->auParams[2] = 0;
2493
2494 /*
2495 * Check for deliverable IRQs and pending force flags.
2496 */
2497 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2498}
2499
2500
2501/**
2502 * Compiles a new TB and executes it.
2503 *
2504 * We combine compilation and execution here as it makes it simpler code flow
2505 * in the main loop and it allows interpreting while compiling if we want to
2506 * explore that option.
2507 *
2508 * @returns Strict VBox status code.
2509 * @param pVM The cross context virtual machine structure.
2510 * @param pVCpu The cross context virtual CPU structure of the calling
2511 * thread.
2512 * @param GCPhysPc The physical address corresponding to the current
2513 * RIP+CS.BASE.
2514 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2515 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2516 */
2517static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2518{
2519 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2520 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2521
2522 /*
2523 * Get the TB we use for recompiling. This is a maxed-out TB so
2524 * that we'll make a more efficient copy of it when we're done compiling.
2525 */
2526 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2527 if (pTb)
2528 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2529 else
2530 {
2531 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2532 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2533 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2534 }
2535
2536 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2537 functions may get at it. */
2538 pVCpu->iem.s.pCurTbR3 = pTb;
2539
2540#if 0
2541 /* Make sure the CheckIrq condition matches the one in EM. */
2542 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2543 const uint32_t cZeroCalls = 1;
2544#else
2545 const uint32_t cZeroCalls = 0;
2546#endif
2547
2548 /*
2549 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2550 */
2551 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2552 iemThreadedCompileInitOpcodeFetching(pVCpu);
2553 VBOXSTRICTRC rcStrict;
2554 for (;;)
2555 {
2556 /* Process the next instruction. */
2557#ifdef LOG_ENABLED
2558 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2559 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2560 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2561 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2562#endif
2563 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2564 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2565
2566 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2567#if 0
2568 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2569 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2570 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2571#endif
2572 if ( rcStrict == VINF_SUCCESS
2573 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2574 && !pVCpu->iem.s.fEndTb)
2575 {
2576 Assert(pTb->Thrd.cCalls > cCallsPrev);
2577 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
2578
2579 pVCpu->iem.s.cInstructions++;
2580
2581 /* Check for mode change _after_ certain CIMPL calls, so check that
2582 we continue executing with the same mode value. */
2583 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2584 { /* probable */ }
2585 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2586 { /* extremely likely */ }
2587 else
2588 break;
2589
2590#if defined(LOG_ENABLED) && 0 /* for debugging */
2591 //iemThreadedCompileEmitNop(pTb);
2592 iemThreadedCompileEmitLogCpuState(pTb);
2593#endif
2594 }
2595 else
2596 {
2597 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2598 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2599 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2600 rcStrict = VINF_SUCCESS;
2601
2602 if (pTb->Thrd.cCalls > cZeroCalls)
2603 {
2604 if (cCallsPrev != pTb->Thrd.cCalls)
2605 pVCpu->iem.s.cInstructions++;
2606 break;
2607 }
2608
2609 pVCpu->iem.s.pCurTbR3 = NULL;
2610 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2611 }
2612
2613 /* Check for IRQs? */
2614 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2615 pVCpu->iem.s.cInstrTillIrqCheck--;
2616 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2617 break;
2618
2619 /* Still space in the TB? */
2620 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2621 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated)
2622 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2623 else
2624 {
2625 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
2626 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
2627 break;
2628 }
2629 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2630 }
2631
2632 /*
2633 * Duplicate the TB into a completed one and link it.
2634 */
2635 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2636 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2637
2638 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2639
2640#ifdef IEM_COMPILE_ONLY_MODE
2641 /*
2642 * Execute the translation block.
2643 */
2644#endif
2645
2646 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2647}
2648
2649
2650
2651/*********************************************************************************************************************************
2652* Recompiled Execution Core *
2653*********************************************************************************************************************************/
2654
2655
2656/**
2657 * Executes a translation block.
2658 *
2659 * @returns Strict VBox status code.
2660 * @param pVCpu The cross context virtual CPU structure of the calling
2661 * thread.
2662 * @param pTb The translation block to execute.
2663 */
2664static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2665{
2666 /*
2667 * Check the opcodes in the first page before starting execution.
2668 */
2669/** @todo this test should take IEMTB_F_CS_LIM_CHECKS into account or something.
2670 * The 'near jmp+call' test in bs3-cpu-basic-2 triggers the 2nd assertion here by
2671 * altering the CS limit such that only one or two of the instruction bytes are valid.
2672 * Since it's a CS_LIM problem, the pbInstrBuf is good for the full length, and
2673 * the test succeeds if skipped, but we assert in debug builds. */
2674 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2675 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
2676 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
2677 { /* likely */ }
2678 else
2679 {
2680 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
2681 iemThreadedTbObsolete(pVCpu, pTb, true /*fSafeToFree*/);
2682 return VINF_SUCCESS;
2683 }
2684
2685 /*
2686 * Set the current TB so CIMPL functions may get at it.
2687 */
2688 pVCpu->iem.s.pCurTbR3 = pTb;
2689
2690 /*
2691 * Execute the block.
2692 */
2693#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2694 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2695 {
2696 pVCpu->iem.s.cTbExecNative++;
2697# ifdef LOG_ENABLED
2698 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2699# endif
2700# ifdef RT_ARCH_AMD64
2701 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2702# else
2703 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2704# endif
2705# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2706 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2707# endif
2708# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
2709 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
2710# endif
2711 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2712 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2713 { /* likely */ }
2714 else
2715 {
2716 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2717 pVCpu->iem.s.pCurTbR3 = NULL;
2718 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2719
2720 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2721 only to break out of TB execution early. */
2722 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2723 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2724
2725 /* VINF_IEM_REEXEC_WITH_FLAGS needs to receive special treatment
2726 and converted to VINF_SUCCESS or whatever is appropriate. */
2727 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2728 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
2729
2730 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2731 }
2732 }
2733 else
2734#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2735 {
2736 /*
2737 * The threaded execution loop.
2738 */
2739 pVCpu->iem.s.cTbExecThreaded++;
2740#ifdef LOG_ENABLED
2741 uint64_t uRipPrev = UINT64_MAX;
2742#endif
2743 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2744 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2745 while (cCallsLeft-- > 0)
2746 {
2747#ifdef LOG_ENABLED
2748 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2749 {
2750 uRipPrev = pVCpu->cpum.GstCtx.rip;
2751 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2752 }
2753 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2754 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2755 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2756#endif
2757#ifdef VBOX_WITH_STATISTICS
2758 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
2759 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
2760#endif
2761 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2762 pCallEntry->auParams[0],
2763 pCallEntry->auParams[1],
2764 pCallEntry->auParams[2]);
2765 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2766 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2767 pCallEntry++;
2768 else
2769 {
2770 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2771 pVCpu->iem.s.pCurTbR3 = NULL;
2772 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2773
2774 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2775 only to break out of TB execution early. */
2776 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2777 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2778 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2779 }
2780 }
2781 }
2782
2783 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2784 pVCpu->iem.s.pCurTbR3 = NULL;
2785 return VINF_SUCCESS;
2786}
2787
2788
2789/**
2790 * This is called when the PC doesn't match the current pbInstrBuf.
2791 *
2792 * Upon return, we're ready for opcode fetching. But please note that
2793 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2794 * MMIO or unassigned).
2795 */
2796static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2797{
2798 pVCpu->iem.s.pbInstrBuf = NULL;
2799 pVCpu->iem.s.offCurInstrStart = 0;
2800 pVCpu->iem.s.offInstrNextByte = 0;
2801 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2802 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2803}
2804
2805
2806/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
2807DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2808{
2809 /*
2810 * Set uCurTbStartPc to RIP and calc the effective PC.
2811 */
2812 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2813 pVCpu->iem.s.uCurTbStartPc = uPc;
2814 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2815 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2816
2817 /*
2818 * Advance within the current buffer (PAGE) when possible.
2819 */
2820 if (pVCpu->iem.s.pbInstrBuf)
2821 {
2822 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2823 if (off < pVCpu->iem.s.cbInstrBufTotal)
2824 {
2825 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2826 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2827 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2828 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2829 else
2830 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2831
2832 return pVCpu->iem.s.GCPhysInstrBuf + off;
2833 }
2834 }
2835 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2836}
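
#if 0 /* Illustrative sketch, not compiled: the fast path above simply rebases the linear PC
         (CS.BASE + RIP) onto the already translated instruction buffer page and only falls back
         to iemGetPcWithPhysAndCodeMissed() when the offset leaves that buffer. The helper name
         is hypothetical and the assertion stands in for the fallback. */
DECL_FORCE_INLINE(RTGCPHYS) iemGetPcWithPhysAndCodeFastPathSketch(PVMCPUCC pVCpu)
{
    uint64_t const uLinearPc = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
    uint64_t const off       = uLinearPc - pVCpu->iem.s.uInstrBufPc;    /* offset into the buffered page */
    Assert(pVCpu->iem.s.pbInstrBuf && off < pVCpu->iem.s.cbInstrBufTotal);
    return pVCpu->iem.s.GCPhysInstrBuf + off;                           /* physical address of the PC */
}
#endif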
2837
2838
2839/**
2840 * Determines the extra IEMTB_F_XXX flags.
2841 *
2842 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2843 * IEMTB_F_CS_LIM_CHECKS (or zero).
2844 * @param pVCpu The cross context virtual CPU structure of the calling
2845 * thread.
2846 */
2847DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2848{
2849 uint32_t fRet = 0;
2850
2851 /*
2852 * Determine the inhibit bits.
2853 */
2854 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2855 { /* typical */ }
2856 else
2857 {
2858 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2859 fRet |= IEMTB_F_INHIBIT_SHADOW;
2860 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2861 fRet |= IEMTB_F_INHIBIT_NMI;
2862 }
2863
2864 /*
2865 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2866 * likely to go invalid before the end of the translation block.
2867 */
2868 if (IEM_IS_64BIT_CODE(pVCpu))
2869 return fRet;
2870
2871 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2872 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2873 return fRet;
2874 return fRet | IEMTB_F_CS_LIM_CHECKS;
2875}
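
#if 0 /* Illustrative worked example, not compiled, for the CS.LIM heuristic above: with
         CS.LIM=0xffff, CS.BASE page offset 0 and EIP=0xf000, offFromLim is 0xfff (4095), which
         is below X86_PAGE_SIZE + 16 = 4112, so IEMTB_F_CS_LIM_CHECKS is returned because code
         near the end of the TB could run past the limit. With EIP=0x1000 instead, offFromLim is
         0xefff (61439) and the extra checks are not needed. */
#endif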
2876
2877
2878VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
2879{
2880 /*
2881 * See if there is an interrupt pending in TRPM, inject it if we can.
2882 */
2883 if (!TRPMHasTrap(pVCpu))
2884 { /* likely */ }
2885 else
2886 {
2887 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
2888 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
2889 { /*likely */ }
2890 else
2891 return rcStrict;
2892 }
2893
2894 /*
2895 * Init the execution environment.
2896 */
2897#if 1 /** @todo this seems like a good idea, however if we ever share memory
2898 * directly with other threads on the host, it isn't necessarily... */
2899 if (pVM->cCpus == 1)
2900 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
2901 else
2902#endif
2903 iemInitExec(pVCpu, 0 /*fExecOpts*/);
2904 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
2905 { }
2906 else
2907 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
2908
2909 /*
2910 * Run-loop.
2911 *
2912 * If we're using setjmp/longjmp we combine all the catching here to avoid
2913 * having to call setjmp for each block we're executing.
2914 */
2915 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
2916 for (;;)
2917 {
2918 PIEMTB pTb = NULL;
2919 VBOXSTRICTRC rcStrict;
2920 IEM_TRY_SETJMP(pVCpu, rcStrict)
2921 {
2922 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
2923 for (uint32_t iIterations = 0; ; iIterations++)
2924 {
2925 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
2926 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
2927 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
2928 {
2929 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
2930 pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
2931 if (pTb)
2932 rcStrict = iemTbExec(pVCpu, pTb);
2933 else
2934 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
2935 }
2936 else
2937 {
2938 /* This can only happen if the current PC cannot be translated into a
2939 host pointer, which means we're in MMIO or unmapped memory... */
2940#if defined(VBOX_STRICT) && defined(IN_RING3)
2941 rcStrict = DBGFSTOP(pVM);
2942 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
2943 return rcStrict;
2944#endif
2945 rcStrict = IEMExecLots(pVCpu, 2048, cPollRate, NULL);
2946 }
2947 if (rcStrict == VINF_SUCCESS)
2948 {
2949 Assert(pVCpu->iem.s.cActiveMappings == 0);
2950
2951 uint64_t fCpu = pVCpu->fLocalForcedActions;
2952 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
2953 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2954 | VMCPU_FF_TLB_FLUSH
2955 | VMCPU_FF_UNHALT );
2956 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2957 if (RT_LIKELY( ( !fCpu
2958 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2959 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2960 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
2961 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
2962 {
2963 if (RT_LIKELY( (iIterations & cPollRate) != 0
2964 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
2965 pTb = NULL; /* Clear it before looping so iemTbCacheLookup can safely do native recompilation. */
2966 else
2967 return VINF_SUCCESS;
2968 }
2969 else
2970 return VINF_SUCCESS;
2971 }
2972 else
2973 return rcStrict;
2974 }
2975 }
2976 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
2977 {
2978 pVCpu->iem.s.cLongJumps++;
2979#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2980 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2981#endif
2982 if (pVCpu->iem.s.cActiveMappings > 0)
2983 iemMemRollback(pVCpu);
2984
2985#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2986 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
2987 {
2988 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
2989 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
2990 }
2991#endif
2992
2993#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
2994 /* If pTb isn't NULL we're in iemTbExec. */
2995 if (!pTb)
2996 {
2997 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
2998 pTb = pVCpu->iem.s.pCurTbR3;
2999 if (pTb)
3000 {
3001 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3002 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3003 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3004 }
3005 }
3006#endif
3007 return rcStrict;
3008 }
3009 IEM_CATCH_LONGJMP_END(pVCpu);
3010 }
3011}
3012
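
#if 0 /* Illustrative sketch, not compiled: the shape of the run loop above reduced to its
         essentials - translate the PC to a physical address, look the TB up in the cache,
         execute it if found, otherwise compile (and execute) a new one. The real loop above
         additionally handles MMIO/unmapped PCs, services force flags, polls the timers and
         catches longjmps. The function name below is hypothetical. */
static VBOXSTRICTRC iemExecRecompilerLoopSketch(PVMCC pVM, PVMCPUCC pVCpu)
{
    PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
    for (;;)
    {
        RTGCPHYS const GCPhysPc    = iemGetPcWithPhysAndCode(pVCpu);
        uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
        PIEMTB         pTb         = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
        VBOXSTRICTRC const rcStrict = pTb
                                    ? iemTbExec(pVCpu, pTb)
                                    : iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
        if (rcStrict != VINF_SUCCESS)
            return rcStrict;
    }
}
#endif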