VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp @ 103756

Last change on this file since 103756 was 103612, checked in by vboxsync, 10 months ago

VMM/IEM: Refinement of r161881 (pbInstrBuf == NULL situation). bugref:10370

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 117.2 KB
1/* $Id: IEMAllThrdRecompiler.cpp 103612 2024-02-29 12:59:25Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95
96
97/*
 98 * Narrow down configs here to avoid wasting time on unused configs.
99 */
100
101#ifndef IEM_WITH_CODE_TLB
102# error The code TLB must be enabled for the recompiler.
103#endif
104
105#ifndef IEM_WITH_DATA_TLB
106# error The data TLB must be enabled for the recompiler.
107#endif
108
109#ifndef IEM_WITH_SETJMP
110# error The setjmp approach must be enabled for the recompiler.
111#endif
112
113
114/*********************************************************************************************************************************
115* Internal Functions *
116*********************************************************************************************************************************/
117static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb);
118
119
120/**
121 * Calculates the effective address of a ModR/M memory operand, extended version
122 * for use in the recompilers.
123 *
124 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
125 *
126 * May longjmp on internal error.
127 *
128 * @return The effective address.
129 * @param pVCpu The cross context virtual CPU structure of the calling thread.
130 * @param bRm The ModRM byte.
131 * @param cbImmAndRspOffset - First byte: The size of any immediate
132 * following the effective address opcode bytes
133 * (only for RIP relative addressing).
134 * - Second byte: RSP displacement (for POP [ESP]).
135 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
136 * SIB byte (bits 39:32).
137 *
138 * @note This must be defined in a source file with matching
139 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
140 * or implemented differently...
141 */
142RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
143{
144 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
145# define SET_SS_DEF() \
146 do \
147 { \
148 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
149 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
150 } while (0)
151
152 if (!IEM_IS_64BIT_CODE(pVCpu))
153 {
154/** @todo Check the effective address size crap! */
155 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
156 {
157 uint16_t u16EffAddr;
158
159 /* Handle the disp16 form with no registers first. */
160 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
161 {
162 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
163 *puInfo = u16EffAddr;
164 }
165 else
166 {
 167 /* Get the displacement. */
168 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
169 {
170 case 0: u16EffAddr = 0; break;
171 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
172 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
173 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
174 }
175 *puInfo = u16EffAddr;
176
177 /* Add the base and index registers to the disp. */
178 switch (bRm & X86_MODRM_RM_MASK)
179 {
180 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
181 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
182 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
183 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
184 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
185 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
186 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
187 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
188 }
189 }
190
191 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
192 return u16EffAddr;
193 }
194
195 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
196 uint32_t u32EffAddr;
197 uint64_t uInfo;
198
199 /* Handle the disp32 form with no registers first. */
200 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
201 {
202 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
203 uInfo = u32EffAddr;
204 }
205 else
206 {
207 /* Get the register (or SIB) value. */
208 uInfo = 0;
209 switch ((bRm & X86_MODRM_RM_MASK))
210 {
211 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
212 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
213 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
214 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
215 case 4: /* SIB */
216 {
217 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
218 uInfo = (uint64_t)bSib << 32;
219
220 /* Get the index and scale it. */
221 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
222 {
223 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
224 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
225 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
226 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
227 case 4: u32EffAddr = 0; /*none */ break;
228 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
229 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
230 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
231 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
232 }
233 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
234
235 /* add base */
236 switch (bSib & X86_SIB_BASE_MASK)
237 {
238 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
239 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
240 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
241 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
242 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
243 case 5:
244 if ((bRm & X86_MODRM_MOD_MASK) != 0)
245 {
246 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
247 SET_SS_DEF();
248 }
249 else
250 {
251 uint32_t u32Disp;
252 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
253 u32EffAddr += u32Disp;
254 uInfo |= u32Disp;
255 }
256 break;
257 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
258 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
259 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
260 }
261 break;
262 }
263 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
264 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
265 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
266 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
267 }
268
269 /* Get and add the displacement. */
270 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
271 {
272 case 0:
273 break;
274 case 1:
275 {
276 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
277 u32EffAddr += i8Disp;
278 uInfo |= (uint32_t)(int32_t)i8Disp;
279 break;
280 }
281 case 2:
282 {
283 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
284 u32EffAddr += u32Disp;
285 uInfo |= u32Disp;
286 break;
287 }
288 default:
289 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
290 }
291 }
292
293 *puInfo = uInfo;
294 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
295 return u32EffAddr;
296 }
297
298 uint64_t u64EffAddr;
299 uint64_t uInfo;
300
301 /* Handle the rip+disp32 form with no registers first. */
302 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
303 {
304 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
305 uInfo = (uint32_t)u64EffAddr;
306 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
307 }
308 else
309 {
310 /* Get the register (or SIB) value. */
311 uInfo = 0;
312 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
313 {
314 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
315 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
316 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
317 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
318 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
319 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
320 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
321 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
322 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
323 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
324 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
325 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
326 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
327 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
328 /* SIB */
329 case 4:
330 case 12:
331 {
332 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
333 uInfo = (uint64_t)bSib << 32;
334
335 /* Get the index and scale it. */
336 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
337 {
338 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
339 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
340 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
341 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
342 case 4: u64EffAddr = 0; /*none */ break;
343 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
344 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
345 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
346 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
347 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
348 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
349 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
350 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
351 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
352 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
353 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
354 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
355 }
356 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
357
358 /* add base */
359 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
360 {
361 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
362 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
363 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
364 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
365 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
366 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
367 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
368 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
369 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
370 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
371 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
372 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
373 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
374 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
375 /* complicated encodings */
376 case 5:
377 case 13:
378 if ((bRm & X86_MODRM_MOD_MASK) != 0)
379 {
380 if (!pVCpu->iem.s.uRexB)
381 {
382 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
383 SET_SS_DEF();
384 }
385 else
386 u64EffAddr += pVCpu->cpum.GstCtx.r13;
387 }
388 else
389 {
390 uint32_t u32Disp;
391 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
392 u64EffAddr += (int32_t)u32Disp;
393 uInfo |= u32Disp;
394 }
395 break;
396 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
397 }
398 break;
399 }
400 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
401 }
402
403 /* Get and add the displacement. */
404 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
405 {
406 case 0:
407 break;
408 case 1:
409 {
410 int8_t i8Disp;
411 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
412 u64EffAddr += i8Disp;
413 uInfo |= (uint32_t)(int32_t)i8Disp;
414 break;
415 }
416 case 2:
417 {
418 uint32_t u32Disp;
419 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
420 u64EffAddr += (int32_t)u32Disp;
421 uInfo |= u32Disp;
422 break;
423 }
424 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
425 }
426
427 }
428
429 *puInfo = uInfo;
430 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
431 {
432 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
433 return u64EffAddr;
434 }
435 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
436 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
437 return u64EffAddr & UINT32_MAX;
438}
439
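/*
 * A minimal usage sketch of the helper above, assuming a hypothetical caller:
 * the EX_PACK_IMM_AND_RSP_OFF macro and exShowEffAddrInfo() below are not part
 * of this file and only illustrate how cbImmAndRspOffset is packed and how
 * *puInfo is unpacked afterwards.
 */
#if 0
/* Byte 0: size of any immediate following the ModR/M encoding (RIP-relative
   addressing only); byte 1: extra RSP displacement used for POP [xSP]. */
# define EX_PACK_IMM_AND_RSP_OFF(a_cbImm, a_cbRspOff) ((uint32_t)(a_cbImm) | ((uint32_t)(a_cbRspOff) << 8))

static void exShowEffAddrInfo(PVMCPUCC pVCpu, uint8_t bRm)
{
    uint64_t uInfo    = 0;
    RTGCPTR  GCPtrEff = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, EX_PACK_IMM_AND_RSP_OFF(4, 0), &uInfo);
    uint32_t u32Disp  = (uint32_t)uInfo;        /* bits 31:0  - the displacement, if any */
    uint8_t  bSib     = (uint8_t)(uInfo >> 32); /* bits 39:32 - the SIB byte, if any     */
    Log5(("example: bRm=%#x -> EffAddr=%RGv disp=%#x sib=%#x\n", bRm, GCPtrEff, u32Disp, bSib));
}
#endif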
440
441/*********************************************************************************************************************************
442* Translation Block Cache. *
443*********************************************************************************************************************************/
444
445/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
446static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
447{
448 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
449 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
450 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
451 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
452 if (cMsSinceUse1 != cMsSinceUse2)
453 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
454 if (pTb1->cUsed != pTb2->cUsed)
455 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
456 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
457 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
458 return 0;
459}
460
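/*
 * A minimal usage sketch of the comparator, assuming four hypothetical TB
 * pointers pTbA..pTbD gathered from a collision list: the sort puts the most
 * recently used TBs first, then those with the higher cUsed, then native
 * before threaded, so the pruning code can simply keep the front of the array.
 */
#if 0
static void exSortCollisionList(PVMCPUCC pVCpu, PIEMTB pTbA, PIEMTB pTbB, PIEMTB pTbC, PIEMTB pTbD)
{
    PIEMTB apTbs[4] = { pTbA, pTbB, pTbC, pTbD };
    RTSortApvShell((void **)apTbs, RT_ELEMENTS(apTbs), iemTbCachePruneCmpTb,
                   (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
    /* apTbs[0] is now the TB most worth keeping; apTbs[3] is the best pruning candidate. */
}
#endif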
461#ifdef VBOX_STRICT
462/**
463 * Assertion helper that checks a collisions list count.
464 */
465static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
466{
467 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
468 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
469 while (pTb)
470 {
471 pTb = pTb->pNext;
472 cLeft--;
473 }
474 AssertMsg(cLeft == 0,
475 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
476 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
477}
478#endif
479
480
481DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
482{
483 STAM_PROFILE_START(&pTbCache->StatPrune, a);
484
485 /*
486 * First convert the collision list to an array.
487 */
488 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
489 uintptr_t cInserted = 0;
490 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
491
492 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
493
494 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
495 {
496 apSortedTbs[cInserted++] = pTbCollision;
497 pTbCollision = pTbCollision->pNext;
498 }
499
500 /* Free any excess (impossible). */
501 if (RT_LIKELY(!pTbCollision))
502 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
503 else
504 do
505 {
506 PIEMTB pTbToFree = pTbCollision;
507 pTbCollision = pTbToFree->pNext;
508 iemTbAllocatorFree(pVCpu, pTbToFree);
509 } while (pTbCollision);
510
511 /*
512 * Sort it by most recently used and usage count.
513 */
514 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
515
516 /* We keep half the list for now. Perhaps a bit aggressive... */
517 uintptr_t const cKeep = cInserted / 2;
518
519 /* First free up the TBs we don't wish to keep (before creating the new
520 list because otherwise the free code will scan the list for each one
521 without ever finding it). */
522 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
523 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
524
525 /* Then chain the new TB together with the ones we like to keep of the
526 existing ones and insert this list into the hash table. */
527 pTbCollision = pTb;
528 for (uintptr_t idx = 0; idx < cKeep; idx++)
529 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
530 pTbCollision->pNext = NULL;
531
532 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
533#ifdef VBOX_STRICT
534 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
535#endif
536
537 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
538}
539
540
541static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
542{
543 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
544 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
545 if (!pTbOldHead)
546 {
547 pTb->pNext = NULL;
548 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
549 }
550 else
551 {
552 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
553 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
554 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
555 {
556 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
557 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
558#ifdef VBOX_STRICT
559 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
560#endif
561 }
562 else
563 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
564 }
565}
566
567
568/**
569 * Unlinks @a pTb from the hash table if found in it.
570 *
571 * @returns true if unlinked, false if not present.
572 * @param pTbCache The hash table.
573 * @param pTb The TB to remove.
574 */
575static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
576{
577 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
578 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
579 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
580
581 /*
582 * At the head of the collision list?
583 */
584 if (pTbHash == pTb)
585 {
586 if (!pTb->pNext)
587 pTbCache->apHash[idxHash] = NULL;
588 else
589 {
590 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
591 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
592#ifdef VBOX_STRICT
593 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
594#endif
595 }
596 return true;
597 }
598
599 /*
600 * Search the collision list.
601 */
602 PIEMTB const pTbHead = pTbHash;
603 while (pTbHash)
604 {
605 PIEMTB const pNextTb = pTbHash->pNext;
606 if (pNextTb == pTb)
607 {
608 pTbHash->pNext = pTb->pNext;
609 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
610#ifdef VBOX_STRICT
611 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
612#endif
613 return true;
614 }
615 pTbHash = pNextTb;
616 }
617 return false;
618}
619
620
621/**
622 * Looks up a TB for the given PC and flags in the cache.
623 *
624 * @returns Pointer to TB on success, NULL if not found.
625 * @param pVCpu The cross context virtual CPU structure of the
626 * calling thread.
627 * @param pTbCache The translation block cache.
628 * @param GCPhysPc The PC to look up a TB for.
629 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
630 * the lookup.
631 * @thread EMT(pVCpu)
632 */
633static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
634 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
635{
636 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
637 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
638 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
639#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
640 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
641#endif
642 while (pTb)
643 {
644 if (pTb->GCPhysPc == GCPhysPc)
645 {
646 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
647 {
648 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
649 {
650 STAM_COUNTER_INC(&pTbCache->cLookupHits);
651 AssertMsg(cLeft > 0, ("%d\n", cLeft));
652
653 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
654 pTb->cUsed++;
655#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
656 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != 16)
657 {
658 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
659 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
660 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
661 return pTb;
662 }
663 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
664 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
665 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
666 return iemNativeRecompile(pVCpu, pTb);
667#else
668 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
669 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
670 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
671 return pTb;
672#endif
673 }
674 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
675 }
676 else
677 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
678 }
679 else
680 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
681
682 pTb = pTb->pNext;
683#ifdef VBOX_STRICT
684 cLeft--;
685#endif
686 }
687 AssertMsg(cLeft == 0, ("%d\n", cLeft));
688 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
689 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
690 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
691 return pTb;
692}
693
694
695/*********************************************************************************************************************************
696* Translation Block Allocator. *
697*********************************************************************************************************************************/
698/*
 699 * Translation block allocation management.
700 */
701
702#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
703# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
704 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
705# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
706 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
707# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
708 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
709#else
710# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
711 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
712# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
713 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
714# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
715 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
716#endif
717/** Makes a TB index from a chunk index and TB index within that chunk. */
718#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
719 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
720
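/*
 * Worked example of the index math above, assuming cTbsPerChunk = 8192 (the
 * value is made up for illustration): TB index 20000 maps to chunk
 * 20000 / 8192 = 2 and in-chunk index 20000 - 2 * 8192 = 3616, and
 * IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 3616) yields 20000 again.  With
 * IEMTB_SIZE_IS_POWER_OF_TWO the same mapping is done with cChunkShift and
 * fChunkMask instead of a division.
 */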
721
722/**
723 * Initializes the TB allocator and cache for an EMT.
724 *
725 * @returns VBox status code.
726 * @param pVM The VM handle.
727 * @param cInitialTbs The initial number of translation blocks to
 728 * preallocate.
729 * @param cMaxTbs The max number of translation blocks allowed.
730 * @param cbInitialExec The initial size of the executable memory allocator.
731 * @param cbMaxExec The max size of the executable memory allocator.
732 * @param cbChunkExec The chunk size for executable memory allocator. Zero
733 * or UINT32_MAX for automatically determining this.
734 * @thread EMT
735 */
736DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
737 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
738{
739 PVMCPUCC pVCpu = VMMGetCpu(pVM);
740 Assert(!pVCpu->iem.s.pTbCacheR3);
741 Assert(!pVCpu->iem.s.pTbAllocatorR3);
742
743 /*
744 * Calculate the chunk size of the TB allocator.
745 * The minimum chunk size is 2MiB.
746 */
747 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
748 uint32_t cbPerChunk = _2M;
749 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
750#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
751 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
752 uint8_t cChunkShift = 21 - cTbShift;
753 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
754#endif
755 for (;;)
756 {
757 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
758 break;
759 cbPerChunk *= 2;
760 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
761#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
762 cChunkShift += 1;
763#endif
764 }
765
766 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
767 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
768 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
769
770 cMaxTbs = cMaxChunks * cTbsPerChunk;
771
772 /*
 773 * Allocate and initialize it.
774 */
775 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
776 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
777 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
778 if (!pTbAllocator)
779 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
780 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
781 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
782 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
783 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
784 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
785 pTbAllocator->cbPerChunk = cbPerChunk;
786 pTbAllocator->cMaxTbs = cMaxTbs;
787#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
788 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
789 pTbAllocator->cChunkShift = cChunkShift;
790 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
791#endif
792
793 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
794 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
795
796 /*
797 * Allocate the initial chunks.
798 */
799 for (uint32_t idxChunk = 0; ; idxChunk++)
800 {
801 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
802 if (!paTbs)
803 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
804 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
805 cbPerChunk, idxChunk, pVCpu->idCpu);
806
807 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
808 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
809 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
810 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
811 pTbAllocator->cTotalTbs += cTbsPerChunk;
812
813 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
814 break;
815 }
816
817 /*
818 * Calculate the size of the hash table. We double the max TB count and
819 * round it up to the nearest power of two.
820 */
821 uint32_t cCacheEntries = cMaxTbs * 2;
822 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
823 {
824 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
825 cCacheEntries = RT_BIT_32(iBitTop);
826 Assert(cCacheEntries >= cMaxTbs * 2);
827 }
828
829 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
830 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
831 if (!pTbCache)
832 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
833 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
834 cbTbCache, cCacheEntries, pVCpu->idCpu);
835
836 /*
837 * Initialize it (assumes zeroed by the allocator).
838 */
839 pTbCache->uMagic = IEMTBCACHE_MAGIC;
840 pTbCache->cHash = cCacheEntries;
841 pTbCache->uHashMask = cCacheEntries - 1;
842 Assert(pTbCache->cHash > pTbCache->uHashMask);
843 pVCpu->iem.s.pTbCacheR3 = pTbCache;
844
845 /*
846 * Initialize the native executable memory allocator.
847 */
848#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
849 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
850 AssertLogRelRCReturn(rc, rc);
851#else
852 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
853#endif
854
855 return VINF_SUCCESS;
856}
857
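/*
 * A minimal example call, with made-up sizes (a sketch of a hypothetical
 * caller, not the actual VMM initialization code):
 */
#if 0
    int rc = iemTbInit(pVM, _8K /*cInitialTbs*/, _64K /*cMaxTbs*/,
                       _1M /*cbInitialExec*/, 64 * _1M /*cbMaxExec*/, 0 /*cbChunkExec: auto*/);
    AssertLogRelRCReturn(rc, rc);
#endif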
858
859/**
860 * Inner free worker.
861 */
862static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
863 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
864{
865 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
866 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
867 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
868 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
869
870 /*
871 * Unlink the TB from the hash table.
872 */
873 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
874
875 /*
876 * Free the TB itself.
877 */
878 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
879 {
880 case IEMTB_F_TYPE_THREADED:
881 pTbAllocator->cThreadedTbs -= 1;
882 RTMemFree(pTb->Thrd.paCalls);
883 break;
884#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
885 case IEMTB_F_TYPE_NATIVE:
886 pTbAllocator->cNativeTbs -= 1;
887 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
888 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
889 break;
890#endif
891 default:
892 AssertFailed();
893 }
894 RTMemFree(pTb->pabOpcodes);
895
896 pTb->pNext = NULL;
897 pTb->fFlags = 0;
898 pTb->GCPhysPc = UINT64_MAX;
899 pTb->Gen.uPtr = 0;
900 pTb->Gen.uData = 0;
901 pTb->cbOpcodes = 0;
902 pTb->pabOpcodes = NULL;
903
904 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
905 Assert(pTbAllocator->cInUseTbs > 0);
906
907 pTbAllocator->cInUseTbs -= 1;
908 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
909}
910
911
912/**
913 * Frees the given TB.
914 *
915 * @param pVCpu The cross context virtual CPU structure of the calling
916 * thread.
917 * @param pTb The translation block to free.
918 * @thread EMT(pVCpu)
919 */
920static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
921{
922 /*
923 * Validate state.
924 */
925 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
926 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
927 uint8_t const idxChunk = pTb->idxAllocChunk;
928 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
929 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
930 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
931
932 /*
933 * Call inner worker.
934 */
935 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
936}
937
938
939/**
 940 * Schedules a native TB for freeing when it's no longer being executed and
941 * part of the caller's call stack.
942 *
943 * The TB will be removed from the translation block cache, though, so it isn't
 944 * possible to execute it again, and the IEMTB::pNext member can be used to link
945 * it together with other TBs awaiting freeing.
946 *
947 * @param pVCpu The cross context virtual CPU structure of the calling
948 * thread.
949 * @param pTb The translation block to schedule for freeing.
950 */
951static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
952{
953 /*
954 * Validate state.
955 */
956 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
957 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
958 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
959 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
960 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
961 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
962 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
963 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
964
965 /*
966 * Remove it from the cache and prepend it to the allocator's todo list.
967 */
968 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
969
970 pTb->pNext = pTbAllocator->pDelayedFreeHead;
971 pTbAllocator->pDelayedFreeHead = pTb;
972}
973
974
975/**
976 * Processes the delayed frees.
977 *
978 * This is called by the allocator function as well as the native recompile
979 * function before making any TB or executable memory allocations respectively.
980 */
981void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
982{
983 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
984 pTbAllocator->pDelayedFreeHead = NULL;
985 while (pTb)
986 {
987 PIEMTB const pTbNext = pTb->pNext;
988 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
989 iemTbAllocatorFree(pVCpu, pTb);
990 pTb = pTbNext;
991 }
992}
993
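/*
 * A minimal sketch of the delayed-free flow as it would appear in a caller,
 * assuming pTbObsolete is some native TB that may still be on the call stack
 * (the variable name is hypothetical):
 */
#if 0
    /* Unlink it from the cache now so it cannot be looked up again... */
    iemTbAlloctorScheduleForFree(pVCpu, pTbObsolete);
    /* ...and let the allocator actually free it before handing out new TBs
       or executable memory. */
    iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
#endif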
994
995/**
996 * Grow the translation block allocator with another chunk.
997 */
998static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
999{
1000 /*
1001 * Validate state.
1002 */
1003 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1004 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1005 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1006 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1007 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1008
1009 /*
1010 * Allocate a new chunk and add it to the allocator.
1011 */
1012 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1013 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1014 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1015
1016 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1017 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1018 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1019 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1020 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1021 pTbAllocator->cTotalTbs += cTbsPerChunk;
1022 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1023
1024 return VINF_SUCCESS;
1025}
1026
1027
1028/**
1029 * Allocates a TB from allocator with free block.
1030 *
1031 * This is common code to both the fast and slow allocator code paths.
1032 */
1033DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1034{
1035 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1036
1037 int idxTb;
1038 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1039 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1040 pTbAllocator->cTotalTbs,
1041 pTbAllocator->iStartHint & ~(uint32_t)63);
1042 else
1043 idxTb = -1;
1044 if (idxTb < 0)
1045 {
1046 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1047 AssertLogRelReturn(idxTb >= 0, NULL);
1048 }
1049 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1050 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1051
1052 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1053 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1054 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1055 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1056 Assert(pTb->idxAllocChunk == idxChunk);
1057
1058 pTbAllocator->cInUseTbs += 1;
1059 if (fThreaded)
1060 pTbAllocator->cThreadedTbs += 1;
1061 else
1062 pTbAllocator->cNativeTbs += 1;
1063 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1064 return pTb;
1065}
1066
1067
1068/**
1069 * Slow path for iemTbAllocatorAlloc.
1070 */
1071static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1072{
1073 /*
1074 * With some luck we can add another chunk.
1075 */
1076 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1077 {
1078 int rc = iemTbAllocatorGrow(pVCpu);
1079 if (RT_SUCCESS(rc))
1080 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1081 }
1082
1083 /*
1084 * We have to prune stuff. Sigh.
1085 *
 1086 * This requires scanning for older TBs and kicking them out. Not sure how to
1087 * best do this as we don't want to maintain any list of TBs ordered by last
1088 * usage time. But one reasonably simple approach would be that each time we
1089 * get here we continue a sequential scan of the allocation chunks,
1090 * considering just a smallish number of TBs and freeing a fixed portion of
1091 * them. Say, we consider the next 128 TBs, freeing the least recently used
 1092 * out of groups of 4 TBs, resulting in 32 free TBs.
1093 */
1094 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1095 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1096 uint32_t const cTbsToPrune = 128;
1097 uint32_t const cTbsPerGroup = 4;
1098 uint32_t cFreedTbs = 0;
1099#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1100 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1101#else
1102 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1103#endif
1104 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1105 idxTbPruneFrom = 0;
1106 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1107 {
1108 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1109 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1110 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1111 uint32_t cMsAge = msNow - pTb->msLastUsed;
1112 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1113
1114 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1115 {
1116#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1117 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1118 { /* likely */ }
1119 else
1120 {
1121 idxInChunk2 = 0;
1122 idxChunk2 += 1;
1123 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1124 idxChunk2 = 0;
1125 }
1126#endif
1127 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1128 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1129 if ( cMsAge2 > cMsAge
1130 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1131 {
1132 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1133 pTb = pTb2;
1134 idxChunk = idxChunk2;
1135 idxInChunk = idxInChunk2;
1136 cMsAge = cMsAge2;
1137 }
1138 }
1139
1140 /* Free the TB. */
1141 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1142 cFreedTbs++; /* paranoia */
1143 }
1144 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1145 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1146
1147 /*
1148 * Allocate a TB from the ones we've pruned.
1149 */
1150 if (cFreedTbs)
1151 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1152 return NULL;
1153}
1154
1155
1156/**
1157 * Allocate a translation block.
1158 *
 1159 * @returns Pointer to block on success, NULL if we're out and are unable to
1160 * free up an existing one (very unlikely once implemented).
1161 * @param pVCpu The cross context virtual CPU structure of the calling
1162 * thread.
1163 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1164 * For statistics.
1165 */
1166DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1167{
1168 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1169 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1170
1171 /* Free any pending TBs before we proceed. */
1172 if (!pTbAllocator->pDelayedFreeHead)
1173 { /* probably likely */ }
1174 else
1175 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1176
 1177 /* If the allocator is full, take the slow code path. */
1178 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1179 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1180 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1181}
1182
1183
1184/**
1185 * This is called when we're out of space for native TBs.
1186 *
1187 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1188 * The difference is that we only prune native TBs and will only free any if
 1189 * there are at least two in a group. The conditions under which we're called are
1190 * different - there will probably be free TBs in the table when we're called.
1191 * Therefore we increase the group size and max scan length, though we'll stop
1192 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1193 * up at least 8 TBs.
1194 */
1195void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1196{
1197 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1198 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1199
1200 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1201
1202 /*
1203 * Flush the delayed free list before we start freeing TBs indiscriminately.
1204 */
1205 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1206
1207 /*
1208 * Scan and free TBs.
1209 */
1210 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1211 uint32_t const cTbsToPrune = 128 * 8;
1212 uint32_t const cTbsPerGroup = 4 * 4;
1213 uint32_t cFreedTbs = 0;
1214 uint32_t cMaxInstrs = 0;
1215 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1216 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1217 {
1218 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1219 idxTbPruneFrom = 0;
1220 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1221 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1222 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1223 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1224 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1225
1226 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1227 {
1228 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1229 { /* likely */ }
1230 else
1231 {
1232 idxInChunk2 = 0;
1233 idxChunk2 += 1;
1234 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1235 idxChunk2 = 0;
1236 }
1237 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1238 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1239 {
1240 cNativeTbs += 1;
1241 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1242 if ( cMsAge2 > cMsAge
1243 || ( cMsAge2 == cMsAge
1244 && ( pTb2->cUsed < pTb->cUsed
1245 || ( pTb2->cUsed == pTb->cUsed
1246 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1247 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1248 {
1249 pTb = pTb2;
1250 idxChunk = idxChunk2;
1251 idxInChunk = idxInChunk2;
1252 cMsAge = cMsAge2;
1253 }
1254 }
1255 }
1256
 1257 /* Free the TB if we found at least two native ones in this group. */
1258 if (cNativeTbs >= 2)
1259 {
1260 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1261 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1262 cFreedTbs++;
1263 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1264 break;
1265 }
1266 }
1267 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1268
1269 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1270}
1271
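/*
 * Sizing note derived from the constants above: the scan covers at most
 * 128 * 8 = 1024 TB slots in 64 groups of 4 * 4 = 16 slots, frees at most one
 * native TB per group, and stops early once at least 8 TBs have been freed and
 * the largest freed TB had at least cNeededInstrs instructions.
 */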
1272
1273/*********************************************************************************************************************************
1274* Threaded Recompiler Core *
1275*********************************************************************************************************************************/
1276
1277/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1278static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1279{
1280 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1281 pDis->cbCachedInstr += cbMaxRead;
1282 RT_NOREF(cbMinRead);
1283 return VERR_NO_DATA;
1284}
1285
1286
1287DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1288{
1289 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1290
1291 char szDisBuf[512];
1292
1293 /*
1294 * Print TB info.
1295 */
1296 pHlp->pfnPrintf(pHlp,
1297 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
1298 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1299 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
1300 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1301
1302 /*
1303 * This disassembly is driven by the debug info which follows the native
1304 * code and indicates when it starts with the next guest instructions,
1305 * where labels are and such things.
1306 */
1307 DISSTATE Dis;
1308 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1309 uint32_t const cCalls = pTb->Thrd.cCalls;
1310 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1311 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1312 : DISCPUMODE_64BIT;
1313 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1314 uint8_t idxRange = UINT8_MAX;
1315 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1316 uint32_t offRange = 0;
1317 uint32_t offOpcodes = 0;
1318 uint32_t const cbOpcodes = pTb->cbOpcodes;
1319 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1320
1321 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1322 {
1323 /*
1324 * New opcode range?
1325 */
1326 if ( idxRange == UINT8_MAX
1327 || idxRange >= cRanges
1328 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1329 {
1330 idxRange += 1;
1331 if (idxRange < cRanges)
1332 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1333 else
1334 continue;
1335 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1336 + (pTb->aRanges[idxRange].idxPhysPage == 0
1337 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1338 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1339 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1340 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1341 pTb->aRanges[idxRange].idxPhysPage);
1342 GCPhysPc += offRange;
1343 }
1344
1345 /*
1346 * Disassemble another guest instruction?
1347 */
1348 if ( paCalls[iCall].offOpcode != offOpcodes
1349 && paCalls[iCall].cbOpcode > 0
1350 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1351 {
1352 offOpcodes = paCalls[iCall].offOpcode;
1353 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1354 uint32_t cbInstr = 1;
1355 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1356 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1357 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1358 if (RT_SUCCESS(rc))
1359 {
1360 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1361 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1362 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1363 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1364 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1365 }
1366 else
1367 {
1368 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1369 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1370 cbInstr = paCalls[iCall].cbOpcode;
1371 }
1372 GCPhysPc += cbInstr;
1373 offRange += cbInstr;
1374 }
1375
1376 /*
1377 * Dump call details.
1378 */
1379 pHlp->pfnPrintf(pHlp,
1380 " Call #%u to %s (%u args)\n",
1381 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1382 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1383
1384 /*
1385 * Snoop fExec.
1386 */
1387 switch (paCalls[iCall].enmFunction)
1388 {
1389 default:
1390 break;
1391 case kIemThreadedFunc_BltIn_CheckMode:
1392 fExec = paCalls[iCall].auParams[0];
1393 break;
1394 }
1395 }
1396}
1397
1398
1399
1400/**
 1401 * Allocate a translation block for threaded recompilation.
1402 *
1403 * This is allocated with maxed out call table and storage for opcode bytes,
1404 * because it's only supposed to be called once per EMT to allocate the TB
1405 * pointed to by IEMCPU::pThrdCompileTbR3.
1406 *
1407 * @returns Pointer to the translation block on success, NULL on failure.
1408 * @param pVM The cross context virtual machine structure.
1409 * @param pVCpu The cross context virtual CPU structure of the calling
1410 * thread.
1411 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1412 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1413 */
1414static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1415{
1416 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1417 if (pTb)
1418 {
1419 unsigned const cCalls = 256;
1420 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1421 if (pTb->Thrd.paCalls)
1422 {
1423 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1424 if (pTb->pabOpcodes)
1425 {
1426 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1427 pTb->Thrd.cAllocated = cCalls;
1428 pTb->Thrd.cCalls = 0;
1429 pTb->cbOpcodes = 0;
1430 pTb->pNext = NULL;
1431 pTb->cUsed = 0;
1432 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1433 pTb->idxAllocChunk = UINT8_MAX;
1434 pTb->GCPhysPc = GCPhysPc;
1435 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1436 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1437 pTb->cInstructions = 0;
1438
1439 /* Init the first opcode range. */
1440 pTb->cRanges = 1;
1441 pTb->aRanges[0].cbOpcodes = 0;
1442 pTb->aRanges[0].offOpcodes = 0;
1443 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1444 pTb->aRanges[0].u2Unused = 0;
1445 pTb->aRanges[0].idxPhysPage = 0;
1446 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1447 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1448
1449 return pTb;
1450 }
1451 RTMemFree(pTb->Thrd.paCalls);
1452 }
1453 RTMemFree(pTb);
1454 }
1455 RT_NOREF(pVM);
1456 return NULL;
1457}
1458
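/*
 * Sizing note derived from the code above: the dedicated compile-time TB is
 * allocated with room for 256 threaded call entries and 256 * 16 = 4096 bytes
 * of opcode storage, which is what "maxed out" means in the function comment.
 */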
1459
1460/**
 1461 * Called on the TB that is dedicated to recompilation before it's reused.
1462 *
1463 * @param pVCpu The cross context virtual CPU structure of the calling
1464 * thread.
1465 * @param pTb The translation block to reuse.
1466 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1467 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1468 */
1469static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1470{
1471 pTb->GCPhysPc = GCPhysPc;
1472 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1473 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1474 pTb->Thrd.cCalls = 0;
1475 pTb->cbOpcodes = 0;
1476 pTb->cInstructions = 0;
1477
1478 /* Init the first opcode range. */
1479 pTb->cRanges = 1;
1480 pTb->aRanges[0].cbOpcodes = 0;
1481 pTb->aRanges[0].offOpcodes = 0;
1482 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1483 pTb->aRanges[0].u2Unused = 0;
1484 pTb->aRanges[0].idxPhysPage = 0;
1485 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1486 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1487}
1488
1489
1490/**
 1491 * Used to duplicate a threaded translation block after recompilation is done.
1492 *
1493 * @returns Pointer to the translation block on success, NULL on failure.
1494 * @param pVM The cross context virtual machine structure.
1495 * @param pVCpu The cross context virtual CPU structure of the calling
1496 * thread.
1497 * @param pTbSrc The TB to duplicate.
1498 */
1499static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1500{
1501 /*
1502 * Just using the heap for now. Will make this more efficient and
1503 * complicated later, don't worry. :-)
1504 */
1505 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1506 if (pTb)
1507 {
1508 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1509 memcpy(pTb, pTbSrc, sizeof(*pTb));
1510 pTb->idxAllocChunk = idxAllocChunk;
1511
1512 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1513 Assert(cCalls > 0);
1514 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1515 if (pTb->Thrd.paCalls)
1516 {
1517 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1518 Assert(cbOpcodes > 0);
1519 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1520 if (pTb->pabOpcodes)
1521 {
1522 pTb->Thrd.cAllocated = cCalls;
1523 pTb->pNext = NULL;
1524 pTb->cUsed = 0;
1525 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1526 pTb->fFlags = pTbSrc->fFlags;
1527
1528 return pTb;
1529 }
1530 RTMemFree(pTb->Thrd.paCalls);
1531 }
1532 iemTbAllocatorFree(pVCpu, pTb);
1533 }
1534 RT_NOREF(pVM);
1535 return NULL;
1536
1537}
1538
1539
1540/**
1541 * Adds the given TB to the hash table.
1542 *
1543 * @param pVCpu The cross context virtual CPU structure of the calling
1544 * thread.
1545 * @param pTbCache The cache to add it to.
1546 * @param pTb The translation block to add.
1547 */
1548static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1549{
1550 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1551
1552 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
1553 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1554 if (LogIs12Enabled())
1555 {
1556 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1557 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1558 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1559 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1560 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1561 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1562 pTb->aRanges[idxRange].idxPhysPage == 0
1563 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1564 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1565 }
1566}
1567
1568
1569/**
1570 * Called by opcode verifier functions when they detect a problem.
1571 */
1572void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1573{
1574 /* Unless it's safe, we can only immediately free threaded TBs, as we will
1575 have more code left to execute in native TBs when fSafeToFree == false. */
1576 if (fSafeToFree || (pTb->fFlags & IEMTB_F_TYPE_THREADED))
1577 iemTbAllocatorFree(pVCpu, pTb);
1578 else
1579 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1580}
1581
1582
1583/*
1584 * Real code.
1585 */
1586
1587#ifdef LOG_ENABLED
1588/**
1589 * Logs the current instruction.
1590 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1591 * @param pszFunction The IEM function doing the execution.
1592 * @param idxInstr The instruction number in the block.
1593 */
1594static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1595{
1596# ifdef IN_RING3
1597 if (LogIs2Enabled())
1598 {
1599 char szInstr[256];
1600 uint32_t cbInstr = 0;
1601 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1602 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1603 szInstr, sizeof(szInstr), &cbInstr);
1604
1605 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1606 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1607 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1608 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1609 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1610 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1611 " %s\n"
1612 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1613 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1614 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1615 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1616 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1617 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1618 szInstr));
1619
1620 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1621 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1622 }
1623 else
1624# endif
1625 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1626 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1627}
1628#endif /* LOG_ENABLED */
1629
1630
1631#if 0
1632static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1633{
1634 RT_NOREF(pVM, pVCpu);
1635 return rcStrict;
1636}
1637#endif
1638
1639
1640/**
1641 * Initializes the decoder state when compiling TBs.
1642 *
1643 * This presumes that fExec has already been initialized.
1644 *
1645 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1646 * to apply fixes to them as well.
1647 *
1648 * @param pVCpu The cross context virtual CPU structure of the calling
1649 * thread.
1650 * @param fReInit Clear for the first call for a TB, set for subsequent
1651 * calls from inside the compile loop where we can skip a
1652 * couple of things.
1653 * @param fExtraFlags The extra translation block flags when @a fReInit is
1654 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1655 * checked.
1656 */
1657DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1658{
1659 /* ASSUMES: That iemInitExec was already called and that anyone changing
1660 CPU state affecting the fExec bits since then will have updated fExec! */
1661 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1662 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1663
1664 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1665
1666 /* Decoder state: */
1667 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1668 pVCpu->iem.s.enmEffAddrMode = enmMode;
1669 if (enmMode != IEMMODE_64BIT)
1670 {
1671 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1672 pVCpu->iem.s.enmEffOpSize = enmMode;
1673 }
1674 else
1675 {
1676 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1677 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1678 }
1679 pVCpu->iem.s.fPrefixes = 0;
1680 pVCpu->iem.s.uRexReg = 0;
1681 pVCpu->iem.s.uRexB = 0;
1682 pVCpu->iem.s.uRexIndex = 0;
1683 pVCpu->iem.s.idxPrefix = 0;
1684 pVCpu->iem.s.uVex3rdReg = 0;
1685 pVCpu->iem.s.uVexLength = 0;
1686 pVCpu->iem.s.fEvexStuff = 0;
1687 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1688 pVCpu->iem.s.offModRm = 0;
1689 pVCpu->iem.s.iNextMapping = 0;
1690
1691 if (!fReInit)
1692 {
1693 pVCpu->iem.s.cActiveMappings = 0;
1694 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1695 pVCpu->iem.s.fEndTb = false;
1696 pVCpu->iem.s.fTbCheckOpcodes = false;
1697 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1698 pVCpu->iem.s.fTbCrossedPage = false;
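 /* Re-check for IRQs after 32 instructions, or immediately when the TB starts with interrupts inhibited. */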
1699 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1700 pVCpu->iem.s.fTbCurInstrIsSti = false;
1701 /* Force RF clearing and TF checking on first instruction in the block
1702 as we don't really know what came before and should assume the worst: */
1703 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1704 }
1705 else
1706 {
1707 Assert(pVCpu->iem.s.cActiveMappings == 0);
1708 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1709 Assert(pVCpu->iem.s.fEndTb == false);
1710 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1711 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1712 }
1713 pVCpu->iem.s.fTbCurInstr = 0;
1714
1715#ifdef DBGFTRACE_ENABLED
1716 switch (IEM_GET_CPU_MODE(pVCpu))
1717 {
1718 case IEMMODE_64BIT:
1719 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1720 break;
1721 case IEMMODE_32BIT:
1722 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1723 break;
1724 case IEMMODE_16BIT:
1725 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1726 break;
1727 }
1728#endif
1729}
1730
1731
1732/**
1733 * Initializes the opcode fetcher when starting the compilation.
1734 *
1735 * @param pVCpu The cross context virtual CPU structure of the calling
1736 * thread.
1737 */
1738DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1739{
1740 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1741#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1742 pVCpu->iem.s.offOpcode = 0;
1743#else
1744 RT_NOREF(pVCpu);
1745#endif
1746}
1747
1748
1749/**
1750 * Re-initializes the opcode fetcher between instructions while compiling.
1751 *
1752 * @param pVCpu The cross context virtual CPU structure of the calling
1753 * thread.
1754 */
1755DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1756{
1757 if (pVCpu->iem.s.pbInstrBuf)
1758 {
1759 uint64_t off = pVCpu->cpum.GstCtx.rip;
1760 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1761 off += pVCpu->cpum.GstCtx.cs.u64Base;
1762 off -= pVCpu->iem.s.uInstrBufPc;
1763 if (off < pVCpu->iem.s.cbInstrBufTotal)
1764 {
1765 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1766 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
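 /* Cap the readable window at 15 bytes (the maximum x86 instruction length) past the instruction start, but never beyond the end of the buffer. */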
1767 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1768 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1769 else
1770 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1771 }
1772 else
1773 {
1774 pVCpu->iem.s.pbInstrBuf = NULL;
1775 pVCpu->iem.s.offInstrNextByte = 0;
1776 pVCpu->iem.s.offCurInstrStart = 0;
1777 pVCpu->iem.s.cbInstrBuf = 0;
1778 pVCpu->iem.s.cbInstrBufTotal = 0;
1779 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1780 }
1781 }
1782 else
1783 {
1784 pVCpu->iem.s.offInstrNextByte = 0;
1785 pVCpu->iem.s.offCurInstrStart = 0;
1786 pVCpu->iem.s.cbInstrBuf = 0;
1787 pVCpu->iem.s.cbInstrBufTotal = 0;
1788#ifdef VBOX_STRICT
1789 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1790#endif
1791 }
1792#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1793 pVCpu->iem.s.offOpcode = 0;
1794#endif
1795}
1796
1797#ifdef LOG_ENABLED
1798
1799/**
1800 * Inserts a NOP call.
1801 *
1802 * This is for debugging.
1803 *
1804 * @returns true on success, false if we're out of call entries.
1805 * @param pTb The translation block being compiled.
1806 */
1807bool iemThreadedCompileEmitNop(PIEMTB pTb)
1808{
1809 /* Emit the call. */
1810 uint32_t const idxCall = pTb->Thrd.cCalls;
1811 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1812 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1813 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1814 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
1815 pCall->idxInstr = pTb->cInstructions - 1;
1816 pCall->uUnused0 = 0;
1817 pCall->offOpcode = 0;
1818 pCall->cbOpcode = 0;
1819 pCall->idxRange = 0;
1820 pCall->auParams[0] = 0;
1821 pCall->auParams[1] = 0;
1822 pCall->auParams[2] = 0;
1823 return true;
1824}
1825
1826
1827/**
1828 * Called by iemThreadedCompile if cpu state logging is desired.
1829 *
1830 * @returns true on success, false if we're out of call entries.
1831 * @param pTb The translation block being compiled.
1832 */
1833bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
1834{
1835 /* Emit the call. */
1836 uint32_t const idxCall = pTb->Thrd.cCalls;
1837 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1838 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1839 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1840 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
1841 pCall->idxInstr = pTb->cInstructions - 1;
1842 pCall->uUnused0 = 0;
1843 pCall->offOpcode = 0;
1844 pCall->cbOpcode = 0;
1845 pCall->idxRange = 0;
1846 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
1847 pCall->auParams[1] = 0;
1848 pCall->auParams[2] = 0;
1849 return true;
1850}
1851
1852#endif /* LOG_ENABLED */
1853
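/**
 * Copies the opcode bytes of the current instruction from the decoder's
 * abOpcode buffer into @a pbDst, using an unrolled fall-through switch that
 * handles the 1 to 15 byte instruction lengths x86 allows.
 */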
1854DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1855{
1856 switch (cbInstr)
1857 {
1858 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1859 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1860 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1861 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1862 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1863 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1864 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1865 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1866 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1867 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1868 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1869 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1870 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1871 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1872 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1873 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1874 }
1875}
1876
1877
1878/**
1879 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1880 *
1881 * - CS LIM check required.
1882 * - Must recheck opcode bytes.
1883 * - Previous instruction branched.
1884 * - TLB load detected, probably due to page crossing.
1885 *
1886 * @returns true if everything went well, false if we're out of space in the TB
1887 * (e.g. opcode ranges) or we need to start doing CS.LIM checks.
1888 * @param pVCpu The cross context virtual CPU structure of the calling
1889 * thread.
1890 * @param pTb The translation block being compiled.
1891 */
1892bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1893{
1894 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1895 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1896#if 0
1897 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1898 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1899#endif
1900
1901 /*
1902 * If we're not in 64-bit mode and not already checking CS.LIM we need to
1903 * see if it's needed to start checking.
1904 */
1905 bool fConsiderCsLimChecking;
1906 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
1907 if ( fMode == IEM_F_MODE_X86_64BIT
1908 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
1909 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
1910 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
1911 fConsiderCsLimChecking = false; /* already enabled or not needed */
1912 else
1913 {
1914 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
1915 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
1916 fConsiderCsLimChecking = true; /* likely */
1917 else
1918 {
1919 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
1920 return false;
1921 }
1922 }
1923
1924 /*
1925 * Prepare the call now, even before we know if we can accept the instruction in this TB.
1926 * This allows us to amend parameters w/o making every case suffer.
1927 */
1928 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
1929 uint16_t const offOpcode = pTb->cbOpcodes;
1930 uint8_t idxRange = pTb->cRanges - 1;
1931
1932 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
1933 pCall->idxInstr = pTb->cInstructions;
1934 pCall->offOpcode = offOpcode;
1935 pCall->idxRange = idxRange;
1936 pCall->cbOpcode = cbInstr;
1937 pCall->auParams[0] = (uint32_t)cbInstr
1938 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
1939 /* The upper dword is sometimes used for cbStartPage. */;
1940 pCall->auParams[1] = idxRange;
1941 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
1942
1943/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
1944 * gotten onto. If we do, stop */
1945
1946 /*
1947 * Case 1: We've branched (RIP changed).
1948 *
1949 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
1950 * Req: 1 extra range, no extra phys.
1951 *
1952 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
1953 * necessary (fTbCrossedPage is true).
1954 * Req: 1 extra range, probably 1 extra phys page entry.
1955 *
1956 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
1957 * but in addition we cross into the following page and require
1958 * another TLB load.
1959 * Req: 2 extra ranges, probably 2 extra phys page entries.
1960 *
1961 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
1962 * the following page (thus fTbCrossedPage is true).
1963 * Req: 2 extra ranges, probably 1 extra phys page entry.
1964 *
1965 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
1966 * it may trigger "spuriously" from the CPU point of view because of
1967 * physical page changes that'll invalidate the physical TLB and trigger a
1968 * call to the function. In theory this shouldn't be a big deal, just a bit
1969 * of performance loss as we'll pick the LoadingTlb variants.
1970 *
1971 * Note! We do not currently optimize branching to the next instruction (sorry
1972 * 32-bit PIC code). We could maybe do that in the branching code that
1973 * sets (or not) fTbBranched.
1974 */
1975 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
1976 * variant in win 3.1 code and the call variant in 32-bit linux PIC
1977 * code. This'll require filtering out far jmps and calls, as they
1978 * load CS which should technically be considered indirect since the
1979 * GDT/LDT entry's base address can be modified independently from
1980 * the code. */
1981 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
1982 {
1983 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
1984 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
1985 {
1986 /* 1a + 1b - instruction fully within the branched to page. */
1987 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
1988 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
1989
1990 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
1991 {
1992 /* Check that we've got a free range. */
1993 idxRange += 1;
1994 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1995 { /* likely */ }
1996 else
1997 {
1998 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1999 return false;
2000 }
2001 pCall->idxRange = idxRange;
2002 pCall->auParams[1] = idxRange;
2003 pCall->auParams[2] = 0;
2004
2005 /* Check that we've got a free page slot. */
2006 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2007 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2008 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2009 pTb->aRanges[idxRange].idxPhysPage = 0;
2010 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2011 || pTb->aGCPhysPages[0] == GCPhysNew)
2012 {
2013 pTb->aGCPhysPages[0] = GCPhysNew;
2014 pTb->aRanges[idxRange].idxPhysPage = 1;
2015 }
2016 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2017 || pTb->aGCPhysPages[1] == GCPhysNew)
2018 {
2019 pTb->aGCPhysPages[1] = GCPhysNew;
2020 pTb->aRanges[idxRange].idxPhysPage = 2;
2021 }
2022 else
2023 {
2024 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2025 return false;
2026 }
2027
2028 /* Finish setting up the new range. */
2029 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2030 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2031 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2032 pTb->aRanges[idxRange].u2Unused = 0;
2033 pTb->cRanges++;
2034 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2035 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2036 pTb->aRanges[idxRange].offOpcodes));
2037 }
2038 else
2039 {
2040 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2041 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2042 }
2043
2044 /* Determine which function we need to load & check.
2045 Note! For jumps to a new page, we'll set both fTbBranched and
2046 fTbCrossedPage to avoid unnecessary TLB work for
2047 intra-page branching. */
2048 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2049 || pVCpu->iem.s.fTbCrossedPage)
2050 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2051 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2052 : !fConsiderCsLimChecking
2053 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2054 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2055 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2056 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2057 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2058 : !fConsiderCsLimChecking
2059 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2060 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2061 else
2062 {
2063 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2064 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2065 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2066 : !fConsiderCsLimChecking
2067 ? kIemThreadedFunc_BltIn_CheckOpcodes
2068 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2069 }
2070 }
2071 else
2072 {
2073 /* 1c + 1d - instruction crosses pages. */
2074 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2075 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2076
2077 /* Lazy bird: Check that this isn't case 1c, since we've already
2078 loaded the first physical address. End the TB and
2079 make it a case 2b instead.
2080
2081 Hmm. Too much bother to detect, so just do the same
2082 with case 1d as well. */
2083#if 0 /** @todo get back to this later when we've got the actual branch code in
2084 * place. */
2085 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2086
2087 /* Check that we've got two free ranges. */
2088 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2089 { /* likely */ }
2090 else
2091 return false;
2092 idxRange += 1;
2093 pCall->idxRange = idxRange;
2094 pCall->auParams[1] = idxRange;
2095 pCall->auParams[2] = 0;
2096
2097 /* ... */
2098
2099#else
2100 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2101 return false;
2102#endif
2103 }
2104 }
2105
2106 /*
2107 * Case 2: Page crossing.
2108 *
2109 * Sub-case 2a: The instruction starts on the first byte in the next page.
2110 *
2111 * Sub-case 2b: The instruction has opcode bytes in both the current and
2112 * following page.
2113 *
2114 * Both cases requires a new range table entry and probably a new physical
2115 * page entry. The difference is in which functions to emit and whether to
2116 * add bytes to the current range.
2117 */
2118 else if (pVCpu->iem.s.fTbCrossedPage)
2119 {
2120 /* Check that we've got a free range. */
2121 idxRange += 1;
2122 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2123 { /* likely */ }
2124 else
2125 {
2126 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2127 return false;
2128 }
2129
2130 /* Check that we've got a free page slot. */
2131 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2132 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2133 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2134 pTb->aRanges[idxRange].idxPhysPage = 0;
2135 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2136 || pTb->aGCPhysPages[0] == GCPhysNew)
2137 {
2138 pTb->aGCPhysPages[0] = GCPhysNew;
2139 pTb->aRanges[idxRange].idxPhysPage = 1;
2140 }
2141 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2142 || pTb->aGCPhysPages[1] == GCPhysNew)
2143 {
2144 pTb->aGCPhysPages[1] = GCPhysNew;
2145 pTb->aRanges[idxRange].idxPhysPage = 2;
2146 }
2147 else
2148 {
2149 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2150 return false;
2151 }
2152
2153 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2154 {
2155 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2156 pCall->idxRange = idxRange;
2157 pCall->auParams[1] = idxRange;
2158 pCall->auParams[2] = 0;
2159
2160 /* Finish setting up the new range. */
2161 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2162 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2163 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2164 pTb->aRanges[idxRange].u2Unused = 0;
2165 pTb->cRanges++;
2166 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2167 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2168 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2169
2170 /* Determine which function we need to load & check. */
2171 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2172 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2173 : !fConsiderCsLimChecking
2174 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2175 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2176 }
2177 else
2178 {
2179 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2180 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2181 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2182 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2183
2184 /* We're good. Split the instruction over the old and new range table entries. */
2185 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2186
2187 pTb->aRanges[idxRange].offPhysPage = 0;
2188 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2189 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2190 pTb->aRanges[idxRange].u2Unused = 0;
2191 pTb->cRanges++;
2192 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2193 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2194 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2195
2196 /* Determine which function we need to load & check. */
2197 if (pVCpu->iem.s.fTbCheckOpcodes)
2198 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2199 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2200 : !fConsiderCsLimChecking
2201 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2202 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2203 else
2204 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2205 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2206 : !fConsiderCsLimChecking
2207 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2208 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2209 }
2210 }
2211
2212 /*
2213 * Regular case: No new range required.
2214 */
2215 else
2216 {
2217 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2218 if (pVCpu->iem.s.fTbCheckOpcodes)
2219 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2220 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2221 : kIemThreadedFunc_BltIn_CheckOpcodes;
2222 else
2223 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2224
2225 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2226 pTb->cbOpcodes = offOpcode + cbInstr;
2227 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2228 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2229 }
2230
2231 /*
2232 * Commit the call.
2233 */
2234 pTb->Thrd.cCalls++;
2235
2236 /*
2237 * Clear state.
2238 */
2239 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2240 pVCpu->iem.s.fTbCrossedPage = false;
2241 pVCpu->iem.s.fTbCheckOpcodes = false;
2242
2243 /*
2244 * Copy opcode bytes.
2245 */
2246 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2247 pTb->cbOpcodes = offOpcode + cbInstr;
2248 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2249
2250 return true;
2251}
2252
2253
2254/**
2255 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2256 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2257 *
2258 * @returns true if anything is pending, false if not.
2259 * @param pVCpu The cross context virtual CPU structure of the calling
2260 * thread.
2261 */
2262DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2263{
2264 uint64_t fCpu = pVCpu->fLocalForcedActions;
2265 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2266#if 1
2267 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
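 /* Report nothing pending when no relevant FF is set, or when only PIC/APIC interrupts
    are pending but undeliverable (IF clear or interrupt shadow in effect). */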
2268 if (RT_LIKELY( !fCpu
2269 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2270 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2271 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2272 return false;
2273 return true;
2274#else
2275 return false;
2276#endif
2277
2278}
2279
2280
2281/**
2282 * Called by iemThreadedCompile when a block requires a mode check.
2283 *
2284 * @returns true if we should continue, false if we're out of call entries.
2285 * @param pVCpu The cross context virtual CPU structure of the calling
2286 * thread.
2287 * @param pTb The translation block being compiled.
2288 */
2289static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2290{
2291 /* Emit the call. */
2292 uint32_t const idxCall = pTb->Thrd.cCalls;
2293 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2294 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2295 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2296 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2297 pCall->idxInstr = pTb->cInstructions - 1;
2298 pCall->uUnused0 = 0;
2299 pCall->offOpcode = 0;
2300 pCall->cbOpcode = 0;
2301 pCall->idxRange = 0;
2302 pCall->auParams[0] = pVCpu->iem.s.fExec;
2303 pCall->auParams[1] = 0;
2304 pCall->auParams[2] = 0;
2305 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2306 return true;
2307}
2308
2309
2310/**
2311 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2312 * set.
2313 *
2314 * @returns true if we should continue, false if an IRQ is deliverable or a
2315 * relevant force flag is pending.
2316 * @param pVCpu The cross context virtual CPU structure of the calling
2317 * thread.
2318 * @param pTb The translation block being compiled.
2319 * @sa iemThreadedCompileCheckIrq
2320 */
2321bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2322{
2323 /*
2324 * Skip this if we've already emitted a call after the previous instruction
2325 * or if it's the first call, as we're always checking FFs between blocks.
2326 */
2327 uint32_t const idxCall = pTb->Thrd.cCalls;
2328 if ( idxCall > 0
2329 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2330 {
2331 /* Emit the call. */
2332 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2333 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2334 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2335 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2336 pCall->idxInstr = pTb->cInstructions;
2337 pCall->uUnused0 = 0;
2338 pCall->offOpcode = 0;
2339 pCall->cbOpcode = 0;
2340 pCall->idxRange = 0;
2341 pCall->auParams[0] = 0;
2342 pCall->auParams[1] = 0;
2343 pCall->auParams[2] = 0;
2344 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2345
2346 /* Reset the IRQ check value. */
2347 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2348
2349 /*
2350 * Check for deliverable IRQs and pending force flags.
2351 */
2352 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2353 }
2354 return true; /* continue */
2355}
2356
2357
2358/**
2359 * Emits an IRQ check call and checks for pending IRQs.
2360 *
2361 * @returns true if we should continue, false if an IRQ is deliverable or a
2362 * relevant force flag is pending.
2363 * @param pVCpu The cross context virtual CPU structure of the calling
2364 * thread.
2365 * @param pTb The translation block.
2366 * @sa iemThreadedCompileBeginEmitCallsComplications
2367 */
2368static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2369{
2370 /* Check again in a little bit, unless it is immediately following an STI
2371 in which case we *must* check immediately after the next instruction
2372 as well in case it's executed with interrupt inhibition. We could
2373 otherwise miss the interrupt window. See the irq2 wait2 variant in
2374 bs3-timers-1 which is doing sti + sti + cli. */
2375 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2376 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2377 else
2378 {
2379 pVCpu->iem.s.fTbCurInstrIsSti = false;
2380 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2381 }
2382 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2383
2384 /*
2385 * Emit the call.
2386 */
2387 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2388 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2389 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2390 pCall->idxInstr = pTb->cInstructions;
2391 pCall->uUnused0 = 0;
2392 pCall->offOpcode = 0;
2393 pCall->cbOpcode = 0;
2394 pCall->idxRange = 0;
2395 pCall->auParams[0] = 0;
2396 pCall->auParams[1] = 0;
2397 pCall->auParams[2] = 0;
2398
2399 /*
2400 * Check for deliverable IRQs and pending force flags.
2401 */
2402 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2403}
2404
2405
2406/**
2407 * Compiles a new TB and executes it.
2408 *
2409 * We combine compilation and execution here as it makes it simpler code flow
2410 * in the main loop and it allows interpreting while compiling if we want to
2411 * explore that option.
2412 *
2413 * @returns Strict VBox status code.
2414 * @param pVM The cross context virtual machine structure.
2415 * @param pVCpu The cross context virtual CPU structure of the calling
2416 * thread.
2417 * @param GCPhysPc The physical address corresponding to the current
2418 * RIP+CS.BASE.
2419 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2420 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2421 */
2422static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2423{
2424 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2425 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2426
2427 /*
2428 * Get the TB we use for the recompiling. This is a maxed-out TB so
2429 * that we'll make a more efficient copy of it when we're done compiling.
2430 */
2431 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2432 if (pTb)
2433 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2434 else
2435 {
2436 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2437 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2438 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2439 }
2440
2441 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2442 functions may get at it. */
2443 pVCpu->iem.s.pCurTbR3 = pTb;
2444
2445#if 0
2446 /* Make sure the CheckIrq condition matches the one in EM. */
2447 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2448 const uint32_t cZeroCalls = 1;
2449#else
2450 const uint32_t cZeroCalls = 0;
2451#endif
2452
2453 /*
2454 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2455 */
2456 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2457 iemThreadedCompileInitOpcodeFetching(pVCpu);
2458 VBOXSTRICTRC rcStrict;
2459 for (;;)
2460 {
2461 /* Process the next instruction. */
2462#ifdef LOG_ENABLED
2463 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2464 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2465 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2466 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2467#endif
2468 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2469 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2470
2471 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2472#if 0
2473 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2474 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2475 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2476#endif
2477 if ( rcStrict == VINF_SUCCESS
2478 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2479 && !pVCpu->iem.s.fEndTb)
2480 {
2481 Assert(pTb->Thrd.cCalls > cCallsPrev);
2482 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2483
2484 pVCpu->iem.s.cInstructions++;
2485
2486 /* Check for mode change _after_ certain CIMPL calls, so check that
2487 we continue executing with the same mode value. */
2488 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2489 { /* probable */ }
2490 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2491 { /* extremely likely */ }
2492 else
2493 break;
2494
2495#if defined(LOG_ENABLED) && 0 /* for debugging */
2496 //iemThreadedCompileEmitNop(pTb);
2497 iemThreadedCompileEmitLogCpuState(pTb);
2498#endif
2499 }
2500 else
2501 {
2502 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2503 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2504 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2505 rcStrict = VINF_SUCCESS;
2506
2507 if (pTb->Thrd.cCalls > cZeroCalls)
2508 {
2509 if (cCallsPrev != pTb->Thrd.cCalls)
2510 pVCpu->iem.s.cInstructions++;
2511 break;
2512 }
2513
2514 pVCpu->iem.s.pCurTbR3 = NULL;
2515 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2516 }
2517
2518 /* Check for IRQs? */
2519 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2520 pVCpu->iem.s.cInstrTillIrqCheck--;
2521 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2522 break;
2523
2524 /* Still space in the TB? */
2525 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2526 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated)
2527 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2528 else
2529 {
2530 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
2531 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
2532 break;
2533 }
2534 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2535 }
2536
2537 /*
2538 * Duplicate the TB into a completed one and link it.
2539 */
2540 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2541 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2542
2543 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2544
2545#ifdef IEM_COMPILE_ONLY_MODE
2546 /*
2547 * Execute the translation block.
2548 */
2549#endif
2550
2551 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2552}
2553
2554
2555
2556/*********************************************************************************************************************************
2557* Recompiled Execution Core *
2558*********************************************************************************************************************************/
2559
2560
2561/**
2562 * Executes a translation block.
2563 *
2564 * @returns Strict VBox status code.
2565 * @param pVCpu The cross context virtual CPU structure of the calling
2566 * thread.
2567 * @param pTb The translation block to execute.
2568 */
2569static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2570{
2571 /*
2572 * Check the opcodes in the first page before starting execution.
2573 */
2574/** @todo this test should take IEMTB_F_CS_LIM_CHECKS into account or something.
2575 * The 'near jmp+call' test in bs3-cpu-basic-2 triggers the 2nd assertion here by
2576 * altering the CS limit such that only one of the two instruction bytes is valid.
2577 * Since it's a CS.LIM problem, the pbInstrBuf is good for the full length, and
2578 * the test succeeds if skipped, but we assert in debug builds. */
2579 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2580 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
2581 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
2582 { /* likely */ }
2583 else
2584 {
2585 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
2586 iemThreadedTbObsolete(pVCpu, pTb, true /*fSafeToFree*/);
2587 return VINF_SUCCESS;
2588 }
2589
2590 /*
2591 * Set the current TB so CIMPL functions may get at it.
2592 */
2593 pVCpu->iem.s.pCurTbR3 = pTb;
2594
2595 /*
2596 * Execute the block.
2597 */
2598#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2599 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2600 {
2601 pVCpu->iem.s.cTbExecNative++;
2602# ifdef LOG_ENABLED
2603 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2604# endif
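 /* The native TB entry point only takes pVCpu on AMD64 hosts; other host
    architectures also receive the guest context pointer as a second argument. */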
2605# ifdef RT_ARCH_AMD64
2606 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2607# else
2608 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2609# endif
2610# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2611 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2612# endif
2613 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2614 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2615 { /* likely */ }
2616 else
2617 {
2618 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2619 pVCpu->iem.s.pCurTbR3 = NULL;
2620 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2621
2622 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2623 only to break out of TB execution early. */
2624 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2625 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2626
2627 /* VINF_IEM_REEXEC_WITH_FLAGS needs to receive special treatment
2628 and converted to VINF_SUCCESS or whatever is appropriate. */
2629 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2630 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
2631
2632 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2633 }
2634 }
2635 else
2636#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2637 {
2638 /*
2639 * The threaded execution loop.
2640 */
2641 pVCpu->iem.s.cTbExecThreaded++;
2642#ifdef LOG_ENABLED
2643 uint64_t uRipPrev = UINT64_MAX;
2644#endif
2645 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2646 uint32_t cCallsLeft = pTb->Thrd.cCalls;
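 /* Dispatch each pre-decoded call entry to its threaded worker function,
    passing along the three parameters packed at compile time. */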
2647 while (cCallsLeft-- > 0)
2648 {
2649#ifdef LOG_ENABLED
2650 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2651 {
2652 uRipPrev = pVCpu->cpum.GstCtx.rip;
2653 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2654 }
2655 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2656 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2657 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2658#endif
2659#ifdef VBOX_WITH_STATISTICS
2660 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
2661 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
2662#endif
2663 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2664 pCallEntry->auParams[0],
2665 pCallEntry->auParams[1],
2666 pCallEntry->auParams[2]);
2667 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2668 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2669 pCallEntry++;
2670 else
2671 {
2672 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2673 pVCpu->iem.s.pCurTbR3 = NULL;
2674 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2675
2676 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2677 only to break out of TB execution early. */
2678 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2679 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2680 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2681 }
2682 }
2683 }
2684
2685 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2686 pVCpu->iem.s.pCurTbR3 = NULL;
2687 return VINF_SUCCESS;
2688}
2689
2690
2691/**
2692 * This is called when the PC doesn't match the current pbInstrBuf.
2693 *
2694 * Upon return, we're ready for opcode fetching. But please note that
2695 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2696 * MMIO or unassigned).
2697 */
2698static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2699{
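 /* Invalidate the current instruction buffer state and let iemOpcodeFetchBytesJmp
    re-establish it (and the code TLB entry) for the new PC. */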
2700 pVCpu->iem.s.pbInstrBuf = NULL;
2701 pVCpu->iem.s.offCurInstrStart = 0;
2702 pVCpu->iem.s.offInstrNextByte = 0;
2703 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2704 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2705}
2706
2707
2708/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
2709DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2710{
2711 /*
2712 * Set uCurTbStartPc to RIP and calc the effective PC.
2713 */
2714 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2715 pVCpu->iem.s.uCurTbStartPc = uPc;
2716 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2717 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2718
2719 /*
2720 * Advance within the current buffer (PAGE) when possible.
2721 */
2722 if (pVCpu->iem.s.pbInstrBuf)
2723 {
2724 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2725 if (off < pVCpu->iem.s.cbInstrBufTotal)
2726 {
2727 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2728 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2729 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2730 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2731 else
2732 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2733
2734 return pVCpu->iem.s.GCPhysInstrBuf + off;
2735 }
2736 }
2737 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2738}
2739
2740
2741/**
2742 * Determines the extra IEMTB_F_XXX flags.
2743 *
2744 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2745 * IEMTB_F_CS_LIM_CHECKS (or zero).
2746 * @param pVCpu The cross context virtual CPU structure of the calling
2747 * thread.
2748 */
2749DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2750{
2751 uint32_t fRet = 0;
2752
2753 /*
2754 * Determine the inhibit bits.
2755 */
2756 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2757 { /* typical */ }
2758 else
2759 {
2760 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2761 fRet |= IEMTB_F_INHIBIT_SHADOW;
2762 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2763 fRet |= IEMTB_F_INHIBIT_NMI;
2764 }
2765
2766 /*
2767 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2768 * likely to go invalid before the end of the translation block.
2769 */
2770 if (IEM_IS_64BIT_CODE(pVCpu))
2771 return fRet;
2772
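 /* Only omit IEMTB_F_CS_LIM_CHECKS when at least a page plus 16 bytes of headroom
    remains below the limit (adjusted for the page offset of CS.BASE). */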
2773 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2774 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2775 return fRet;
2776 return fRet | IEMTB_F_CS_LIM_CHECKS;
2777}
2778
2779
2780VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
2781{
2782 /*
2783 * See if there is an interrupt pending in TRPM, inject it if we can.
2784 */
2785 if (!TRPMHasTrap(pVCpu))
2786 { /* likely */ }
2787 else
2788 {
2789 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
2790 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
2791 { /*likely */ }
2792 else
2793 return rcStrict;
2794 }
2795
2796 /*
2797 * Init the execution environment.
2798 */
2799#if 1 /** @todo this seems like a good idea, however if we ever share memory
2800 * directly with other threads on the host, it isn't necessarily... */
2801 if (pVM->cCpus == 1)
2802 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
2803 else
2804#endif
2805 iemInitExec(pVCpu, 0 /*fExecOpts*/);
2806 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
2807 { }
2808 else
2809 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
2810
2811 /*
2812 * Run-loop.
2813 *
2814 * If we're using setjmp/longjmp we combine all the catching here to avoid
2815 * having to call setjmp for each block we're executing.
2816 */
2817 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
2818 for (;;)
2819 {
2820 PIEMTB pTb = NULL;
2821 VBOXSTRICTRC rcStrict;
2822 IEM_TRY_SETJMP(pVCpu, rcStrict)
2823 {
2824 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
2825 for (uint32_t iIterations = 0; ; iIterations++)
2826 {
2827 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
2828 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
2829 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
2830 {
2831 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
2832 pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
2833 if (pTb)
2834 rcStrict = iemTbExec(pVCpu, pTb);
2835 else
2836 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
2837 }
2838 else
2839 {
2840 /* This can only happen if the current PC cannot be translated into a
2841 host pointer, which means we're in MMIO or unmapped memory... */
2842#if defined(VBOX_STRICT) && defined(IN_RING3)
2843 rcStrict = DBGFSTOP(pVM);
2844 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
2845 return rcStrict;
2846#endif
2847 rcStrict = IEMExecLots(pVCpu, 2048, cPollRate, NULL);
2848 }
2849 if (rcStrict == VINF_SUCCESS)
2850 {
2851 Assert(pVCpu->iem.s.cActiveMappings == 0);
2852
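 /* Check for force flags that require us to leave the run loop; CR3 sync,
    TLB flush and unhalt requests are masked out here. */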
2853 uint64_t fCpu = pVCpu->fLocalForcedActions;
2854 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
2855 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2856 | VMCPU_FF_TLB_FLUSH
2857 | VMCPU_FF_UNHALT );
2858 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2859 if (RT_LIKELY( ( !fCpu
2860 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2861 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2862 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
2863 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
2864 {
2865 if (RT_LIKELY( (iIterations & cPollRate) != 0
2866 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
2867 pTb = NULL; /* Clear it before looping so iemTbCacheLookup can safely do native recompilation. */
2868 else
2869 return VINF_SUCCESS;
2870 }
2871 else
2872 return VINF_SUCCESS;
2873 }
2874 else
2875 return rcStrict;
2876 }
2877 }
2878 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
2879 {
2880 pVCpu->iem.s.cLongJumps++;
2881#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2882 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2883#endif
2884 if (pVCpu->iem.s.cActiveMappings > 0)
2885 iemMemRollback(pVCpu);
2886
2887#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2888 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
2889 {
2890 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
2891 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
2892 }
2893#endif
2894
2895#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
2896 /* If pTb isn't NULL we're in iemTbExec. */
2897 if (!pTb)
2898 {
2899 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
2900 pTb = pVCpu->iem.s.pCurTbR3;
2901 if (pTb)
2902 {
2903 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
2904 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
2905 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
2906 }
2907 }
2908#endif
2909 return rcStrict;
2910 }
2911 IEM_CATCH_LONGJMP_END(pVCpu);
2912 }
2913}
2914