VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp @ 101505

Last change on this file since 101505 was 101484, checked in by vboxsync, 14 months ago

VMM/IEM: Basic register allocator sketches that incorporate simple skipping of guest register value loads. Sketched out variable and argument management. Started telling GDB about our jitted code to help with backtraces. ++ bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 100.8 KB
1/* $Id: IEMAllThrdRecompiler.cpp 101484 2023-10-18 01:32:17Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) :
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95
96
97/*
98 * Narrow down configs here to avoid wasting time on unused configs.
99 */
100
101#ifndef IEM_WITH_CODE_TLB
102# error The code TLB must be enabled for the recompiler.
103#endif
104
105#ifndef IEM_WITH_DATA_TLB
106# error The data TLB must be enabled for the recompiler.
107#endif
108
109#ifndef IEM_WITH_SETJMP
110# error The setjmp approach must be enabled for the recompiler.
111#endif
112
113
114/*********************************************************************************************************************************
115* Internal Functions *
116*********************************************************************************************************************************/
117static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb);
118
119
120/**
121 * Calculates the effective address of a ModR/M memory operand, extended version
122 * for use in the recompilers.
123 *
124 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
125 *
126 * May longjmp on internal error.
127 *
128 * @return The effective address.
129 * @param pVCpu The cross context virtual CPU structure of the calling thread.
130 * @param bRm The ModRM byte.
131 * @param cbImmAndRspOffset - First byte: The size of any immediate
132 * following the effective address opcode bytes
133 * (only for RIP relative addressing).
134 * - Second byte: RSP displacement (for POP [ESP]).
135 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
136 * SIB byte (bits 39:32).
137 *
138 * @note This must be defined in a source file with matching
139 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
140 * or implemented differently...
141 */
142RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
143{
144 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
145# define SET_SS_DEF() \
146 do \
147 { \
148 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
149 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
150 } while (0)
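/* Note: SET_SS_DEF makes BP/SP based addressing default to the SS segment
   unless an explicit segment prefix is in effect (IEM_OP_PRF_SEG_MASK). */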
151
152 if (!IEM_IS_64BIT_CODE(pVCpu))
153 {
154/** @todo Check the effective address size crap! */
155 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
156 {
157 uint16_t u16EffAddr;
158
159 /* Handle the disp16 form with no registers first. */
160 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
161 {
162 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
163 *puInfo = u16EffAddr;
164 }
165 else
166 {
167 /* Get the displacement. */
168 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
169 {
170 case 0: u16EffAddr = 0; break;
171 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
172 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
173 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
174 }
175 *puInfo = u16EffAddr;
176
177 /* Add the base and index registers to the disp. */
178 switch (bRm & X86_MODRM_RM_MASK)
179 {
180 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
181 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
182 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
183 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
184 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
185 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
186 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
187 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
188 }
189 }
190
191 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
192 return u16EffAddr;
193 }
194
195 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
196 uint32_t u32EffAddr;
197 uint64_t uInfo;
198
199 /* Handle the disp32 form with no registers first. */
200 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
201 {
202 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
203 uInfo = u32EffAddr;
204 }
205 else
206 {
207 /* Get the register (or SIB) value. */
208 uInfo = 0;
209 switch ((bRm & X86_MODRM_RM_MASK))
210 {
211 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
212 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
213 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
214 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
215 case 4: /* SIB */
216 {
217 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
218 uInfo = (uint64_t)bSib << 32;
219
220 /* Get the index and scale it. */
221 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
222 {
223 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
224 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
225 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
226 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
227 case 4: u32EffAddr = 0; /*none */ break;
228 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
229 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
230 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
231 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
232 }
233 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
234
235 /* add base */
236 switch (bSib & X86_SIB_BASE_MASK)
237 {
238 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
239 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
240 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
241 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
242 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
243 case 5:
244 if ((bRm & X86_MODRM_MOD_MASK) != 0)
245 {
246 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
247 SET_SS_DEF();
248 }
249 else
250 {
251 uint32_t u32Disp;
252 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
253 u32EffAddr += u32Disp;
254 uInfo |= u32Disp;
255 }
256 break;
257 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
258 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
259 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
260 }
261 break;
262 }
263 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
264 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
265 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
266 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
267 }
268
269 /* Get and add the displacement. */
270 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
271 {
272 case 0:
273 break;
274 case 1:
275 {
276 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
277 u32EffAddr += i8Disp;
278 uInfo |= (uint32_t)(int32_t)i8Disp;
279 break;
280 }
281 case 2:
282 {
283 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
284 u32EffAddr += u32Disp;
285 uInfo |= u32Disp;
286 break;
287 }
288 default:
289 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
290 }
291 }
292
293 *puInfo = uInfo;
294 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
295 return u32EffAddr;
296 }
297
298 uint64_t u64EffAddr;
299 uint64_t uInfo;
300
301 /* Handle the rip+disp32 form with no registers first. */
302 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
303 {
304 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
305 uInfo = (uint32_t)u64EffAddr;
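/* RIP-relative addressing is relative to the end of the instruction: the
   current RIP plus the bytes decoded so far plus the size of any trailing
   immediate (passed in the low byte of cbImmAndRspOffset, see the docs). */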
306 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
307 }
308 else
309 {
310 /* Get the register (or SIB) value. */
311 uInfo = 0;
312 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
313 {
314 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
315 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
316 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
317 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
318 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
319 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
320 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
321 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
322 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
323 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
324 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
325 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
326 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
327 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
328 /* SIB */
329 case 4:
330 case 12:
331 {
332 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
333 uInfo = (uint64_t)bSib << 32;
334
335 /* Get the index and scale it. */
336 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
337 {
338 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
339 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
340 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
341 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
342 case 4: u64EffAddr = 0; /*none */ break;
343 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
344 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
345 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
346 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
347 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
348 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
349 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
350 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
351 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
352 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
353 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
354 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
355 }
356 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
357
358 /* add base */
359 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
360 {
361 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
362 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
363 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
364 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
365 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
366 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
367 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
368 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
369 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
370 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
371 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
372 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
373 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
374 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
375 /* complicated encodings */
376 case 5:
377 case 13:
378 if ((bRm & X86_MODRM_MOD_MASK) != 0)
379 {
380 if (!pVCpu->iem.s.uRexB)
381 {
382 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
383 SET_SS_DEF();
384 }
385 else
386 u64EffAddr += pVCpu->cpum.GstCtx.r13;
387 }
388 else
389 {
390 uint32_t u32Disp;
391 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
392 u64EffAddr += (int32_t)u32Disp;
393 uInfo |= u32Disp;
394 }
395 break;
396 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
397 }
398 break;
399 }
400 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
401 }
402
403 /* Get and add the displacement. */
404 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
405 {
406 case 0:
407 break;
408 case 1:
409 {
410 int8_t i8Disp;
411 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
412 u64EffAddr += i8Disp;
413 uInfo |= (uint32_t)(int32_t)i8Disp;
414 break;
415 }
416 case 2:
417 {
418 uint32_t u32Disp;
419 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
420 u64EffAddr += (int32_t)u32Disp;
421 uInfo |= u32Disp;
422 break;
423 }
424 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
425 }
426
427 }
428
429 *puInfo = uInfo;
430 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
431 {
432 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
433 return u64EffAddr;
434 }
435 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
436 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
437 return u64EffAddr & UINT32_MAX;
438}
439
440
441/*********************************************************************************************************************************
442* Translation Block Cache. *
443*********************************************************************************************************************************/
444
445/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
446static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
447{
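/* Sort key: most recently used first (smallest age relative to pvUser, which
   carries the current millisecond timestamp), then highest use count, then
   native TBs ahead of threaded ones. */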
448 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
449 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
450 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
451 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
452 if (cMsSinceUse1 != cMsSinceUse2)
453 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
454 if (pTb1->cUsed != pTb2->cUsed)
455 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
456 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
457 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
458 return 0;
459}
460
461#ifdef VBOX_STRICT
462/**
463 * Assertion helper that checks a collisions list count.
464 */
465static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
466{
467 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
468 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
469 while (pTb)
470 {
471 pTb = pTb->pNext;
472 cLeft--;
473 }
474 AssertMsg(cLeft == 0,
475 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
476 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
477}
478#endif
479
480
481DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
482{
483 STAM_PROFILE_START(&pTbCache->StatPrune, a);
484
485 /*
486 * First convert the collision list to an array.
487 */
488 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
489 uintptr_t cInserted = 0;
490 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
491
492 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
493
494 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
495 {
496 apSortedTbs[cInserted++] = pTbCollision;
497 pTbCollision = pTbCollision->pNext;
498 }
499
500 /* Free any excess (impossible). */
501 if (RT_LIKELY(!pTbCollision))
502 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
503 else
504 do
505 {
506 PIEMTB pTbToFree = pTbCollision;
507 pTbCollision = pTbToFree->pNext;
508 iemTbAllocatorFree(pVCpu, pTbToFree);
509 } while (pTbCollision);
510
511 /*
512 * Sort it by most recently used and usage count.
513 */
514 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
515
516 /* We keep half the list for now. Perhaps a bit aggressive... */
517 uintptr_t const cKeep = cInserted / 2;
518
519 /* First free up the TBs we don't wish to keep (before creating the new
520 list because otherwise the free code will scan the list for each one
521 without ever finding it). */
522 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
523 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
524
525 /* Then chain the new TB together with the existing ones we'd like to keep
526 and insert this list into the hash table. */
527 pTbCollision = pTb;
528 for (uintptr_t idx = 0; idx < cKeep; idx++)
529 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
530 pTbCollision->pNext = NULL;
531
532 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
533#ifdef VBOX_STRICT
534 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
535#endif
536
537 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
538}
539
540
541static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
542{
543 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
544 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
545 if (!pTbOldHead)
546 {
547 pTb->pNext = NULL;
548 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
549 }
550 else
551 {
552 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
553 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
554 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
555 {
556 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
557 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
558#ifdef VBOX_STRICT
559 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
560#endif
561 }
562 else
563 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
564 }
565}
566
567
568/**
569 * Unlinks @a pTb from the hash table if found in it.
570 *
571 * @returns true if unlinked, false if not present.
572 * @param pTbCache The hash table.
573 * @param pTb The TB to remove.
574 */
575static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
576{
577 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
578 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
579 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
580
581 /*
582 * At the head of the collision list?
583 */
584 if (pTbHash == pTb)
585 {
586 if (!pTb->pNext)
587 pTbCache->apHash[idxHash] = NULL;
588 else
589 {
590 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
591 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
592#ifdef VBOX_STRICT
593 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
594#endif
595 }
596 return true;
597 }
598
599 /*
600 * Search the collision list.
601 */
602 PIEMTB const pTbHead = pTbHash;
603 while (pTbHash)
604 {
605 PIEMTB const pNextTb = pTbHash->pNext;
606 if (pNextTb == pTb)
607 {
608 pTbHash->pNext = pTb->pNext;
609 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
610#ifdef VBOX_STRICT
611 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
612#endif
613 return true;
614 }
615 pTbHash = pNextTb;
616 }
617 return false;
618}
619
620
621/**
622 * Looks up a TB for the given PC and flags in the cache.
623 *
624 * @returns Pointer to TB on success, NULL if not found.
625 * @param pVCpu The cross context virtual CPU structure of the
626 * calling thread.
627 * @param pTbCache The translation block cache.
628 * @param GCPhysPc The PC to look up a TB for.
629 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
630 * the lookup.
631 * @thread EMT(pVCpu)
632 */
633static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
634 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
635{
636 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
637 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
638 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
639#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
640 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
641#endif
642 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p L %d\n", fFlags, GCPhysPc, idxHash, pTb, cLeft));
643 while (pTb)
644 {
645 if (pTb->GCPhysPc == GCPhysPc)
646 {
647 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
648 {
649 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
650 {
651 STAM_COUNTER_INC(&pTbCache->cLookupHits);
652 AssertMsg(cLeft > 0, ("%d\n", cLeft));
653
654 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
655 pTb->cUsed++;
656#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
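/* A threaded TB is handed to the native recompiler exactly once, when its
   use count hits 16; native TBs and all other use counts are returned as-is. */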
657 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != 16)
658 return pTb;
659 return iemNativeRecompile(pVCpu, pTb);
660#else
661 return pTb;
662#endif
663 }
664 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
665 }
666 else
667 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
668 }
669 else
670 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
671
672 pTb = pTb->pNext;
673#ifdef VBOX_STRICT
674 cLeft--;
675#endif
676 }
677 AssertMsg(cLeft == 0, ("%d\n", cLeft));
678 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
679 return pTb;
680}
681
682
683/*********************************************************************************************************************************
684* Translation Block Allocator.
685*********************************************************************************************************************************/
686/*
687 * Translation block allocation management.
688 */
689
690#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
691# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
692 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
693# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
694 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
695# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
696 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
697#else
698# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
699 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
700# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
701 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
702# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
703 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
704#endif
705/** Makes a TB index from a chunk index and TB index within that chunk. */
706#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
707 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
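/* Illustration (hypothetical sizes): with the power-of-two variant and
   cTbsPerChunk = 4096 (cChunkShift = 12, fChunkMask = 0xfff), idxTb = 5000
   splits into chunk 5000 >> 12 = 1 and in-chunk index 5000 & 0xfff = 904;
   IEMTBALLOC_IDX_MAKE then reassembles (1 << 12) + 904 = 5000. */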
708
709
710/**
711 * Initializes the TB allocator and cache for an EMT.
712 *
713 * @returns VBox status code.
714 * @param pVM The VM handle.
715 * @param cInitialTbs The initial number of translation blocks to
716 * preallocate.
717 * @param cMaxTbs The max number of translation blocks allowed.
718 * @param cbInitialExec The initial size of the executable memory allocator.
719 * @param cbMaxExec The max size of the executable memory allocator.
720 * @param cbChunkExec The chunk size for executable memory allocator. Zero
721 * or UINT32_MAX for automatically determining this.
722 * @thread EMT
723 */
724DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
725 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
726{
727 PVMCPUCC pVCpu = VMMGetCpu(pVM);
728 Assert(!pVCpu->iem.s.pTbCacheR3);
729 Assert(!pVCpu->iem.s.pTbAllocatorR3);
730
731 /*
732 * Calculate the chunk size of the TB allocator.
733 * The minimum chunk size is 2MiB.
734 */
735 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
736 uint32_t cbPerChunk = _2M;
737 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
738#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
739 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
740 uint8_t cChunkShift = 21 - cTbShift;
741 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
742#endif
743 for (;;)
744 {
745 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
746 break;
747 cbPerChunk *= 2;
748 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
749#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
750 cChunkShift += 1;
751#endif
752 }
753
754 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
755 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
756 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
757
758 cMaxTbs = cMaxChunks * cTbsPerChunk;
759
760 /*
761 * Allocate and initialize it.
762 */
763 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
764 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
765 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
766 if (!pTbAllocator)
767 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
768 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
769 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
770 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
771 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
772 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
773 pTbAllocator->cbPerChunk = cbPerChunk;
774 pTbAllocator->cMaxTbs = cMaxTbs;
775#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
776 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
777 pTbAllocator->cChunkShift = cChunkShift;
778 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
779#endif
780
781 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
782 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
783
784 /*
785 * Allocate the initial chunks.
786 */
787 for (uint32_t idxChunk = 0; ; idxChunk++)
788 {
789 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
790 if (!paTbs)
791 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
792 "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
793 cbPerChunk, idxChunk, pVCpu->idCpu);
794
795 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
796 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
797 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
798 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
799 pTbAllocator->cTotalTbs += cTbsPerChunk;
800
801 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
802 break;
803 }
804
805 /*
806 * Calculate the size of the hash table. We double the max TB count and
807 * round it up to the nearest power of two.
808 */
809 uint32_t cCacheEntries = cMaxTbs * 2;
810 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
811 {
812 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
813 cCacheEntries = RT_BIT_32(iBitTop);
814 Assert(cCacheEntries >= cMaxTbs * 2);
815 }
816
817 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
818 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
819 if (!pTbCache)
820 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
821 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
822 cbTbCache, cCacheEntries, pVCpu->idCpu);
823
824 /*
825 * Initialize it (assumes zeroed by the allocator).
826 */
827 pTbCache->uMagic = IEMTBCACHE_MAGIC;
828 pTbCache->cHash = cCacheEntries;
829 pTbCache->uHashMask = cCacheEntries - 1;
830 Assert(pTbCache->cHash > pTbCache->uHashMask);
831 pVCpu->iem.s.pTbCacheR3 = pTbCache;
832
833 /*
834 * Initialize the native executable memory allocator.
835 */
836#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
837 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
838 AssertLogRelRCReturn(rc, rc);
839#else
840 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
841#endif
842
843 return VINF_SUCCESS;
844}
845
846
847/**
848 * Inner free worker.
849 */
850static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
851 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
852{
853 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
854 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
855 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
856 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
857
858 /*
859 * Unlink the TB from the hash table.
860 */
861 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
862
863 /*
864 * Free the TB itself.
865 */
866 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
867 {
868 case IEMTB_F_TYPE_THREADED:
869 pTbAllocator->cThreadedTbs -= 1;
870 RTMemFree(pTb->Thrd.paCalls);
871 break;
872#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
873 case IEMTB_F_TYPE_NATIVE:
874 pTbAllocator->cNativeTbs -= 1;
875 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
876 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
877 break;
878#endif
879 default:
880 AssertFailed();
881 }
882 RTMemFree(pTb->pabOpcodes);
883
884 pTb->pNext = NULL;
885 pTb->fFlags = 0;
886 pTb->GCPhysPc = UINT64_MAX;
887 pTb->Gen.uPtr = 0;
888 pTb->Gen.uData = 0;
889 pTb->cbOpcodes = 0;
890 pTb->cbOpcodesAllocated = 0;
891 pTb->pabOpcodes = NULL;
892
893 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
894 Assert(pTbAllocator->cInUseTbs > 0);
895
896 pTbAllocator->cInUseTbs -= 1;
897 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
898}
899
900
901/**
902 * Frees the given TB.
903 *
904 * @param pVCpu The cross context virtual CPU structure of the calling
905 * thread.
906 * @param pTb The translation block to free.
907 * @thread EMT(pVCpu)
908 */
909static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
910{
911 /*
912 * Validate state.
913 */
914 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
915 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
916 uint8_t const idxChunk = pTb->idxAllocChunk;
917 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
918 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
919 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
920
921 /*
922 * Call inner worker.
923 */
924 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
925}
926
927
928/**
929 * Schedules a native TB for freeing once it is no longer being executed and
930 * no longer part of the caller's call stack.
931 *
932 * The TB will be removed from the translation block cache, though, so it isn't
933 * possible to execute it again, and the IEMTB::pNext member can be used to link
934 * it together with other TBs awaiting freeing.
935 *
936 * @param pVCpu The cross context virtual CPU structure of the calling
937 * thread.
938 * @param pTb The translation block to schedule for freeing.
939 */
940static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
941{
942 /*
943 * Validate state.
944 */
945 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
946 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
947 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
948 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
949 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
950 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
951 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
952 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
953
954 /*
955 * Remove it from the cache and prepend it to the allocator's todo list.
956 */
957 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
958
959 pTb->pNext = pTbAllocator->pDelayedFreeHead;
960 pTbAllocator->pDelayedFreeHead = pTb;
961}
962
963
964/**
965 * Processes the delayed frees.
966 *
967 * This is called by the allocator function as well as the native recompile
968 * function before making any TB or executable memory allocations respectively.
969 */
970void iemTbAllocatorProcessDelayedFrees(PVMCPU pVCpu, PIEMTBALLOCATOR pTbAllocator)
971{
972 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
973 pTbAllocator->pDelayedFreeHead = NULL;
974 while (pTb)
975 {
976 PIEMTB const pTbNext = pTb->pNext;
977 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
978 iemTbAllocatorFree(pVCpu, pTb);
979 pTb = pTbNext;
980 }
981}
982
983
984/**
985 * Grow the translation block allocator with another chunk.
986 */
987static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
988{
989 /*
990 * Validate state.
991 */
992 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
993 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
994 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
995 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
996 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
997
998 /*
999 * Allocate a new chunk and add it to the allocator.
1000 */
1001 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1002 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1003 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1004
1005 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1006 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1007 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1008 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1009 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1010 pTbAllocator->cTotalTbs += cTbsPerChunk;
1011 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1012
1013 return VINF_SUCCESS;
1014}
1015
1016
1017/**
1018 * Allocates a TB from an allocator that has a free block.
1019 *
1020 * This is common code to both the fast and slow allocator code paths.
1021 */
1022DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1023{
1024 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1025
1026 int idxTb;
1027 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1028 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1029 pTbAllocator->cTotalTbs,
1030 pTbAllocator->iStartHint & ~(uint32_t)63);
1031 else
1032 idxTb = -1;
1033 if (idxTb < 0)
1034 {
1035 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1036 AssertLogRelReturn(idxTb >= 0, NULL);
1037 }
1038 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1039 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1040
1041 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1042 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1043 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1044 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1045 Assert(pTb->idxAllocChunk == idxChunk);
1046
1047 pTbAllocator->cInUseTbs += 1;
1048 if (fThreaded)
1049 pTbAllocator->cThreadedTbs += 1;
1050 else
1051 pTbAllocator->cNativeTbs += 1;
1052 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1053 return pTb;
1054}
1055
1056
1057/**
1058 * Slow path for iemTbAllocatorAlloc.
1059 */
1060static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1061{
1062 /*
1063 * With some luck we can add another chunk.
1064 */
1065 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1066 {
1067 int rc = iemTbAllocatorGrow(pVCpu);
1068 if (RT_SUCCESS(rc))
1069 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1070 }
1071
1072 /*
1073 * We have to prune stuff. Sigh.
1074 *
1075 * This requires scanning for older TBs and kicking them out. Not sure how to
1076 * best do this as we don't want to maintain any list of TBs ordered by last
1077 * usage time. But one reasonably simple approach would be that each time we
1078 * get here we continue a sequential scan of the allocation chunks,
1079 * considering just a smallish number of TBs and freeing a fixed portion of
1080 * them. Say, we consider the next 128 TBs, freeing the least recently used
1081 * out of each group of 4 TBs, resulting in 32 free TBs.
1082 */
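/* With the constants below this scans 128 TBs per call, frees the oldest
   (least used on a tie) TB out of each group of 4, i.e. 32 TBs in total,
   and advances iPruneFrom so successive calls sweep the whole allocator. */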
1083 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1084 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1085 uint32_t const cTbsToPrune = 128;
1086 uint32_t const cTbsPerGroup = 4;
1087 uint32_t cFreedTbs = 0;
1088#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1089 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1090#else
1091 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1092#endif
1093 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1094 idxTbPruneFrom = 0;
1095 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1096 {
1097 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1098 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1099 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1100 uint32_t cMsAge = msNow - pTb->msLastUsed;
1101 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1102
1103 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1104 {
1105#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1106 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1107 { /* likely */ }
1108 else
1109 {
1110 idxInChunk2 = 0;
1111 idxChunk2 += 1;
1112 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1113 idxChunk2 = 0;
1114 }
1115#endif
1116 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1117 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1118 if ( cMsAge2 > cMsAge
1119 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1120 {
1121 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1122 pTb = pTb2;
1123 idxChunk = idxChunk2;
1124 idxInChunk = idxInChunk2;
1125 cMsAge = cMsAge2;
1126 }
1127 }
1128
1129 /* Free the TB. */
1130 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1131 cFreedTbs++; /* paranoia */
1132 }
1133 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1134 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1135
1136 /*
1137 * Allocate a TB from the ones we've pruned.
1138 */
1139 if (cFreedTbs)
1140 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1141 return NULL;
1142}
1143
1144
1145/**
1146 * Allocate a translation block.
1147 *
1148 * @returns Pointer to block on success, NULL if we're out and unable to
1149 * free up an existing one (very unlikely once implemented).
1150 * @param pVCpu The cross context virtual CPU structure of the calling
1151 * thread.
1152 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1153 * For statistics.
1154 */
1155DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1156{
1157 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1158 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1159
1160 /* Free any pending TBs before we proceed. */
1161 if (!pTbAllocator->pDelayedFreeHead)
1162 { /* probably likely */ }
1163 else
1164 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1165
1166 /* If the allocator is full, take the slow code path. */
1167 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1168 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1169 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1170}
1171
1172
1173
1174/*********************************************************************************************************************************
1175* Threaded Recompiler Core *
1176*********************************************************************************************************************************/
1177
1178/**
1179 * Allocate a translation block for threaded recompilation.
1180 *
1181 * This is allocated with maxed out call table and storage for opcode bytes,
1182 * because it's only supposed to be called once per EMT to allocate the TB
1183 * pointed to by IEMCPU::pThrdCompileTbR3.
1184 *
1185 * @returns Pointer to the translation block on success, NULL on failure.
1186 * @param pVM The cross context virtual machine structure.
1187 * @param pVCpu The cross context virtual CPU structure of the calling
1188 * thread.
1189 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1190 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1191 */
1192static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1193{
1194 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1195 if (pTb)
1196 {
1197 unsigned const cCalls = 256;
1198 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1199 if (pTb->Thrd.paCalls)
1200 {
1201 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1202 if (pTb->pabOpcodes)
1203 {
1204 pTb->Thrd.cAllocated = cCalls;
1205 pTb->cbOpcodesAllocated = cCalls * 16;
1206 pTb->Thrd.cCalls = 0;
1207 pTb->cbOpcodes = 0;
1208 pTb->pNext = NULL;
1209 pTb->cUsed = 0;
1210 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1211 pTb->idxAllocChunk = UINT8_MAX;
1212 pTb->GCPhysPc = GCPhysPc;
1213 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1214 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1215 pTb->cInstructions = 0;
1216
1217 /* Init the first opcode range. */
1218 pTb->cRanges = 1;
1219 pTb->aRanges[0].cbOpcodes = 0;
1220 pTb->aRanges[0].offOpcodes = 0;
1221 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1222 pTb->aRanges[0].u2Unused = 0;
1223 pTb->aRanges[0].idxPhysPage = 0;
1224 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1225 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1226
1227 return pTb;
1228 }
1229 RTMemFree(pTb->Thrd.paCalls);
1230 }
1231 RTMemFree(pTb);
1232 }
1233 RT_NOREF(pVM);
1234 return NULL;
1235}
1236
1237
1238/**
1239 * Called on the TB that is dedicated for recompilation before it's reused.
1240 *
1241 * @param pVCpu The cross context virtual CPU structure of the calling
1242 * thread.
1243 * @param pTb The translation block to reuse.
1244 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1245 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1246 */
1247static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1248{
1249 pTb->GCPhysPc = GCPhysPc;
1250 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1251 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1252 pTb->Thrd.cCalls = 0;
1253 pTb->cbOpcodes = 0;
1254 pTb->cInstructions = 0;
1255
1256 /* Init the first opcode range. */
1257 pTb->cRanges = 1;
1258 pTb->aRanges[0].cbOpcodes = 0;
1259 pTb->aRanges[0].offOpcodes = 0;
1260 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1261 pTb->aRanges[0].u2Unused = 0;
1262 pTb->aRanges[0].idxPhysPage = 0;
1263 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1264 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1265}
1266
1267
1268/**
1269 * Used to duplicate a threaded translation block after recompilation is done.
1270 *
1271 * @returns Pointer to the translation block on success, NULL on failure.
1272 * @param pVM The cross context virtual machine structure.
1273 * @param pVCpu The cross context virtual CPU structure of the calling
1274 * thread.
1275 * @param pTbSrc The TB to duplicate.
1276 */
1277static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1278{
1279 /*
1280 * Just using the heap for now. Will make this more efficient and
1281 * complicated later, don't worry. :-)
1282 */
1283 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1284 if (pTb)
1285 {
1286 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1287 memcpy(pTb, pTbSrc, sizeof(*pTb));
1288 pTb->idxAllocChunk = idxAllocChunk;
1289
1290 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1291 Assert(cCalls > 0);
1292 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1293 if (pTb->Thrd.paCalls)
1294 {
1295 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1296 Assert(cbOpcodes > 0);
1297 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1298 if (pTb->pabOpcodes)
1299 {
1300 pTb->Thrd.cAllocated = cCalls;
1301 pTb->cbOpcodesAllocated = cbOpcodes;
1302 pTb->pNext = NULL;
1303 pTb->cUsed = 0;
1304 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1305 pTb->fFlags = pTbSrc->fFlags;
1306
1307 return pTb;
1308 }
1309 RTMemFree(pTb->Thrd.paCalls);
1310 }
1311 iemTbAllocatorFree(pVCpu, pTb);
1312 }
1313 RT_NOREF(pVM);
1314 return NULL;
1315
1316}
1317
1318
1319/**
1320 * Adds the given TB to the hash table.
1321 *
1322 * @param pVCpu The cross context virtual CPU structure of the calling
1323 * thread.
1324 * @param pTbCache The cache to add it to.
1325 * @param pTb The translation block to add.
1326 */
1327static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1328{
1329 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1330
1331 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
1332 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1333 if (LogIs12Enabled())
1334 {
1335 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1336 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1337 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1338 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1339 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1340 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1341 pTb->aRanges[idxRange].idxPhysPage == 0
1342 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1343 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1344 }
1345}
1346
1347
1348/**
1349 * Called by opcode verifier functions when they detect a problem.
1350 */
1351void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1352{
1353 /* Unless it's safe, we can only immediately free threaded TBs, as we will
1354 have more code left to execute in native TBs when fSafeToFree == false. */
1355 if (fSafeToFree || (pTb->fFlags & IEMTB_F_TYPE_THREADED))
1356 iemTbAllocatorFree(pVCpu, pTb);
1357 else
1358 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1359}
1360
1361
1362/*
1363 * Real code.
1364 */
1365
1366#ifdef LOG_ENABLED
1367/**
1368 * Logs the current instruction.
1369 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1370 * @param pszFunction The IEM function doing the execution.
1371 * @param idxInstr The instruction number in the block.
1372 */
1373static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1374{
1375# ifdef IN_RING3
1376 if (LogIs2Enabled())
1377 {
1378 char szInstr[256];
1379 uint32_t cbInstr = 0;
1380 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1381 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1382 szInstr, sizeof(szInstr), &cbInstr);
1383
1384 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1385 Log2(("**** %s fExec=%x pTb=%p #%u\n"
1386 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1387 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1388 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1389 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1390 " %s\n"
1391 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, idxInstr,
1392 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1393 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1394 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1395 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1396 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1397 szInstr));
1398
1399 if (LogIs3Enabled())
1400 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL);
1401 }
1402 else
1403# endif
1404 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1405 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1406}
1407#endif /* LOG_ENABLED */
1408
1409
1410#if 0
1411static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1412{
1413 RT_NOREF(pVM, pVCpu);
1414 return rcStrict;
1415}
1416#endif
1417
1418
1419/**
1420 * Initializes the decoder state when compiling TBs.
1421 *
1422 * This presumes that fExec has already been initialized.
1423 *
1424 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1425 * to apply fixes to them as well.
1426 *
1427 * @param pVCpu The cross context virtual CPU structure of the calling
1428 * thread.
1429 * @param fReInit Clear for the first call for a TB, set for subsequent
1430 * calls from inside the compile loop where we can skip a
1431 * couple of things.
1432 * @param fExtraFlags The extra translation block flags when @a fReInit is
1433 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1434 * checked.
1435 */
1436DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1437{
1438 /* ASSUMES: That iemInitExec was already called and that anyone changing
1439 CPU state affecting the fExec bits since then will have updated fExec! */
1440 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1441 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1442
1443 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1444
1445 /* Decoder state: */
1446 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1447 pVCpu->iem.s.enmEffAddrMode = enmMode;
1448 if (enmMode != IEMMODE_64BIT)
1449 {
1450 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1451 pVCpu->iem.s.enmEffOpSize = enmMode;
1452 }
1453 else
1454 {
1455 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1456 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1457 }
1458 pVCpu->iem.s.fPrefixes = 0;
1459 pVCpu->iem.s.uRexReg = 0;
1460 pVCpu->iem.s.uRexB = 0;
1461 pVCpu->iem.s.uRexIndex = 0;
1462 pVCpu->iem.s.idxPrefix = 0;
1463 pVCpu->iem.s.uVex3rdReg = 0;
1464 pVCpu->iem.s.uVexLength = 0;
1465 pVCpu->iem.s.fEvexStuff = 0;
1466 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1467 pVCpu->iem.s.offModRm = 0;
1468 pVCpu->iem.s.iNextMapping = 0;
1469
1470 if (!fReInit)
1471 {
1472 pVCpu->iem.s.cActiveMappings = 0;
1473 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1474 pVCpu->iem.s.fEndTb = false;
1475 pVCpu->iem.s.fTbCheckOpcodes = false;
1476 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1477 pVCpu->iem.s.fTbCrossedPage = false;
1478 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
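/* I.e. allow up to 32 instructions before the first IRQ check, or check
   right away when an interrupt shadow (IEMTB_F_INHIBIT_SHADOW) is in effect. */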
1479 pVCpu->iem.s.fTbCurInstrIsSti = false;
1480 /* Force RF clearing and TF checking on first instruction in the block
1481 as we don't really know what came before and should assume the worst: */
1482 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1483 }
1484 else
1485 {
1486 Assert(pVCpu->iem.s.cActiveMappings == 0);
1487 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1488 Assert(pVCpu->iem.s.fEndTb == false);
1489 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1490 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1491 }
1492 pVCpu->iem.s.fTbCurInstr = 0;
1493
1494#ifdef DBGFTRACE_ENABLED
1495 switch (IEM_GET_CPU_MODE(pVCpu))
1496 {
1497 case IEMMODE_64BIT:
1498 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1499 break;
1500 case IEMMODE_32BIT:
1501 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1502 break;
1503 case IEMMODE_16BIT:
1504 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1505 break;
1506 }
1507#endif
1508}
1509
1510
1511/**
1512 * Initializes the opcode fetcher when starting the compilation.
1513 *
1514 * @param pVCpu The cross context virtual CPU structure of the calling
1515 * thread.
1516 */
1517DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1518{
1519 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1520#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1521 pVCpu->iem.s.offOpcode = 0;
1522#else
1523 RT_NOREF(pVCpu);
1524#endif
1525}
1526
1527
1528/**
1529 * Re-initializes the opcode fetcher between instructions while compiling.
1530 *
1531 * @param pVCpu The cross context virtual CPU structure of the calling
1532 * thread.
1533 */
1534DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1535{
1536 if (pVCpu->iem.s.pbInstrBuf)
1537 {
1538 uint64_t off = pVCpu->cpum.GstCtx.rip;
1539 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1540 off += pVCpu->cpum.GstCtx.cs.u64Base;
1541 off -= pVCpu->iem.s.uInstrBufPc;
1542 if (off < pVCpu->iem.s.cbInstrBufTotal)
1543 {
1544 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1545 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1546 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1547 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1548 else
1549 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1550 }
1551 else
1552 {
1553 pVCpu->iem.s.pbInstrBuf = NULL;
1554 pVCpu->iem.s.offInstrNextByte = 0;
1555 pVCpu->iem.s.offCurInstrStart = 0;
1556 pVCpu->iem.s.cbInstrBuf = 0;
1557 pVCpu->iem.s.cbInstrBufTotal = 0;
1558 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1559 }
1560 }
1561 else
1562 {
1563 pVCpu->iem.s.offInstrNextByte = 0;
1564 pVCpu->iem.s.offCurInstrStart = 0;
1565 pVCpu->iem.s.cbInstrBuf = 0;
1566 pVCpu->iem.s.cbInstrBufTotal = 0;
1567#ifdef VBOX_STRICT
1568 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1569#endif
1570 }
1571#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1572 pVCpu->iem.s.offOpcode = 0;
1573#endif
1574}
1575
1576
1577DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1578{
1579 switch (cbInstr)
1580 {
1581 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1582 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1583 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1584 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1585 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1586 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1587 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1588 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1589 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1590 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1591 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1592 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1593 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1594 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1595 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1596 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1597 }
1598}
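/*
 * Editorial note (sketch, not part of the original source): for the valid
 * instruction lengths 1 through 15 (the architectural x86 limit), the
 * unrolled switch above behaves like the simple bounded loop below, copying
 * the decoded opcode bytes into the TB's opcode storage.  The function name
 * is hypothetical.
 */
#if 0 /* illustrative only */
DECLINLINE(void) iemThreadedCopyOpcodeBytesLoop(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
{
    Assert(cbInstr >= 1 && cbInstr <= 15);
    for (uint8_t offByte = 0; offByte < cbInstr; offByte++)
        pbDst[offByte] = pVCpu->iem.s.abOpcode[offByte];
}
#endif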
1599
1600
1601/**
1602 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1603 *
1604 * - CS LIM check required.
1605 * - Must recheck opcode bytes.
1606 * - Previous instruction branched.
1607 * - TLB load detected, probably due to page crossing.
1608 *
1609 * @returns true if everything went well, false if we're out of space in the TB
1610 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
1611 * @param pVCpu The cross context virtual CPU structure of the calling
1612 * thread.
1613 * @param pTb The translation block being compiled.
1614 */
1615bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1616{
1617 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1618 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1619#if 0
1620 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1621 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1622#endif
1623
1624 /*
1625 * If we're not in 64-bit mode and not already checking CS.LIM, we need to
1626 * see whether we have to start doing so.
1627 */
1628 bool fConsiderCsLimChecking;
1629 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
1630 if ( fMode == IEM_F_MODE_X86_64BIT
1631 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
1632 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
1633 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
1634 fConsiderCsLimChecking = false; /* already enabled or not needed */
1635 else
1636 {
1637 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
1638 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
1639 fConsiderCsLimChecking = true; /* likely */
1640 else
1641 {
1642 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
1643 return false;
1644 }
1645 }
1646
1647 /*
1648 * Prepare the call now, even before we know if we can accept the instruction in this TB.
1649 * This allows us to amend parameters w/o making every case suffer.
1650 */
1651 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
1652 uint16_t const offOpcode = pTb->cbOpcodes;
1653 uint8_t idxRange = pTb->cRanges - 1;
1654
1655 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
1656 pCall->idxInstr = pTb->cInstructions;
1657 pCall->offOpcode = offOpcode;
1658 pCall->idxRange = idxRange;
1659 pCall->cbOpcode = cbInstr;
1660 pCall->auParams[0] = cbInstr;
1661 pCall->auParams[1] = idxRange;
1662 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
1663
1664/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
1665 * gotten onto. If we do, stop */
1666
1667 /*
1668 * Case 1: We've branched (RIP changed).
1669 *
1670 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
1671 * Req: 1 extra range, no extra phys.
1672 *
1673 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
1674 * necessary (fTbCrossedPage is true).
1675 * Req: 1 extra range, probably 1 extra phys page entry.
1676 *
1677 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
1678 * but in addition we cross into the following page and require
1679 * another TLB load.
1680 * Req: 2 extra ranges, probably 2 extra phys page entries.
1681 *
1682 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
1683 * the following page (thus fTbCrossedPage is true).
1684 * Req: 2 extra ranges, probably 1 extra phys page entry.
1685 *
1686 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
1687 * it may trigger "spuriously" from the CPU point of view because of
1688 * physical page changes that'll invalidate the physical TLB and trigger a
1689 * call to the function. In theory this shouldn't be a big deal, just a bit of
1690 * performance loss as we'll pick the LoadingTlb variants.
1691 *
1692 * Note! We do not currently optimize branching to the next instruction (sorry
1693 * 32-bit PIC code). We could maybe do that in the branching code that
1694 * sets (or not) fTbBranched.
1695 */
1696 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
1697 * variant in win 3.1 code and the call variant in 32-bit linux PIC
1698 * code. This'll require filtering out far jmps and calls, as they
1699 * load CS which should technically be considered indirect since the
1700 * GDT/LDT entry's base address can be modified independently from
1701 * the code. */
1702 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
1703 {
1704 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
1705 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
1706 {
1707 /* 1a + 1b - instruction fully within the branched to page. */
1708 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
1709 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
1710
1711 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
1712 {
1713 /* Check that we've got a free range. */
1714 idxRange += 1;
1715 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1716 { /* likely */ }
1717 else
1718 {
1719 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1720 return false;
1721 }
1722 pCall->idxRange = idxRange;
1723 pCall->auParams[1] = idxRange;
1724 pCall->auParams[2] = 0;
1725
1726 /* Check that we've got a free page slot. */
1727 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1728 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1729 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1730 pTb->aRanges[idxRange].idxPhysPage = 0;
1731 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1732 || pTb->aGCPhysPages[0] == GCPhysNew)
1733 {
1734 pTb->aGCPhysPages[0] = GCPhysNew;
1735 pTb->aRanges[idxRange].idxPhysPage = 1;
1736 }
1737 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1738 || pTb->aGCPhysPages[1] == GCPhysNew)
1739 {
1740 pTb->aGCPhysPages[1] = GCPhysNew;
1741 pTb->aRanges[idxRange].idxPhysPage = 2;
1742 }
1743 else
1744 {
1745 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1746 return false;
1747 }
1748
1749 /* Finish setting up the new range. */
1750 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1751 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1752 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1753 pTb->aRanges[idxRange].u2Unused = 0;
1754 pTb->cRanges++;
1755 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
1756 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
1757 pTb->aRanges[idxRange].offOpcodes));
1758 }
1759 else
1760 {
1761 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1762 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1763 }
1764
1765 /* Determine which function we need to load & check.
1766 Note! For jumps to a new page, we'll set both fTbBranched and
1767 fTbCrossedPage to avoid unnecessary TLB work for intra
1768 page branching */
1769 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
1770 || pVCpu->iem.s.fTbCrossedPage)
1771 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1772 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1773 : !fConsiderCsLimChecking
1774 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1775 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
1776 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
1777 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1778 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1779 : !fConsiderCsLimChecking
1780 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
1781 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
1782 else
1783 {
1784 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
1785 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1786 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1787 : !fConsiderCsLimChecking
1788 ? kIemThreadedFunc_BltIn_CheckOpcodes
1789 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
1790 }
1791 }
1792 else
1793 {
1794 /* 1c + 1d - instruction crosses pages. */
1795 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1796 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1797
1798 /* Lazy bird: Check that this isn't case 1c, since we've already
1799 loaded the first physical address. End the TB and
1800 make it a case 2b instead.
1801
1802 Hmm. Too much bother to detect, so just do the same
1803 with case 1d as well. */
1804#if 0 /** @todo get back to this later when we've got the actual branch code in
1805 * place. */
1806 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1807
1808 /* Check that we've got two free ranges. */
1809 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
1810 { /* likely */ }
1811 else
1812 return false;
1813 idxRange += 1;
1814 pCall->idxRange = idxRange;
1815 pCall->auParams[1] = idxRange;
1816 pCall->auParams[2] = 0;
1817
1818 /* ... */
1819
1820#else
1821 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1822 return false;
1823#endif
1824 }
1825 }
1826
1827 /*
1828 * Case 2: Page crossing.
1829 *
1830 * Sub-case 2a: The instruction starts on the first byte in the next page.
1831 *
1832 * Sub-case 2b: The instruction has opcode bytes in both the current and
1833 * following page.
1834 *
1835 * Both cases requires a new range table entry and probably a new physical
1836 * page entry. The difference is in which functions to emit and whether to
1837 * add bytes to the current range.
1838 */
1839 else if (pVCpu->iem.s.fTbCrossedPage)
1840 {
1841 /* Check that we've got a free range. */
1842 idxRange += 1;
1843 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1844 { /* likely */ }
1845 else
1846 {
1847 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1848 return false;
1849 }
1850
1851 /* Check that we've got a free page slot. */
1852 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1853 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1854 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1855 pTb->aRanges[idxRange].idxPhysPage = 0;
1856 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1857 || pTb->aGCPhysPages[0] == GCPhysNew)
1858 {
1859 pTb->aGCPhysPages[0] = GCPhysNew;
1860 pTb->aRanges[idxRange].idxPhysPage = 1;
1861 }
1862 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1863 || pTb->aGCPhysPages[1] == GCPhysNew)
1864 {
1865 pTb->aGCPhysPages[1] = GCPhysNew;
1866 pTb->aRanges[idxRange].idxPhysPage = 2;
1867 }
1868 else
1869 {
1870 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1871 return false;
1872 }
1873
1874 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
1875 {
1876 Assert(pVCpu->iem.s.offCurInstrStart == 0);
1877 pCall->idxRange = idxRange;
1878 pCall->auParams[1] = idxRange;
1879 pCall->auParams[2] = 0;
1880
1881 /* Finish setting up the new range. */
1882 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1883 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1884 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1885 pTb->aRanges[idxRange].u2Unused = 0;
1886 pTb->cRanges++;
1887 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
1888 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
1889 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
1890
1891 /* Determine which function we need to load & check. */
1892 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1893 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1894 : !fConsiderCsLimChecking
1895 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1896 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
1897 }
1898 else
1899 {
1900 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1901 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1902 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1903 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
1904
1905 /* We're good. Split the instruction over the old and new range table entries. */
1906 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
1907
1908 pTb->aRanges[idxRange].offPhysPage = 0;
1909 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
1910 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
1911 pTb->aRanges[idxRange].u2Unused = 0;
1912 pTb->cRanges++;
1913 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
1914 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
1915 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
1916
1917 /* Determine which function we need to load & check. */
1918 if (pVCpu->iem.s.fTbCheckOpcodes)
1919 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1920 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1921 : !fConsiderCsLimChecking
1922 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1923 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
1924 else
1925 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1926 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1927 : !fConsiderCsLimChecking
1928 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1929 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
1930 }
1931 }
1932
1933 /*
1934 * Regular case: No new range required.
1935 */
1936 else
1937 {
1938 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
1939 if (pVCpu->iem.s.fTbCheckOpcodes)
1940 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1941 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1942 : kIemThreadedFunc_BltIn_CheckOpcodes;
1943 else
1944 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
1945
1946 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1947 pTb->cbOpcodes = offOpcode + cbInstr;
1948 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1949 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1950 }
1951
1952 /*
1953 * Commit the call.
1954 */
1955 pTb->Thrd.cCalls++;
1956
1957 /*
1958 * Clear state.
1959 */
1960 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1961 pVCpu->iem.s.fTbCrossedPage = false;
1962 pVCpu->iem.s.fTbCheckOpcodes = false;
1963
1964 /*
1965 * Copy opcode bytes.
1966 */
1967 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1968 pTb->cbOpcodes = offOpcode + cbInstr;
1969 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1970
1971 return true;
1972}
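/*
 * Editorial sketch (hypothetical helper, not in the original source): the
 * physical page slot selection appears twice in the function above (the
 * branch case and the page-crossing case).  The pattern it implements is
 * captured below: slot 0 is the page of GCPhysPc, slots 1 and 2 map to
 * aGCPhysPages[0] and aGCPhysPages[1], and UINT8_MAX signals that both
 * spare slots are taken, forcing the compiler to end the TB.
 */
#if 0 /* illustrative only */
static uint8_t iemThreadedTbFindOrAddPhysPageSlot(PIEMTB pTb, RTGCPHYS GCPhysNew)
{
    if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
        return 0;
    AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
    for (uint8_t idxSlot = 0; idxSlot < RT_ELEMENTS(pTb->aGCPhysPages); idxSlot++)
        if (   pTb->aGCPhysPages[idxSlot] == NIL_RTGCPHYS
            || pTb->aGCPhysPages[idxSlot] == GCPhysNew)
        {
            pTb->aGCPhysPages[idxSlot] = GCPhysNew;
            return (uint8_t)(idxSlot + 1);
        }
    return UINT8_MAX; /* out of page slots, caller must end the TB */
}
#endif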
1973
1974
1975/**
1976 * Worker for iemThreadedCompileBeginEmitCallsComplications and
1977 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
1978 *
1979 * @returns true if anything is pending, false if not.
1980 * @param pVCpu The cross context virtual CPU structure of the calling
1981 * thread.
1982 */
1983DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
1984{
1985 uint64_t fCpu = pVCpu->fLocalForcedActions;
1986 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
1987#if 1
1988 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
1989 if (RT_LIKELY( !fCpu
1990 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
1991 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
1992 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
1993 return false;
1994 return true;
1995#else
1996 return false;
1997#endif
1998
1999}
2000
2001
2002/**
2003 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2004 * set.
2005 *
2006 * @returns true if we should continue, false if an IRQ is deliverable or a
2007 * relevant force flag is pending.
2008 * @param pVCpu The cross context virtual CPU structure of the calling
2009 * thread.
2010 * @param pTb The translation block being compiled.
2011 * @sa iemThreadedCompileCheckIrq
2012 */
2013bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2014{
2015 /*
2016 * Skip this if we've already emitted a call after the previous instruction
2017 * or if it's the first call, as we're always checking FFs between blocks.
2018 */
2019 uint32_t const idxCall = pTb->Thrd.cCalls;
2020 if ( idxCall > 0
2021 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2022 {
2023 /* Emit the call. */
2024 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2025 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2026 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2027 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2028 pCall->idxInstr = pTb->cInstructions;
2029 pCall->uUnused0 = 0;
2030 pCall->offOpcode = 0;
2031 pCall->cbOpcode = 0;
2032 pCall->idxRange = 0;
2033 pCall->auParams[0] = 0;
2034 pCall->auParams[1] = 0;
2035 pCall->auParams[2] = 0;
2036 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2037
2038 /* Reset the IRQ check value. */
2039 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2040
2041 /*
2042 * Check for deliverable IRQs and pending force flags.
2043 */
2044 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2045 }
2046 return true; /* continue */
2047}
2048
2049
2050/**
2051 * Emits an IRQ check call and checks for pending IRQs.
2052 *
2053 * @returns true if we should continue, false if an IRQ is deliverable or a
2054 * relevant force flag is pending.
2055 * @param pVCpu The cross context virtual CPU structure of the calling
2056 * thread.
2057 * @param pTb The translation block.
2058 * @sa iemThreadedCompileBeginEmitCallsComplications
2059 */
2060static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2061{
2062 /* Check again in a little bit, unless it is immediately following an STI
2063 in which case we *must* check immediately after the next instruction
2064 as well in case it's executed with interrupt inhibition. We could
2065 otherwise miss the interrupt window. See the irq2 wait2 variant in
2066 bs3-timers-1 which is doing sti + sti + cli. */
2067 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2068 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2069 else
2070 {
2071 pVCpu->iem.s.fTbCurInstrIsSti = false;
2072 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2073 }
2074 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2075
2076 /*
2077 * Emit the call.
2078 */
2079 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2080 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2081 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2082 pCall->idxInstr = pTb->cInstructions;
2083 pCall->uUnused0 = 0;
2084 pCall->offOpcode = 0;
2085 pCall->cbOpcode = 0;
2086 pCall->idxRange = 0;
2087 pCall->auParams[0] = 0;
2088 pCall->auParams[1] = 0;
2089 pCall->auParams[2] = 0;
2090
2091 /*
2092 * Check for deliverable IRQs and pending force flags.
2093 */
2094 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2095}
2096
2097
2098/**
2099 * Compiles a new TB and executes it.
2100 *
2101 * We combine compilation and execution here as it makes for simpler code flow
2102 * in the main loop and allows interpreting while compiling if we want to
2103 * explore that option.
2104 *
2105 * @returns Strict VBox status code.
2106 * @param pVM The cross context virtual machine structure.
2107 * @param pVCpu The cross context virtual CPU structure of the calling
2108 * thread.
2109 * @param GCPhysPc The physical address corresponding to the current
2110 * RIP+CS.BASE.
2111 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2112 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2113 */
2114static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2115{
2116 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2117 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2118
2119 /*
2120 * Get the TB we use for the recompiling. This is a maxed-out TB
2121 * that we'll make a more efficient copy of when we're done compiling.
2122 */
2123 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2124 if (pTb)
2125 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2126 else
2127 {
2128 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2129 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2130 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2131 }
2132
2133 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2134 functions may get at it. */
2135 pVCpu->iem.s.pCurTbR3 = pTb;
2136
2137#if 0
2138 /* Make sure the CheckIrq condition matches the one in EM. */
2139 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2140 const uint32_t cZeroCalls = 1;
2141#else
2142 const uint32_t cZeroCalls = 0;
2143#endif
2144
2145 /*
2146 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2147 */
2148 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2149 iemThreadedCompileInitOpcodeFetching(pVCpu);
2150 VBOXSTRICTRC rcStrict;
2151 for (;;)
2152 {
2153 /* Process the next instruction. */
2154#ifdef LOG_ENABLED
2155 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2156 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2157 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2158#endif
2159 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2160 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2161
2162 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2163 if ( rcStrict == VINF_SUCCESS
2164 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2165 && !pVCpu->iem.s.fEndTb)
2166 {
2167 Assert(pTb->Thrd.cCalls > cCallsPrev);
2168 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2169
2170 pVCpu->iem.s.cInstructions++;
2171 }
2172 else
2173 {
2174 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2175 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2176 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2177 rcStrict = VINF_SUCCESS;
2178
2179 if (pTb->Thrd.cCalls > cZeroCalls)
2180 {
2181 if (cCallsPrev != pTb->Thrd.cCalls)
2182 pVCpu->iem.s.cInstructions++;
2183 break;
2184 }
2185
2186 pVCpu->iem.s.pCurTbR3 = NULL;
2187 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2188 }
2189
2190 /* Check for IRQs? */
2191 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2192 pVCpu->iem.s.cInstrTillIrqCheck--;
2193 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2194 break;
2195
2196 /* Still space in the TB? */
2197 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2198 && pTb->cbOpcodes + 16 <= pTb->cbOpcodesAllocated)
2199 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2200 else
2201 {
2202 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
2203 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
2204 break;
2205 }
2206 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2207 }
2208
2209 /*
2210 * Duplicate the TB into a completed one and link it.
2211 */
2212 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2213 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2214
2215 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2216
2217#ifdef IEM_COMPILE_ONLY_MODE
2218 /*
2219 * Execute the translation block.
2220 */
2221#endif
2222
2223 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2224}
2225
2226
2227
2228/*********************************************************************************************************************************
2229* Recompiled Execution Core *
2230*********************************************************************************************************************************/
2231
2232/**
2233 * Executes a translation block.
2234 *
2235 * @returns Strict VBox status code.
2236 * @param pVCpu The cross context virtual CPU structure of the calling
2237 * thread.
2238 * @param pTb The translation block to execute.
2239 */
2240static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2241{
2242 /*
2243 * Check the opcodes in the first page before starting execution.
2244 */
2245 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2246 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
2247 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
2248 { /* likely */ }
2249 else
2250 {
2251 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
2252 iemThreadedTbObsolete(pVCpu, pTb, true /*fSafeToFree*/);
2253 return VINF_SUCCESS;
2254 }
2255
2256 /*
2257 * Set the current TB so CIMPL functions may get at it.
2258 */
2259 pVCpu->iem.s.pCurTbR3 = pTb;
2260
2261 /*
2262 * Execute the block.
2263 */
2264#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2265 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2266 {
2267 pVCpu->iem.s.cTbExecNative++;
2268# ifdef LOG_ENABLED
2269 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2270# endif
2271# ifdef RT_ARCH_AMD64
2272 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2273# else
2274 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2275# endif
2276 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2277 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2278 { /* likely */ }
2279 else
2280 {
2281 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2282 pVCpu->iem.s.pCurTbR3 = NULL;
2283 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2284
2285 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2286 only to break out of TB execution early. */
2287 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2288 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2289 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2290 }
2291 }
2292 else
2293#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2294 {
2295 /*
2296 * The threaded execution loop.
2297 */
2298 pVCpu->iem.s.cTbExecThreaded++;
2299#ifdef LOG_ENABLED
2300 uint64_t uRipPrev = UINT64_MAX;
2301#endif
2302 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2303 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2304 while (cCallsLeft-- > 0)
2305 {
2306#ifdef LOG_ENABLED
2307 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2308 {
2309 uRipPrev = pVCpu->cpum.GstCtx.rip;
2310 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2311 }
2312 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2313 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2314 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2315#endif
2316 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2317 pCallEntry->auParams[0],
2318 pCallEntry->auParams[1],
2319 pCallEntry->auParams[2]);
2320 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2321 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2322 pCallEntry++;
2323 else
2324 {
2325 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2326 pVCpu->iem.s.pCurTbR3 = NULL;
2327 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2328
2329 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2330 only to break out of TB execution early. */
2331 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2332 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2333 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2334 }
2335 }
2336 }
2337
2338 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2339 pVCpu->iem.s.pCurTbR3 = NULL;
2340 return VINF_SUCCESS;
2341}
2342
2343
2344/**
2345 * This is called when the PC doesn't match the current pbInstrBuf.
2346 *
2347 * Upon return, we're ready for opcode fetching. But please note that
2348 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2349 * MMIO or unassigned).
2350 */
2351static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2352{
2353 pVCpu->iem.s.pbInstrBuf = NULL;
2354 pVCpu->iem.s.offCurInstrStart = 0;
2355 pVCpu->iem.s.offInstrNextByte = 0;
2356 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2357 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2358}
2359
2360
2361/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
2362DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2363{
2364 /*
2365 * Set uCurTbStartPc to RIP and calc the effective PC.
2366 */
2367 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2368 pVCpu->iem.s.uCurTbStartPc = uPc;
2369 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2370 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2371
2372 /*
2373 * Advance within the current buffer (PAGE) when possible.
2374 */
2375 if (pVCpu->iem.s.pbInstrBuf)
2376 {
2377 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2378 if (off < pVCpu->iem.s.cbInstrBufTotal)
2379 {
2380 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2381 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2382 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2383 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2384 else
2385 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2386
2387 return pVCpu->iem.s.GCPhysInstrBuf + off;
2388 }
2389 }
2390 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2391}
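/*
 * Editorial worked example (hypothetical values, not from the original
 * source): with cs.u64Base=0x10000, rip=0x1234, uInstrBufPc=0x11000 and
 * cbInstrBufTotal=0x1000, the effective PC is 0x11234 and off is 0x234,
 * which lies inside the current buffer.  The function therefore sets
 * offInstrNextByte/offCurInstrStart to 0x234, exposes a 15 byte decode
 * window (cbInstrBuf = 0x243, clipped to the buffer end when necessary)
 * and returns GCPhysInstrBuf + 0x234.  Only when off falls outside the
 * buffer does it take the iemGetPcWithPhysAndCodeMissed() path and refetch
 * via the code TLB.
 */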
2392
2393
2394/**
2395 * Determines the extra IEMTB_F_XXX flags.
2396 *
2397 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2398 * IEMTB_F_CS_LIM_CHECKS (or zero).
2399 * @param pVCpu The cross context virtual CPU structure of the calling
2400 * thread.
2401 */
2402DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2403{
2404 uint32_t fRet = 0;
2405
2406 /*
2407 * Determine the inhibit bits.
2408 */
2409 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2410 { /* typical */ }
2411 else
2412 {
2413 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2414 fRet |= IEMTB_F_INHIBIT_SHADOW;
2415 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2416 fRet |= IEMTB_F_INHIBIT_NMI;
2417 }
2418
2419 /*
2420 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2421 * likely to go invalid before the end of the translation block.
2422 */
2423 if (IEM_IS_64BIT_CODE(pVCpu))
2424 return fRet;
2425
2426 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2427 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2428 return fRet;
2429 return fRet | IEMTB_F_CS_LIM_CHECKS;
2430}
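/*
 * Editorial worked example (hypothetical values, not from the original
 * source): assume a page-aligned CS base and cs.u32Limit=0xFFFF.  At
 * eip=0xE000 the margin to the limit is 0x1FFF, which is at least
 * X86_PAGE_SIZE + 16 = 0x1010, so no extra flag is added.  At eip=0xF800
 * the margin is only 0x7FF, so the TB gets IEMTB_F_CS_LIM_CHECKS because
 * decoding could run past CS.LIM before the block ends.
 */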
2431
2432
2433VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
2434{
2435 /*
2436 * See if there is an interrupt pending in TRPM, inject it if we can.
2437 */
2438 if (!TRPMHasTrap(pVCpu))
2439 { /* likely */ }
2440 else
2441 {
2442 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
2443 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
2444 { /*likely */ }
2445 else
2446 return rcStrict;
2447 }
2448
2449 /*
2450 * Init the execution environment.
2451 */
2452 iemInitExec(pVCpu, 0 /*fExecOpts*/);
2453 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
2454 { }
2455 else
2456 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
2457
2458 /*
2459 * Run-loop.
2460 *
2461 * If we're using setjmp/longjmp we combine all the catching here to avoid
2462 * having to call setjmp for each block we're executing.
2463 */
2464 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
2465 for (;;)
2466 {
2467 PIEMTB pTb = NULL;
2468 VBOXSTRICTRC rcStrict;
2469 IEM_TRY_SETJMP(pVCpu, rcStrict)
2470 {
2471 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
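            /* Editorial note (not in the original source): cPollRate is a power-of-two
               mask (0x1ff), so the "(iIterations & cPollRate) != 0" test further down
               lets the timer poll run only on every 512th iteration of this loop. */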
2472 for (uint32_t iIterations = 0; ; iIterations++)
2473 {
2474 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
2475 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
2476 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
2477
2478 pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
2479 if (pTb)
2480 rcStrict = iemTbExec(pVCpu, pTb);
2481 else
2482 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
2483 if (rcStrict == VINF_SUCCESS)
2484 {
2485 Assert(pVCpu->iem.s.cActiveMappings == 0);
2486
2487 uint64_t fCpu = pVCpu->fLocalForcedActions;
2488 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
2489 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2490 | VMCPU_FF_TLB_FLUSH
2491 | VMCPU_FF_UNHALT );
2492 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2493 if (RT_LIKELY( ( !fCpu
2494 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2495 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2496 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
2497 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
2498 {
2499 if (RT_LIKELY( (iIterations & cPollRate) != 0
2500 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
2501 pTb = NULL; /* Clear it before looping so iemTbCacheLookup can safely do native recompilation. */
2502 else
2503 return VINF_SUCCESS;
2504 }
2505 else
2506 return VINF_SUCCESS;
2507 }
2508 else
2509 return rcStrict;
2510 }
2511 }
2512 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
2513 {
2514 pVCpu->iem.s.cLongJumps++;
2515 if (pVCpu->iem.s.cActiveMappings > 0)
2516 iemMemRollback(pVCpu);
2517
2518#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
2519 /* If pTb isn't NULL we're in iemTbExec. */
2520 if (!pTb)
2521 {
2522 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
2523 pTb = pVCpu->iem.s.pCurTbR3;
2524 if (pTb)
2525 {
2526 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
2527 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
2528 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
2529 }
2530 }
2531#endif
2532 return rcStrict;
2533 }
2534 IEM_CATCH_LONGJMP_END(pVCpu);
2535 }
2536}
2537